From deea19c587d4e94fe4724fccbda6f46d2e1314d5 Mon Sep 17 00:00:00 2001
From: Zhe Yu <zcabzyu@ucl.ac.uk>
Date: Mon, 25 Aug 2025 15:46:46 +0800
Subject: [PATCH 1/2] revert(cli): default to `NaiveReranker`

---
 docs/cli.md                 | 13 ++++---------
 src/vectorcode/cli_utils.py |  2 +-
 tests/test_cli_utils.py     |  2 +-
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/docs/cli.md b/docs/cli.md
index 1ab6d914..d1376d64 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -311,16 +311,11 @@ The JSON configuration file may hold the following values:
   guarantees the return of `n` documents, but with the risk of including too
   many less-relevant chunks that may affect the document selection. Default: 
   `-1` (any negative value means selecting documents based on all indexed chunks);
-- `reranker`: string, the reranking method to use. Currently supports
-  `CrossEncoderReranker` (default, using 
+- `reranker`: string, the reranking method to use. Currently supports
+  `NaiveReranker` (default; sorts chunks by the "distance" between the
+  embedding vectors) and `CrossEncoderReranker` (using
   [sentence-transformers cross-encoder](https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html)
-  ) and `NaiveReranker` (sort chunks by the "distance" between the embedding
-  vectors).
-  Note: If you're using a good embedding model (eg. a hosted service from OpenAI, or 
-  a LLM-based embedding model like 
-  [Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)), you
-  may get better results if you use `NaiveReranker` here because a good embedding
-  model may understand texts better than a mediocre reranking model.
+  ).
 - `reranker_params`: dictionary, similar to `embedding_params`. The options
   passed to the reranker class constructor. For `CrossEncoderReranker`, these
   are the options passed to the 
diff --git a/src/vectorcode/cli_utils.py b/src/vectorcode/cli_utils.py
index 7c49def2..0131a5e2 100644
--- a/src/vectorcode/cli_utils.py
+++ b/src/vectorcode/cli_utils.py
@@ -100,7 +100,7 @@ class Config:
     overlap_ratio: float = 0.2
     query_multiplier: int = -1
     query_exclude: list[Union[str, os.PathLike]] = field(default_factory=list)
-    reranker: Optional[str] = "CrossEncoderReranker"
+    reranker: Optional[str] = "NaiveReranker"
     reranker_params: dict[str, Any] = field(default_factory=lambda: {})
     check_item: Optional[str] = None
     use_absolute_path: bool = False
diff --git a/tests/test_cli_utils.py b/tests/test_cli_utils.py
index e8e79f2b..bd10efc5 100644
--- a/tests/test_cli_utils.py
+++ b/tests/test_cli_utils.py
@@ -113,7 +113,7 @@ async def test_config_import_from_missing_keys():
     assert config.chunk_size == 2500
     assert config.overlap_ratio == 0.2
     assert config.query_multiplier == -1
-    assert config.reranker == "CrossEncoderReranker"
+    assert config.reranker == "NaiveReranker"
     assert config.reranker_params == {}
     assert config.db_settings is None
 

From e5cb57fc236af7a514077a6389b522fe746bd4a7 Mon Sep 17 00:00:00 2001
From: Davidyz <30951234+Davidyz@users.noreply.github.com>
Date: Mon, 25 Aug 2025 07:47:45 +0000
Subject: [PATCH 2/2] Auto generate docs

---
 doc/VectorCode-cli.txt | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt
index 66e2f0c3..f20ec83c 100644
--- a/doc/VectorCode-cli.txt
+++ b/doc/VectorCode-cli.txt
@@ -358,18 +358,13 @@ most `n` documents. A larger value of `query_multiplier` guarantees the return
 of `n` documents, but with the risk of including too many less-relevant chunks
 that may affect the document selection. Default: `-1` (any negative value means
 selecting documents based on all indexed chunks); - `reranker`string, the
-reranking method to use. Currently supports `CrossEncoderReranker` (default,
-using sentence-transformers cross-encoder
-<https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html> )
-and `NaiveReranker` (sort chunks by the "distance" between the embedding
-vectors). Note: If you’re using a good embedding model (eg. a hosted service
-from OpenAI, or a LLM-based embedding model like Qwen3-Embedding-0.6B
-<https://huggingface.co/Qwen/Qwen3-Embedding-0.6B>), you may get better results
-if you use `NaiveReranker` here because a good embedding model may understand
-texts better than a mediocre reranking model. - `reranker_params`dictionary,
-similar to `embedding_params`. The options passed to the reranker class
-constructor. For `CrossEncoderReranker`, these are the options passed to the
-`CrossEncoder`
+reranking method to use. Currently supports `NaiveReranker` (sort chunks by the
+"distance" between the embedding vectors) and `CrossEncoderReranker` (using
+sentence-transformers cross-encoder
+<https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html> ).
+- `reranker_params`dictionary, similar to `embedding_params`. The options
+passed to the reranker class constructor. For `CrossEncoderReranker`, these are
+the options passed to the `CrossEncoder`
 <https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html#id1>
 class. For example, if you want to use a non-default model, you can use the
 following: `json { "reranker_params": { "model_name_or_path": "your_model_here"