From 5eeb81f867d93be4a6d46238556acaf6b9019f02 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Thu, 29 May 2025 10:05:10 +0800 Subject: [PATCH] feat(cli): set the default `hnsw:M` value to 64 --- docs/cli.md | 4 ++-- src/vectorcode/common.py | 1 + tests/test_common.py | 21 +++++++++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index c5931da9..4f1514f3 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -318,10 +318,10 @@ The JSON configuration file may hold the following values: queries. **It's recommended to re-vectorise the collection after modifying these options, because some of the options can only be set during collection creation.** Example: - ```json + ```json5 + // the following is the default value. "hnsw": { "hnsw:M": 64, - "hnsw:construction_ef": 100 } ``` - `filetype_map`: `dict[str, list[str]]`, a dictionary where keys are diff --git a/src/vectorcode/common.py b/src/vectorcode/common.py index bfde24a0..f4fff1a6 100644 --- a/src/vectorcode/common.py +++ b/src/vectorcode/common.py @@ -193,6 +193,7 @@ async def get_collection( "USER", os.environ.get("USERNAME", "DEFAULT_USER") ), "embedding_function": configs.embedding_function, + "hnsw:M": 64, } if configs.hnsw: for key in configs.hnsw.keys(): diff --git a/tests/test_common.py b/tests/test_common.py index 16382ad2..98f1370b 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -327,7 +327,19 @@ async def test_get_collection(): ), "created-by": "VectorCode", } - mock_client.get_or_create_collection.return_value = mock_collection + + async def mock_get_or_create_collection( + self, + name=None, + configuration=None, + metadata=None, + embedding_function=None, + data_loader=None, + ): + mock_collection.metadata.update(metadata or {}) + return mock_collection + + mock_client.get_or_create_collection.side_effect = mock_get_or_create_collection MockAsyncHttpClient.return_value = mock_client collection = await get_collection(mock_client, config, make_if_missing=True) @@ -336,6 +348,7 @@ async def test_get_collection(): "USER", os.environ.get("USERNAME", "DEFAULT_USER") ) assert collection.metadata["created-by"] == "VectorCode" + assert collection.metadata["hnsw:M"] == 64 mock_client.get_or_create_collection.assert_called_once() mock_client.get_collection.side_effect = None @@ -361,7 +374,7 @@ async def test_get_collection_hnsw(): embedding_function="SentenceTransformerEmbeddingFunction", embedding_params={}, project_root="/test_project", - hnsw={"ef_construction": 200, "m": 32}, + hnsw={"ef_construction": 200, "M": 32}, ) with patch("chromadb.AsyncHttpClient") as MockAsyncHttpClient: @@ -374,7 +387,7 @@ async def test_get_collection_hnsw(): ), "created-by": "VectorCode", "hnsw:ef_construction": 200, - "hnsw:m": 32, + "hnsw:M": 32, "embedding_function": "SentenceTransformerEmbeddingFunction", "path": "/test_project", } @@ -394,7 +407,7 @@ async def test_get_collection_hnsw(): ) assert collection.metadata["created-by"] == "VectorCode" assert collection.metadata["hnsw:ef_construction"] == 200 - assert collection.metadata["hnsw:m"] == 32 + assert collection.metadata["hnsw:M"] == 32 mock_client.get_or_create_collection.assert_called_once() assert ( mock_client.get_or_create_collection.call_args.kwargs["metadata"]