From 19d25c601772c38ad8a76171c99d6a23657b4ba8 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Wed, 2 Jul 2025 17:29:43 +0800 Subject: [PATCH 1/3] feat(cli): Add `files_ls` and `files_rm` tools to MCP server --- src/vectorcode/mcp_main.py | 51 ++++++++++++++++++++++++++++-- tests/test_mcp.py | 65 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/vectorcode/mcp_main.py b/src/vectorcode/mcp_main.py index 72f57d92..2ba782c1 100644 --- a/src/vectorcode/mcp_main.py +++ b/src/vectorcode/mcp_main.py @@ -5,9 +5,10 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Optional +from typing import Optional, cast import shtab +from chromadb.types import Where from vectorcode.subcommands.vectorise import ( VectoriseStats, @@ -33,11 +34,17 @@ cleanup_path, config_logging, expand_globs, + expand_path, find_project_config_dir, get_project_config, load_config_file, ) -from vectorcode.common import ClientManager, get_collection, get_collections +from vectorcode.common import ( + ClientManager, + get_collection, + get_collections, + list_collection_files, +) from vectorcode.subcommands.prompt import prompt_by_categories from vectorcode.subcommands.query import get_query_result_files @@ -224,6 +231,34 @@ async def query_tool( ) +async def ls_files(project_root: str) -> list[str]: + """ + project_root: Directory to the repository. MUST be from the vectorcode `ls` tool or user input; + """ + configs = await get_project_config(expand_path(project_root, True)) + async with ClientManager().get_client(configs) as client: + return await list_collection_files(await get_collection(client, configs, False)) + + +async def rm_files(files: list[str], project_root: str): + """ + files: list of paths of the files to be removed; + project_root: Directory to the repository. MUST be from the vectorcode `ls` tool or user input; + """ + configs = await get_project_config(expand_path(project_root, True)) + async with ClientManager().get_client(configs) as client: + try: + collection = await get_collection(client, configs, False) + files = [str(expand_path(i, True)) for i in files if os.path.isfile(i)] + if files: + await collection.delete(where=cast(Where, {"path": {"$in": files}})) + else: # pragma: nocover + logger.warning(f"All paths were invalid: {files}") + except ValueError: # pragma: nocover + logger.warning(f"Failed to find the collection at {configs.project_root}") + return + + async def mcp_server(): global default_config, default_project_root @@ -283,6 +318,18 @@ async def mcp_server(): ), ) + mcp.add_tool( + fn=rm_files, + name="files_rm", + description="Remove files from VectorCode embedding database.", + ) + + mcp.add_tool( + fn=ls_files, + name="files_ls", + description="List files that have been indexed by VectorCode.", + ) + return mcp diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 2057c589..f48e684b 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -7,12 +7,15 @@ from mcp import McpError from vectorcode.cli_utils import Config +from vectorcode.common import ClientManager from vectorcode.mcp_main import ( get_arg_parser, list_collections, + ls_files, mcp_server, parse_cli_args, query_tool, + rm_files, vectorise_files, ) @@ -335,7 +338,7 @@ async def test_mcp_server(): await mcp_server() - assert mock_add_tool.call_count == 3 + assert mock_add_tool.call_count == 5 @pytest.mark.asyncio @@ -374,10 +377,68 @@ async def new_get_collections(clients): await mcp_server() - assert mock_add_tool.call_count == 3 + assert mock_add_tool.call_count == 5 mock_get_collections.assert_called() +@pytest.mark.asyncio +async def test_ls_files_success(): + ClientManager().clear() + mock_client = MagicMock() + mock_collection = MagicMock() + expected_files = ["/test/project/file1.py", "/test/project/dir/file2.txt"] + + with ( + patch("vectorcode.mcp_main.get_project_config") as mock_get_project_config, + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), + patch("vectorcode.common.try_server", return_value=True), + patch("vectorcode.mcp_main.get_collection", return_value=mock_collection), + patch( + "vectorcode.mcp_main.list_collection_files", return_value=expected_files + ) as mock_list_collection_files, + patch( + "vectorcode.cli_utils.expand_path", side_effect=lambda x, y: x + ), # Mock expand_path to return input + ): + mock_get_project_config.return_value = Config(project_root="/test/project") + result = await ls_files(project_root="/test/project") + + assert result == expected_files + mock_get_project_config.assert_called_once_with("/test/project") + + mock_list_collection_files.assert_called_once_with(mock_collection) + + +@pytest.mark.asyncio +async def test_rm_files_success(): + ClientManager().clear() + mock_client = MagicMock() + mock_collection = MagicMock() + files_to_remove = ["/test/project/file1.py", "/test/project/file2.txt"] + + with ( + patch("os.path.isfile", side_effect=lambda x: x in files_to_remove), + patch("vectorcode.mcp_main.get_project_config") as mock_get_project_config, + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), + patch("vectorcode.common.try_server", return_value=True), + patch("vectorcode.mcp_main.get_collection", return_value=mock_collection), + patch("vectorcode.cli_utils.expand_path", side_effect=lambda x, y: x), + ): + mock_get_project_config.return_value = Config(project_root="/test/project") + mock_collection.delete = AsyncMock() + + await rm_files(files=files_to_remove, project_root="/test/project") + + mock_get_project_config.assert_called_once_with("/test/project") + mock_collection.delete.assert_called_once_with( + where={"path": {"$in": files_to_remove}} + ) + + def test_arg_parser(): assert isinstance(get_arg_parser(), ArgumentParser) From dfff4e6885a795bdb1e812d50e9b703658edc9c4 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Wed, 2 Jul 2025 17:54:26 +0800 Subject: [PATCH 2/3] docs(cli): Document `files_ls` and `files_rm` tools in cli.md --- docs/cli.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/cli.md b/docs/cli.md index c7d48a4d..55a6b3ae 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -726,7 +726,9 @@ features: - `ls`: list local collections, similar to the `ls` subcommand in the CLI; - `query`: query from a given collection, similar to the `query` subcommand in the CLI; -- `vectorise`: vectorise files into a given project. +- `vectorise`: vectorise files into a given project; +- `files_ls`: show files that have been indexed for the current project; +- `files_rm`: remove some files from the database for a project. To try it out, install the `vectorcode[mcp]` dependency group and the MCP server is available in the shell as `vectorcode-mcp-server`. From 75f2c4727e2aa08fc3f949bdb5953aef72426c14 Mon Sep 17 00:00:00 2001 From: Davidyz Date: Wed, 2 Jul 2025 09:55:04 +0000 Subject: [PATCH 3/3] Auto generate docs --- doc/VectorCode-cli.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index 633b0199..754bb9cd 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -803,7 +803,9 @@ features: - `ls`list local collections, similar to the `ls` subcommand in the CLI; - `query`query from a given collection, similar to the `query` subcommand in the CLI; -- `vectorise`vectorise files into a given project. +- `vectorise`vectorise files into a given project; +- `files_ls`show files that have been indexed for the current project; +- `files_rm`remove some files from the database for a project. To try it out, install the `vectorcode[mcp]` dependency group and the MCP server is available in the shell as `vectorcode-mcp-server`.