diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index 639e3d90..2bd4ae33 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -765,9 +765,8 @@ Note that: 1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode; 2. At the time this only work with vectorcode setup that uses a **standalone ChromaDB server**, which is not difficult to setup using docker; -3. At the time this only work with `query` subcommand. I will consider adding -support for other subcommand but first I need to figure out how to properly -manage `project_root` across different requests if they change. +3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other +subcommands may be added in the future. MCP SERVER ~ diff --git a/docs/cli.md b/docs/cli.md index 681df395..3b9b2b75 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -690,9 +690,8 @@ Note that: 1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode; 2. At the time this only work with vectorcode setup that uses a **standalone ChromaDB server**, which is not difficult to setup using docker; -3. At the time this only work with `query` subcommand. I will consider adding - support for other subcommand but first I need to figure out how to properly - manage `project_root` across different requests if they change. +3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other + subcommands may be added in the future. ### MCP Server diff --git a/lua/vectorcode/jobrunner/lsp.lua b/lua/vectorcode/jobrunner/lsp.lua index 69d165a6..7d689710 100644 --- a/lua/vectorcode/jobrunner/lsp.lua +++ b/lua/vectorcode/jobrunner/lsp.lua @@ -101,14 +101,7 @@ function jobrunner.run_async(args, callback, bufnr) end vim.schedule_wrap(callback)(result, err_message, code) if result then - logger.debug( - "lsp jobrunner result:\n", - vim.tbl_map(function(item) - item.document = nil - item.chunk = nil - return item - end, vim.deepcopy(result)) - ) + logger.debug("lsp jobrunner result:\n", result) end if err then logger.info("lsp jobrunner error:\n", err) diff --git a/src/vectorcode/lsp_main.py b/src/vectorcode/lsp_main.py index a67aaab1..d376e96e 100644 --- a/src/vectorcode/lsp_main.py +++ b/src/vectorcode/lsp_main.py @@ -9,6 +9,13 @@ import shtab +from vectorcode.subcommands.vectorise import ( + chunked_add, + exclude_paths_by_spec, + find_exclude_specs, + load_files_from_include, +) + try: # pragma: nocover from lsprotocol import types from pygls.exceptions import ( @@ -29,6 +36,7 @@ Config, cleanup_path, config_logging, + expand_globs, find_project_root, get_project_config, parse_cli_args, @@ -86,14 +94,6 @@ async def execute_command(ls: LanguageServer, args: list[str]): logger.info("Received command arguments: %s", args) parsed_args = await parse_cli_args(args) logger.info("Parsed command arguments: %s", parsed_args) - if parsed_args.action not in {CliAction.query, CliAction.ls}: - error_message = ( - f"Unsupported vectorcode subcommand: {str(parsed_args.action)}" - ) - logger.error( - error_message, - ) - raise JsonRpcInvalidRequest(error_message) if parsed_args.project_root is None: if DEFAULT_PROJECT_ROOT is not None: parsed_args.project_root = DEFAULT_PROJECT_ROOT @@ -136,12 +136,12 @@ async def execute_command(ls: LanguageServer, args: list[str]): ) final_results = [] try: - if collection is None: - print("Please specify a project to search in.", file=sys.stderr) - else: - final_results.extend( - await build_query_results(collection, final_configs) - ) + assert collection is not None, ( + "Failed to 
find the correct collection." + ) + final_results.extend( + await build_query_results(collection, final_configs) + ) finally: log_message = f"Retrieved {len(final_results)} result{'s' if len(final_results) > 1 else ''} in {round(time.time() - start_time, 2)}s." ls.progress.end( @@ -168,11 +168,73 @@ async def execute_command(ls: LanguageServer, args: list[str]): ) logger.info(f"Retrieved {len(projects)} project(s).") return projects - except Exception as e: + case CliAction.vectorise: + assert collection is not None, "Failed to find the correct collection." + ls.progress.begin( + progress_token, + types.WorkDoneProgressBegin( + title="VectorCode", message="Vectorising files...", percentage=0 + ), + ) + files = await expand_globs( + final_configs.files + or load_files_from_include(str(final_configs.project_root)), + recursive=final_configs.recursive, + include_hidden=final_configs.include_hidden, + ) + if not final_configs.force: # pragma: nocover + # tested in 'vectorise.py' + for spec in find_exclude_specs(final_configs): + if os.path.isfile(spec): + logger.info(f"Loading ignore specs from {spec}.") + files = exclude_paths_by_spec((str(i) for i in files), spec) + stats = {"add": 0, "update": 0, "removed": 0} + collection_lock = asyncio.Lock() + stats_lock = asyncio.Lock() + max_batch_size = await client.get_max_batch_size() + semaphore = asyncio.Semaphore(os.cpu_count() or 1) + tasks = [ + asyncio.create_task( + chunked_add( + str(file), + collection, + collection_lock, + stats, + stats_lock, + final_configs, + max_batch_size, + semaphore, + ) + ) + for file in files + ] + for i, task in enumerate(asyncio.as_completed(tasks), start=1): + await task + ls.progress.report( + progress_token, + types.WorkDoneProgressReport( + message="Vectorising files...", + percentage=int(100 * i / len(tasks)), + ), + ) + ls.progress.end( + progress_token, + types.WorkDoneProgressEnd( + message=f"Vectorised {stats['add'] + stats['update']} files." + ), + ) + return stats + case _ as c: # pragma: nocover + error_message = f"Unsupported vectorcode subcommand: {str(c)}" + logger.error( + error_message, + ) + raise JsonRpcInvalidRequest(error_message) + except Exception as e: # pragma: nocover if isinstance(e, JsonRpcException): # pygls exception. raise it as is. raise - else: # pragma: nocover + else: # wrap non-pygls errors for error codes. raise JsonRpcInternalError(message=traceback.format_exc()) from e diff --git a/src/vectorcode/subcommands/vectorise.py b/src/vectorcode/subcommands/vectorise.py index a45bc361..8efc91a4 100644 --- a/src/vectorcode/subcommands/vectorise.py +++ b/src/vectorcode/subcommands/vectorise.py @@ -141,10 +141,15 @@ def show_stats(configs: Config, stats): ) -def exclude_paths_by_spec(paths: Iterable[str], specs: pathspec.PathSpec) -> list[str]: +def exclude_paths_by_spec( + paths: Iterable[str], specs: pathspec.PathSpec | str +) -> list[str]: """ Files matched by the specs will be excluded. """ + if isinstance(specs, str): + with open(specs) as fin: + specs = pathspec.GitIgnoreSpec.from_lines(fin.readlines()) return [path for path in paths if not specs.match_file(path)] @@ -180,6 +185,25 @@ def load_files_from_include(project_root: str) -> list[str]: return [] +def find_exclude_specs(configs: Config) -> list[str]: + """ + Load a list of paths to exclude specs. 
+ Can be `.gitignore` or local/global `vectorcode.exclude` + """ + gitignore_path = os.path.join(str(configs.project_root), ".gitignore") + specs = [ + gitignore_path, + ] + exclude_spec_path = os.path.join( + str(configs.project_root), ".vectorcode", "vectorcode.exclude" + ) + if os.path.isfile(exclude_spec_path): + specs.append(exclude_spec_path) + elif os.path.isfile(GLOBAL_EXCLUDE_SPEC): + specs.append(GLOBAL_EXCLUDE_SPEC) + return specs + + async def vectorise(configs: Config) -> int: assert configs.project_root is not None client = await get_client(configs) @@ -198,23 +222,10 @@ async def vectorise(configs: Config) -> int: ) if not configs.force: - gitignore_path = os.path.join(str(configs.project_root), ".gitignore") - specs = [ - gitignore_path, - ] - exclude_spec_path = os.path.join( - configs.project_root, ".vectorcode", "vectorcode.exclude" - ) - if os.path.isfile(exclude_spec_path): - specs.append(exclude_spec_path) - elif os.path.isfile(GLOBAL_EXCLUDE_SPEC): - specs.append(GLOBAL_EXCLUDE_SPEC) - for spec_path in specs: + for spec_path in find_exclude_specs(configs): if os.path.isfile(spec_path): logger.info(f"Loading ignore specs from {spec_path}.") - with open(spec_path) as fin: - spec = pathspec.GitIgnoreSpec.from_lines(fin.readlines()) - files = exclude_paths_by_spec((str(i) for i in files), spec) + files = exclude_paths_by_spec((str(i) for i in files), spec_path) else: # pragma: nocover logger.info("Ignoring exclude specs.") diff --git a/tests/test_lsp.py b/tests/test_lsp.py index d2a70f41..d5036f99 100644 --- a/tests/test_lsp.py +++ b/tests/test_lsp.py @@ -1,7 +1,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from pygls.exceptions import JsonRpcInvalidRequest +from pygls.exceptions import JsonRpcInternalError, JsonRpcInvalidRequest from pygls.server import LanguageServer from vectorcode import __version__ @@ -217,6 +217,126 @@ async def test_execute_command_ls(mock_language_server, mock_config): mock_language_server.progress.end.assert_called() +@pytest.mark.asyncio +async def test_execute_command_vectorise(mock_language_server, mock_config: Config): + mock_config.action = CliAction.vectorise # Set action to vectorise + mock_config.project_root = "/test/project" # Ensure project_root is set + mock_config.files = None # Simulate no files explicitly passed, so load_files_from_include is called + mock_config.recursive = True + mock_config.include_hidden = False + mock_config.force = False # To test exclude_paths_by_spec path + + # Files that load_files_from_include will return and expand_globs will process + dummy_initial_files = ["file_a.py", "file_b.txt"] + # Files after expand_globs + dummy_expanded_files = ["/test/project/file_a.py", "/test/project/file_b.txt"] + + # Mock dependencies + with ( + patch( + "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock + ) as mock_parse_cli_args, + patch( + "vectorcode.lsp_main.get_client", new_callable=AsyncMock + ) as mock_get_client, + patch( + "vectorcode.lsp_main.get_collection", new_callable=AsyncMock + ) as mock_get_collection, + patch( + "vectorcode.lsp_main.expand_globs", new_callable=AsyncMock + ) as mock_expand_globs, + patch( + "vectorcode.lsp_main.find_exclude_specs", return_value=[] + ) as mock_find_exclude_specs, + patch( + "vectorcode.lsp_main.exclude_paths_by_spec", + side_effect=lambda files, spec: files, + ) as mock_exclude_paths_by_spec, + patch( + "vectorcode.lsp_main.chunked_add", new_callable=AsyncMock + ) as mock_chunked_add, + patch("vectorcode.lsp_main.try_server", 
return_value=True), + patch("vectorcode.lsp_main.cached_project_configs", {}), + patch( + "vectorcode.lsp_main.load_files_from_include", + return_value=dummy_initial_files, + ) as mock_load_files_from_include, + patch("os.cpu_count", return_value=1), # For asyncio.Semaphore + patch( + "vectorcode.lsp_main.make_caches", new_callable=AsyncMock + ), # Mock make_caches to avoid actual file system ops + ): + from unittest.mock import ANY + + from lsprotocol import types + + from vectorcode.lsp_main import cached_project_configs + + cached_project_configs.clear() + cached_project_configs["/test/project"] = mock_config # Add config to cache + + # Set return values for mocks + mock_parse_cli_args.return_value = mock_config + mock_client = AsyncMock() + mock_get_client.return_value = mock_client + mock_collection = MagicMock() + mock_get_collection.return_value = mock_collection + mock_client.get_max_batch_size.return_value = 100 # Mock batch size + + mock_expand_globs.return_value = ( + dummy_expanded_files # What expand_globs should return + ) + + # Mock merge_from as it's called + mock_config.merge_from = AsyncMock(return_value=mock_config) + + # Execute the command + result = await execute_command( + mock_language_server, ["vectorise", "/test/project"] + ) + assert isinstance(result, dict) + + # Assertions + mock_language_server.progress.create_async.assert_called_once() + mock_language_server.progress.begin.assert_called_once_with( + ANY, # progress_token + types.WorkDoneProgressBegin( + title="VectorCode", message="Vectorising files...", percentage=0 + ), + ) + + mock_load_files_from_include.assert_called_once_with( + str(mock_config.project_root) + ) + mock_expand_globs.assert_called_once_with( + dummy_initial_files, # Should be the result of load_files_from_include + recursive=mock_config.recursive, + include_hidden=mock_config.include_hidden, + ) + mock_find_exclude_specs.assert_called_once_with(mock_config) + mock_exclude_paths_by_spec.assert_not_called() # Because mock_find_exclude_specs returns empty list (no specs to exclude by) + mock_client.get_max_batch_size.assert_called_once() + + # Check chunked_add calls + assert mock_chunked_add.call_count == len(dummy_expanded_files) + for file_path in dummy_expanded_files: + mock_chunked_add.assert_any_call( + file_path, + mock_collection, + ANY, # asyncio.Lock object + ANY, # stats dict + ANY, # stats_lock + mock_config, + 100, # max_batch_size + ANY, # semaphore + ) + # Check progress report calls + assert mock_language_server.progress.report.call_count == len( + dummy_expanded_files + ) + mock_language_server.progress.end.assert_called_once() + + @pytest.mark.asyncio async def test_execute_command_unsupported_action( mock_language_server, mock_config, capsys @@ -232,6 +352,9 @@ async def test_execute_command_unsupported_action( patch( "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock ) as mock_parse_cli_args, + patch( + "vectorcode.lsp_main.get_collection", new_callable=AsyncMock + ) as mock_get_collection, patch("vectorcode.lsp_main.cached_project_configs", {}), patch("vectorcode.lsp_main.try_server", return_value=True), ): @@ -242,11 +365,13 @@ async def test_execute_command_unsupported_action( # Add a mock config to cached_project_configs cached_project_configs["/test/project"] = mock_config + mock_collection = MagicMock() + mock_get_collection.return_value = mock_collection # Mock the merge_from method mock_config.merge_from = AsyncMock(return_value=mock_config) - with pytest.raises(JsonRpcInvalidRequest): + with 
pytest.raises((JsonRpcInternalError, JsonRpcInvalidRequest)): await execute_command(mock_language_server, ["invalid_action"]) @@ -317,10 +442,9 @@ async def test_execute_command_no_default_project_root( patch( "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock ) as mock_parse_cli_args, - patch("sys.stderr.write") as stderr, patch("vectorcode.lsp_main.get_client", new_callable=AsyncMock), ): mock_parse_cli_args.return_value = mock_config - await execute_command(mock_language_server, ["query", "test"]) - stderr.assert_called() + with pytest.raises((AssertionError, JsonRpcInternalError)): + await execute_command(mock_language_server, ["query", "test"]) DEFAULT_PROJECT_ROOT = None # Reset the global variable
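
For context, the exclude-spec handling is now shared between the CLI `vectorise()` path and the new LSP `vectorise` branch: `find_exclude_specs()` collects candidate spec files (`.gitignore`, plus the local or global `vectorcode.exclude`), and `exclude_paths_by_spec()` now accepts either a compiled `pathspec.PathSpec` or a path to a spec file. A minimal sketch of how the two helpers compose after this patch; the `filter_files` wrapper is illustrative only and not part of the diff:

```python
import os

from vectorcode.subcommands.vectorise import (
    exclude_paths_by_spec,
    find_exclude_specs,
)


def filter_files(files: list[str], configs) -> list[str]:
    """Drop files matched by .gitignore or local/global vectorcode.exclude."""
    if configs.force:
        # `--force` bypasses exclude specs, mirroring the check in `vectorise()`.
        return files
    for spec_path in find_exclude_specs(configs):
        if os.path.isfile(spec_path):
            # `exclude_paths_by_spec` now also accepts a path to a spec file
            # and loads it as a GitIgnoreSpec internally.
            files = exclude_paths_by_spec(files, spec_path)
    return files
```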
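
The LSP `vectorise` branch fans out one `chunked_add()` task per file and reports a percentage as each task completes. A rough sketch of that pattern under the same assumptions, with `process_file` standing in for `chunked_add`, a fixed semaphore size where the patch uses `os.cpu_count() or 1`, and `report` standing in for `ls.progress.report(...)` (in the patch the semaphore is passed into `chunked_add` rather than wrapped around it):

```python
import asyncio


async def process_file(path: str) -> None:
    await asyncio.sleep(0)  # placeholder for chunking + embedding work


async def vectorise_with_progress(files: list[str], report) -> None:
    semaphore = asyncio.Semaphore(4)  # the patch sizes this from os.cpu_count()

    async def bounded(path: str) -> None:
        async with semaphore:
            await process_file(path)

    tasks = [asyncio.create_task(bounded(f)) for f in files]
    # Report progress as tasks finish, not in submission order.
    for i, task in enumerate(asyncio.as_completed(tasks), start=1):
        await task
        report(int(100 * i / len(tasks)))
```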