Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions doc/VectorCode-cli.txt
Original file line number Diff line number Diff line change
Expand Up @@ -765,9 +765,8 @@ Note that:

1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode;
2. At this time, this only works with a vectorcode setup that uses a **standalone ChromaDB server**, which is not difficult to set up using docker;
3. At the time this only work with `query` subcommand. I will consider adding
support for other subcommand but first I need to figure out how to properly
manage `project_root` across different requests if they change.
3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other
subcommands may be added in the future.


MCP SERVER ~
Expand Down
5 changes: 2 additions & 3 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -690,9 +690,8 @@ Note that:
1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode;
2. At this time, this only works with a vectorcode setup that uses a **standalone
ChromaDB server**, which is not difficult to set up using docker;
3. At the time this only work with `query` subcommand. I will consider adding
support for other subcommand but first I need to figure out how to properly
manage `project_root` across different requests if they change.
3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other
subcommands may be added in the future.

### MCP Server

Expand Down
9 changes: 1 addition & 8 deletions lua/vectorcode/jobrunner/lsp.lua
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,7 @@ function jobrunner.run_async(args, callback, bufnr)
end
vim.schedule_wrap(callback)(result, err_message, code)
if result then
logger.debug(
"lsp jobrunner result:\n",
vim.tbl_map(function(item)
item.document = nil
item.chunk = nil
return item
end, vim.deepcopy(result))
)
logger.debug("lsp jobrunner result:\n", result)
end
if err then
logger.info("lsp jobrunner error:\n", err)
Expand Down
94 changes: 78 additions & 16 deletions src/vectorcode/lsp_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

import shtab

from vectorcode.subcommands.vectorise import (
chunked_add,
exclude_paths_by_spec,
find_exclude_specs,
load_files_from_include,
)

try: # pragma: nocover
from lsprotocol import types
from pygls.exceptions import (
Expand All @@ -29,6 +36,7 @@
Config,
cleanup_path,
config_logging,
expand_globs,
find_project_root,
get_project_config,
parse_cli_args,
Expand Down Expand Up @@ -86,14 +94,6 @@ async def execute_command(ls: LanguageServer, args: list[str]):
logger.info("Received command arguments: %s", args)
parsed_args = await parse_cli_args(args)
logger.info("Parsed command arguments: %s", parsed_args)
if parsed_args.action not in {CliAction.query, CliAction.ls}:
error_message = (
f"Unsupported vectorcode subcommand: {str(parsed_args.action)}"
)
logger.error(
error_message,
)
raise JsonRpcInvalidRequest(error_message)
if parsed_args.project_root is None:
if DEFAULT_PROJECT_ROOT is not None:
parsed_args.project_root = DEFAULT_PROJECT_ROOT
Expand Down Expand Up @@ -136,12 +136,12 @@ async def execute_command(ls: LanguageServer, args: list[str]):
)
final_results = []
try:
if collection is None:
print("Please specify a project to search in.", file=sys.stderr)
else:
final_results.extend(
await build_query_results(collection, final_configs)
)
assert collection is not None, (
"Failed to find the correct collection."
)
final_results.extend(
await build_query_results(collection, final_configs)
)
finally:
log_message = f"Retrieved {len(final_results)} result{'s' if len(final_results) > 1 else ''} in {round(time.time() - start_time, 2)}s."
ls.progress.end(
Expand All @@ -168,11 +168,73 @@ async def execute_command(ls: LanguageServer, args: list[str]):
)
logger.info(f"Retrieved {len(projects)} project(s).")
return projects
except Exception as e:
case CliAction.vectorise:
assert collection is not None, "Failed to find the correct collection."
ls.progress.begin(
progress_token,
types.WorkDoneProgressBegin(
title="VectorCode", message="Vectorising files...", percentage=0
),
)
files = await expand_globs(
final_configs.files
or load_files_from_include(str(final_configs.project_root)),
recursive=final_configs.recursive,
include_hidden=final_configs.include_hidden,
)
if not final_configs.force: # pragma: nocover
# tested in 'vectorise.py'
for spec in find_exclude_specs(final_configs):
if os.path.isfile(spec):
logger.info(f"Loading ignore specs from {spec}.")
files = exclude_paths_by_spec((str(i) for i in files), spec)
stats = {"add": 0, "update": 0, "removed": 0}
collection_lock = asyncio.Lock()
stats_lock = asyncio.Lock()
max_batch_size = await client.get_max_batch_size()
semaphore = asyncio.Semaphore(os.cpu_count() or 1)
tasks = [
asyncio.create_task(
chunked_add(
str(file),
collection,
collection_lock,
stats,
stats_lock,
final_configs,
max_batch_size,
semaphore,
)
)
for file in files
]
for i, task in enumerate(asyncio.as_completed(tasks), start=1):
await task
ls.progress.report(
progress_token,
types.WorkDoneProgressReport(
message="Vectorising files...",
percentage=int(100 * i / len(tasks)),
),
)
ls.progress.end(
progress_token,
types.WorkDoneProgressEnd(
message=f"Vectorised {stats['add'] + stats['update']} files."
),
)
return stats
case _ as c: # pragma: nocover
error_message = f"Unsupported vectorcode subcommand: {str(c)}"
logger.error(
error_message,
)
raise JsonRpcInvalidRequest(error_message)
except Exception as e: # pragma: nocover
if isinstance(e, JsonRpcException):
# pygls exception. raise it as is.
raise
else: # pragma: nocover
else:
# wrap non-pygls errors for error codes.
raise JsonRpcInternalError(message=traceback.format_exc()) from e

Expand Down
43 changes: 27 additions & 16 deletions src/vectorcode/subcommands/vectorise.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,15 @@ def show_stats(configs: Config, stats):
)


def exclude_paths_by_spec(paths: Iterable[str], specs: pathspec.PathSpec) -> list[str]:
def exclude_paths_by_spec(
    paths: Iterable[str], specs: pathspec.PathSpec | str
) -> list[str]:
    """
    Files matched by the specs will be excluded.

    Args:
        paths: Candidate file paths to filter.
        specs: Either an already-compiled spec object, or the path to a
            gitignore-style file that is read and compiled with
            `pathspec.GitIgnoreSpec` before filtering.

    Returns:
        The paths that are NOT matched by the specs, in their original order.
    """
    if isinstance(specs, str):
        # Decode explicitly as UTF-8 so that non-ASCII patterns are parsed the
        # same way regardless of the platform's locale encoding.
        with open(specs, encoding="utf-8") as fin:
            # The file object is itself an iterable of lines; it is fully
            # consumed by `from_lines` before the file is closed.
            specs = pathspec.GitIgnoreSpec.from_lines(fin)
    return [path for path in paths if not specs.match_file(path)]


Expand Down Expand Up @@ -180,6 +185,25 @@ def load_files_from_include(project_root: str) -> list[str]:
return []


def find_exclude_specs(configs: Config) -> list[str]:
    """
    Collect the candidate exclude-spec file paths for a project.

    The project's `.gitignore` path always comes first and is NOT checked for
    existence here. It is followed by at most one `vectorcode.exclude` file:
    the project-local `.vectorcode/vectorcode.exclude` if it is a file,
    otherwise the global exclude spec if that is a file.
    """
    project_root = str(configs.project_root)
    local_exclude = os.path.join(project_root, ".vectorcode", "vectorcode.exclude")
    candidates = [os.path.join(project_root, ".gitignore")]
    # The local spec takes precedence over the global one; only the first
    # existing file of the two is used.
    for extra_spec in (local_exclude, GLOBAL_EXCLUDE_SPEC):
        if os.path.isfile(extra_spec):
            candidates.append(extra_spec)
            break
    return candidates


async def vectorise(configs: Config) -> int:
assert configs.project_root is not None
client = await get_client(configs)
Expand All @@ -198,23 +222,10 @@ async def vectorise(configs: Config) -> int:
)

if not configs.force:
gitignore_path = os.path.join(str(configs.project_root), ".gitignore")
specs = [
gitignore_path,
]
exclude_spec_path = os.path.join(
configs.project_root, ".vectorcode", "vectorcode.exclude"
)
if os.path.isfile(exclude_spec_path):
specs.append(exclude_spec_path)
elif os.path.isfile(GLOBAL_EXCLUDE_SPEC):
specs.append(GLOBAL_EXCLUDE_SPEC)
for spec_path in specs:
for spec_path in find_exclude_specs(configs):
if os.path.isfile(spec_path):
logger.info(f"Loading ignore specs from {spec_path}.")
with open(spec_path) as fin:
spec = pathspec.GitIgnoreSpec.from_lines(fin.readlines())
files = exclude_paths_by_spec((str(i) for i in files), spec)
files = exclude_paths_by_spec((str(i) for i in files), spec_path)
else: # pragma: nocover
logger.info("Ignoring exclude specs.")

Expand Down
Loading