Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/VectorCode-cli.txt
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,10 @@ certain conditions. See the wiki
<https://github.com/Davidyz/VectorCode/wiki/Tips-and-Tricks#git-hooks> for an
example to use it with git hooks.

If you’re working with nested repos, you can pass `--recursive`/`-r` so that
the `vectorise` command will honour the `.gitignore`s and `vectorcode.exclude`s
in the nested repos.


MAKING A QUERY ~

Expand Down
4 changes: 4 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,10 @@ on certain conditions. See
[the wiki](https://github.com/Davidyz/VectorCode/wiki/Tips-and-Tricks#git-hooks)
for an example to use it with git hooks.

If you're working with nested repos, you can pass `--recursive`/`-r` so that
the `vectorise` command will honour the `.gitignore`s and `vectorcode.exclude`s
in the nested repos.

### Making a Query

To retrieve a list of documents from the database, you can use the following command:
Expand Down
58 changes: 57 additions & 1 deletion src/vectorcode/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from datetime import datetime
from enum import Enum, StrEnum
from pathlib import Path
from typing import Any, Optional, Sequence, Union
from typing import Any, Generator, Iterable, Optional, Sequence, Union

import json5
import shtab
from filelock import AsyncFileLock
from pathspec import GitIgnoreSpec

from vectorcode import __version__

Expand Down Expand Up @@ -671,3 +672,58 @@ def get_lock(self, path: str | os.PathLike) -> AsyncFileLock:
if self.__locks.get(path) is None:
self.__locks[path] = AsyncFileLock(path) # pyright: ignore[reportArgumentType]
return self.__locks[path]


class SpecResolver:
    """
    Wrapper around a `GitIgnoreSpec` that remembers the directory its patterns
    are relative to, so that specs living outside of cwd can be applied to
    arbitrary paths.
    """

    @classmethod
    def from_path(cls, spec_path: str, project_root: Optional[str] = None):
        """
        Build a resolver from a spec file, picking the `base_dir` that the
        patterns in that file should be resolved against.

        Only supports `.gitignore` and `vectorcode.{include,exclude}`:

        - `<dir>/.gitignore` -> `<dir>`
        - `<dir>/.vectorcode/vectorcode.{include,exclude}` -> `<dir>`
        - any other `vectorcode.{include,exclude}` is assumed to be the global
          config and uses `project_root` (or `"."` when it is not given).

        Raises `ValueError` if the spec path is not one of them.
        """
        if spec_path.endswith(".gitignore"):
            # Take the parent directory instead of deleting the substring:
            # `str.replace` would also mangle any other occurrence of
            # ".gitignore" in the path (e.g. a `.gitignore_backup/` directory).
            base_dir = os.path.dirname(spec_path) or "."
        else:
            path_obj = Path(spec_path)
            if path_obj.name in {"vectorcode.include", "vectorcode.exclude"}:
                if path_obj.parent.name == ".vectorcode":
                    # project config
                    base_dir = str(path_obj.parent.parent)
                else:
                    # assume to be global config
                    base_dir = project_root or "."
            else:  # pragma: nocover
                raise ValueError(f"Unsupported spec path: {spec_path}")
        return cls(spec_path, base_dir)

    def __init__(self, spec: "str | GitIgnoreSpec", base_dir: str = "."):
        """
        `spec` is either an already-built spec object or the path to a file
        whose lines are parsed as gitignore patterns. `base_dir` is the
        directory the patterns are relative to.
        """
        if isinstance(spec, str):
            with open(spec) as fin:
                # Materialise the lines while the file is still open.
                patterns = [line.strip() for line in fin]
            self.spec = GitIgnoreSpec.from_lines(patterns)
        else:
            self.spec = spec
        self.base_dir = base_dir

    def match(
        self, paths: Iterable[str], negated: bool = False
    ) -> Generator[str, None, None]:
        """
        Lazily filter `paths` with the wrapped spec.

        Paths located under `base_dir` are yielded when the spec matches them
        (or, with `negated=True`, when it does not). Paths that are not under
        `base_dir` are outside the scope of this spec and are always yielded
        unchanged.
        """
        base = Path(self.base_dir).resolve()
        for p in paths:
            if base not in Path(p).resolve().parents:
                # Not governed by this spec; pass it through untouched.
                yield p
                continue
            # The spec's patterns are written relative to `base_dir`.
            should_yield = self.spec.match_file(os.path.relpath(p, self.base_dir))
            if negated:
                should_yield = not should_yield
            if should_yield:
                yield p
36 changes: 20 additions & 16 deletions src/vectorcode/subcommands/vectorise.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import glob
import hashlib
import json
import logging
Expand All @@ -20,6 +21,7 @@
GLOBAL_EXCLUDE_SPEC,
GLOBAL_INCLUDE_SPEC,
Config,
SpecResolver,
expand_globs,
expand_path,
)
Expand Down Expand Up @@ -187,22 +189,13 @@ def show_stats(configs: Config, stats: VectoriseStats):


def exclude_paths_by_spec(
paths: Iterable[str], specs: pathspec.PathSpec | str
paths: Iterable[str], spec_path: str, project_root: Optional[str] = None
) -> list[str]:
"""
Files matched by the specs will be excluded.
"""
if isinstance(specs, str):
with open(specs) as fin:
specs = pathspec.GitIgnoreSpec.from_lines(fin.readlines())
return [path for path in paths if not specs.match_file(path)]


def include_paths_by_spec(paths: Iterable[str], specs: pathspec.PathSpec) -> list[str]:
"""
Only include paths matched by the specs.
"""
return [path for path in paths if specs.match_file(path)]
return list(SpecResolver.from_path(spec_path, project_root).match(paths, True))


def load_files_from_include(project_root: str) -> list[str]:
Expand Down Expand Up @@ -235,17 +228,25 @@ def find_exclude_specs(configs: Config) -> list[str]:
Load a list of paths to exclude specs.
Can be `.gitignore` or local/global `vectorcode.exclude`
"""
gitignore_path = os.path.join(str(configs.project_root), ".gitignore")
specs = [
gitignore_path,
]
if configs.recursive:
specs = glob.glob(
os.path.join(str(configs.project_root), "**", ".gitignore"), recursive=True
) + glob.glob(
os.path.join(str(configs.project_root), "**", "vectorcode.exclude"),
recursive=True,
)
else:
specs = [os.path.join(str(configs.project_root), ".gitignore")]

exclude_spec_path = os.path.join(
str(configs.project_root), ".vectorcode", "vectorcode.exclude"
)
if os.path.isfile(exclude_spec_path):
specs.append(exclude_spec_path)
elif os.path.isfile(GLOBAL_EXCLUDE_SPEC):
specs.append(GLOBAL_EXCLUDE_SPEC)
specs = [i for i in specs if os.path.isfile(i)]
logger.debug(f"Loaded exclude specs: {specs}")
return specs


Expand All @@ -272,7 +273,10 @@ async def vectorise(configs: Config) -> int:
for spec_path in find_exclude_specs(configs):
if os.path.isfile(spec_path):
logger.info(f"Loading ignore specs from {spec_path}.")
files = exclude_paths_by_spec((str(i) for i in files), spec_path)
files = exclude_paths_by_spec(
(str(i) for i in files), spec_path, str(configs.project_root)
)
logger.debug(f"Files after excluding: {files}")
else: # pragma: nocover
logger.info("Ignoring exclude specs.")

Expand Down
Loading
Loading