diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt
index 2bd4ae33..5947fca0 100644
--- a/doc/VectorCode-cli.txt
+++ b/doc/VectorCode-cli.txt
@@ -796,7 +796,10 @@ If you want to integrate VectorCode in your LLM application, you may want to
 write some prompt that tells the LLM how to use this tool. Apart from the
 function signatures, a list of instructions used by the MCP server is included
 in this package. This can be retrieved by running the `vectorcode prompts`
-command.
+command. This command optionally accepts names of other subcommands as
+arguments. It’ll print a list of pre-defined prompts that are suitable for
+the specified subcommands. You may run `vectorcode prompts --help` for the
+supported options.
 
 
 Generated by panvimdoc
diff --git a/docs/cli.md b/docs/cli.md
index 3b9b2b75..71397191 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -719,3 +719,6 @@ If you want to integrate VectorCode in your LLM application, you may want to
 write some prompt that tells the LLM how to use this tool. Apart from the
 function signatures, a list of instructions used by the MCP server is included
 in this package. This can be retrieved by running the `vectorcode prompts` command.
+This command optionally accepts names of other subcommands as arguments. It'll
+print a list of pre-defined prompts that are suitable for the specified
+subcommands. You may run `vectorcode prompts --help` for the supported options.
diff --git a/lua/vectorcode/init.lua b/lua/vectorcode/init.lua
index a92d88e1..ba583889 100644
--- a/lua/vectorcode/init.lua
+++ b/lua/vectorcode/init.lua
@@ -201,9 +201,19 @@ function M.check(check_item, stdout_cb)
   return return_code == 0
 end
 
+---@alias prompt_type "ls"|"query"|"vectorise"
+---@param item prompt_type|prompt_type[]|nil
 ---@return string[]
-M.prompts = vc_config.check_cli_wrap(function()
-  local result, error = jobrunner.run({ "prompts", "-p" }, -1, 0)
+M.prompts = vc_config.check_cli_wrap(function(item)
+  local args = { "prompts", "-p" }
+  if item then
+    if type(item) == "string" then
+      table.insert(args, item)
+    else
+      vim.list_extend(args, item)
+    end
+  end
+  local result, error = jobrunner.run(args, -1, 0)
   if result == nil or vim.tbl_isempty(result) then
     logger.warn(vim.inspect(error))
     if vc_config.get_user_config().notify then
diff --git a/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua b/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua
index 8442e3cf..f6963ac0 100644
--- a/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua
+++ b/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua
@@ -207,7 +207,7 @@ return check_cli_wrap(function(opts)
     guidelines,
     vim.tbl_map(function(line)
       return " - " .. line
-    end, require("vectorcode").prompts())
+    end, require("vectorcode").prompts({ "query", "ls" }))
   )
   if opts.ls_on_start then
     job_runner = cc_common.initialise_runner(opts.use_lsp)
diff --git a/src/vectorcode/cli_utils.py b/src/vectorcode/cli_utils.py
index 82ecdadf..5fa23b3c 100644
--- a/src/vectorcode/cli_utils.py
+++ b/src/vectorcode/cli_utils.py
@@ -47,6 +47,12 @@ def to_header(self) -> str:
         return f"{self.value.capitalize()}: "
 
 
+class PromptCategory(StrEnum):
+    query = "query"
+    vectorise = "vectorise"
+    ls = "ls"
+
+
 class CliAction(Enum):
     vectorise = "vectorise"
     query = "query"
@@ -96,6 +102,7 @@ class Config:
     filetype_map: dict[str, list[str]] = field(default_factory=dict)
     encoding: str = "utf8"
     hooks: bool = False
+    prompt_categories: Optional[list[str]] = None
 
     @classmethod
     async def import_from(cls, config_dict: dict[str, Any]) -> "Config":
@@ -344,11 +351,19 @@ def get_cli_parser():
         help="Remove empty collections in the database.",
     )
 
-    subparsers.add_parser(
+    prompts_parser = subparsers.add_parser(
         "prompts",
         parents=[shared_parser],
         help="Print a list of guidelines intended to be used as system prompts for an LLM.",
     )
+    prompts_parser.add_argument(
+        "prompt_categories",
+        choices=PromptCategory,
+        type=PromptCategory,
+        nargs="*",
+        help="The subcommand(s) to get the prompts for. When not provided, VectorCode will print the prompts for `query`.",
+        default=None,
+    )
 
     chunks_parser = subparsers.add_parser(
         "chunks",
@@ -400,6 +415,8 @@ async def parse_cli_args(args: Optional[Sequence[str]] = None):
             configs_items["chunk_size"] = main_args.chunk_size
             configs_items["overlap_ratio"] = main_args.overlap
             configs_items["encoding"] = main_args.encoding
+        case "prompts":
+            configs_items["prompt_categories"] = main_args.prompt_categories
     return Config(**configs_items)
 
 
diff --git a/src/vectorcode/mcp_main.py b/src/vectorcode/mcp_main.py
index 13a8defb..d9df8645 100644
--- a/src/vectorcode/mcp_main.py
+++ b/src/vectorcode/mcp_main.py
@@ -31,7 +31,7 @@
     load_config_file,
 )
 from vectorcode.common import get_client, get_collection, get_collections
-from vectorcode.subcommands.prompt import prompt_strings
+from vectorcode.subcommands.prompt import prompt_by_categories
 from vectorcode.subcommands.query import get_query_result_files
 
 logger = logging.getLogger(name=__name__)
@@ -167,7 +167,9 @@ async def mcp_server():
     except InvalidCollectionException:  # pragma: nocover
         default_collection = None
 
-    default_instructions = "\n".join(prompt_strings)
+    default_instructions = "\n".join(
+        "\n".join(i) for i in prompt_by_categories.values()
+    )
     if default_client is None:
         if mcp_config.ls_on_start:  # pragma: nocover
             logger.warning(
diff --git a/src/vectorcode/subcommands/prompt.py b/src/vectorcode/subcommands/prompt.py
index aeea2211..fa2a08d9 100644
--- a/src/vectorcode/subcommands/prompt.py
+++ b/src/vectorcode/subcommands/prompt.py
@@ -1,28 +1,48 @@
 import json
+import logging
 
-from vectorcode.cli_utils import Config
+from vectorcode.cli_utils import Config, PromptCategory
 
-prompt_strings = [
-    "**Use at your discretion** when you feel you don't have enough information about the repository or project",
-    "**Don't escape** special characters",
-    "separate phrases into distinct keywords when appropriate",
-    "If a class, type or function has been imported from another file, this tool may be able to find its source. Add the name of the imported symbol to the query",
-    "Avoid retrieving one single file because the retrieval mechanism may not be very accurate",
-    "When providing answers based on VectorCode results, try to give references such as paths to files and line ranges, unless you're told otherwise (but do not include the full source code context)",
-    "VectorCode is the name of this tool. Do not include it in the query unless the user explicitly asks",
-    "If the retrieval results do not contain the needed context, increase the file count so that the result will more likely contain the desired files",
-    "If the returned paths are relative, they are relative to the root of the project directory",
-    "Do not suggest edits to retrieved files that are outside of the current working directory, unless the user instructed otherwise",
-    "When specifying the `project_root` parameter when making a query, make sure you run the `ls` tool first to retrieve a list of valid, indexed projects",
-    "If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings",
-    "Do not use exact query keywords that you have used in a previous tool call in the conversation, unless the user instructed otherwise, or with different count/project_root",
-]
+logger = logging.getLogger(name=__name__)
+
+prompt_by_categories: dict[str | PromptCategory, list[str]] = {
+    PromptCategory.query: [
+        "separate phrases into distinct keywords when appropriate",
+        "If a class, type or function has been imported from another file, this tool may be able to find its source. Add the name of the imported symbol to the query",
+        "When providing answers based on VectorCode results, try to give references such as paths to files and line ranges, unless you're told otherwise (but do not include the full source code context)",
+        "Avoid retrieving one single file because the retrieval mechanism may not be very accurate",
+        "If the query results do not contain the needed context, increase the file count so that the result will more likely contain the desired files",
+        "If the returned paths are relative, they are relative to the root of the project directory",
+        "Do not suggest edits to retrieved files that are outside of the current working directory, unless the user instructed otherwise",
+        "When specifying the `project_root` parameter when making a query, make sure you run the `ls` tool first to retrieve a list of valid, indexed projects",
+        "If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings",
+        "Do not use exact query keywords that you have used in a previous tool call in the conversation, unless the user instructed otherwise, or with different count/project_root",
+        "Include related keywords as the search query. For example, when querying for `function`, include `return value`, `parameter`, `arguments` and alike.",
+    ],
+    PromptCategory.ls: [
+        "Use `ls` tool to obtain a list of indexed projects that are available to be queried by the `query` command."
+    ],
+    PromptCategory.vectorise: [
+        "When vectorising the files, provide accurate and case-sensitive paths to the file"
+    ],
+    "general": [
+        "VectorCode is the name of this tool. Do not include it in the query unless the user explicitly asks",
+        "**Use at your discretion** when you feel you don't have enough information about the repository or project",
+        "**Don't escape** special characters",
+    ],
+}
+prompt_strings = []
 
 
 def prompts(configs: Config) -> int:
+    results = prompt_by_categories["general"].copy()
+    for item in sorted(set(configs.prompt_categories or [PromptCategory.query])):
+        logger.info(f"Loading {len(prompt_by_categories[item])} prompts for {item}")
+        results.extend(prompt_by_categories[item])
+    results.sort()
     if configs.pipe:
-        print(json.dumps(prompt_strings))
+        print(json.dumps(results))
     else:
-        for i in prompt_strings:
+        for i in results:
             print(f"- {i}")
     return 0
diff --git a/tests/subcommands/test_prompts.py b/tests/subcommands/test_prompts.py
index e4885285..7b107d42 100644
--- a/tests/subcommands/test_prompts.py
+++ b/tests/subcommands/test_prompts.py
@@ -2,12 +2,12 @@
 import json
 import sys
 
-from vectorcode.cli_utils import Config
+from vectorcode.cli_utils import Config, PromptCategory
 from vectorcode.subcommands import prompt
 
 
 def test_prompts_pipe_true():
-    configs = Config(pipe=True)
+    configs = Config(pipe=True, prompt_categories=PromptCategory)
 
     # Mock stdout
     captured_output = io.StringIO()
@@ -17,13 +17,15 @@
 
     sys.stdout = sys.__stdout__  # Reset stdout
 
-    expected_output = json.dumps(prompt.prompt_strings) + "\n"
+    expected_output = (
+        json.dumps(sorted(sum(prompt.prompt_by_categories.values(), start=[]))) + "\n"
+    )
     assert captured_output.getvalue() == expected_output
     assert return_code == 0
 
 
 def test_prompts_pipe_false():
-    configs = Config(pipe=False)
+    configs = Config(pipe=False, prompt_categories=PromptCategory)
 
     # Mock stdout
     captured_output = io.StringIO()
@@ -34,7 +36,7 @@
     sys.stdout = sys.__stdout__  # Reset stdout
 
     expected_output = ""
-    for i in prompt.prompt_strings:
+    for i in sorted(sum(prompt.prompt_by_categories.values(), start=[])):
         expected_output += f"- {i}\n"
 
     assert captured_output.getvalue() == expected_output
diff --git a/tests/test_cli_utils.py b/tests/test_cli_utils.py
index beff168b..6f6f09f7 100644
--- a/tests/test_cli_utils.py
+++ b/tests/test_cli_utils.py
@@ -9,6 +9,7 @@
 from vectorcode.cli_utils import (
     CliAction,
     Config,
+    PromptCategory,
     QueryInclude,
     cleanup_path,
     expand_envs_in_dict,
@@ -482,6 +483,14 @@ async def test_parse_cli_args_init():
     assert config.action == CliAction.init
 
 
+@pytest.mark.asyncio
+async def test_parse_cli_args_prompts():
+    with patch("sys.argv", ["vectorcode", "prompts", "ls"]):
+        config = await parse_cli_args()
+        assert config.action == CliAction.prompts
+        assert config.prompt_categories == [PromptCategory.ls]
+
+
 @pytest.mark.asyncio
 async def test_parse_cli_args_chunks():
     with patch(
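
The behaviour introduced by this patch can be sanity-checked directly from Python. The following is a minimal sketch based only on the names added above (`PromptCategory`, `Config.prompt_categories`, and `prompts()`), assuming the patched package is importable:

    from vectorcode.cli_utils import Config, PromptCategory
    from vectorcode.subcommands.prompt import prompts

    # Equivalent to `vectorcode prompts -p ls`: prints the "general" guidelines
    # plus the `ls`-specific ones, sorted, as a JSON array.
    prompts(Config(pipe=True, prompt_categories=[PromptCategory.ls]))

    # Equivalent to `vectorcode prompts`: with no categories given, prompts()
    # falls back to the `query` guidelines and prints them as "- ..." bullets.
    prompts(Config(pipe=False))

With `pipe=True` the guidelines are emitted as a JSON array, which is what `M.prompts` in init.lua consumes via the `-p` flag; without it, each guideline is printed as a plain bullet for human reading.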