From ff02f57d0b74a8be90626d5d3f906624b45e10e7 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 9 Jun 2025 19:44:57 +0800 Subject: [PATCH 01/11] refactor(nvim): move `VectorCode.Result` processing into a function. --- .../integrations/codecompanion/common.lua | 25 ++ .../codecompanion/func_calling_tool.lua | 329 ++++++++++++++++++ lua/vectorcode/types.lua | 1 + 3 files changed, 355 insertions(+) create mode 100644 lua/vectorcode/integrations/codecompanion/func_calling_tool.lua diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 987bfcb5..080984f9 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -2,8 +2,16 @@ local job_runner local vc_config = require("vectorcode.config") +local cc_config = require("codecompanion.config").config local notify_opts = vc_config.notify_opts local logger = vc_config.logger +local http_client = require("codecompanion.http") + +---@class VectorCode.CodeCompanion.SummariseOpts +---@field enabled boolean? +---@field adapter string|CodeCompanion.Adapter|nil +---@field threshold integer? +---@field system_prompt string ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { @@ -11,6 +19,22 @@ local default_query_options = { default_num = { chunk = 50, document = 10 }, no_duplicate = true, chunk_mode = false, + summarise = { + enabled = false, + system_prompt = [[ +You are an experienced code analyser. +Your task is to write summaries of source code that are informative and concise. 
+The summary will serve as a source of information for others to quickly understand how the code works and how to work with the code without going through the source code +Your summary should include the following information: +- variables, functions, classes and other objects that are importable/includeable by other programs +- for a function or method, include its signature and high-level implementation details. For example, + - when summarising a sorting function, include the sorting algorithm, the parameter types and return types + - when summarising a function that makes an http request, include the network library used by the function, the parameter types and return types +- if the code contains syntax or semantics errors, include them as well. +- for anything that you quote from the source code, include the line numbers from which you're quote them. +- DO NOT include local variables and functions that are not accessible by other functions. +]], + }, } ---@type VectorCode.CodeCompanion.LsToolOpts @@ -23,6 +47,7 @@ local TOOL_RESULT_SOURCE = "VectorCodeToolResult" return { tool_result_source = TOOL_RESULT_SOURCE, + ---@param t table|string ---@return string flatten_table_to_string = function(t) diff --git a/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua b/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua new file mode 100644 index 00000000..451a3a70 --- /dev/null +++ b/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua @@ -0,0 +1,329 @@ +---@module "codecompanion" + +local cc_common = require("vectorcode.integrations.codecompanion.common") +local vc_config = require("vectorcode.config") +local check_cli_wrap = vc_config.check_cli_wrap +local logger = vc_config.logger + +local job_runner = nil + +---@param opts VectorCode.CodeCompanion.ToolOpts? 
+---@return CodeCompanion.Agent.Tool +return check_cli_wrap(function(opts) + opts = cc_common.get_tool_opts(opts) + assert( + type(opts.max_num) == "number" and type(opts.default_num) == "number", + string.format("Options are not correctly formatted:%s", vim.inspect(opts)) + ) + ---@type "file"|"chunk" + local mode + if opts.chunk_mode then + mode = "chunk" + else + mode = "file" + end + logger.info("Creating CodeCompanion tool with the following args:\n", opts) + local capping_message = "" + if opts.max_num > 0 then + capping_message = (" - Request for at most %d documents"):format(opts.max_num) + end + + return { + name = "vectorcode", + cmds = { + ---@param agent CodeCompanion.Agent + ---@param action table + ---@return nil|{ status: string, msg: string } + function(agent, action, _, cb) + logger.info("CodeCompanion tool called with the following arguments:\n", action) + job_runner = cc_common.initialise_runner(opts.use_lsp) + assert(job_runner ~= nil, "Jobrunner not initialised!") + assert( + type(cb) == "function", + "Please upgrade CodeCompanion.nvim to at least 13.5.0" + ) + if not (vim.list_contains({ "ls", "query" }, action.command)) then + if action.options.query ~= nil then + action.command = "query" + else + return { + status = "error", + data = "Need to specify the command (`ls` or `query`).", + } + end + end + + if action.command == "query" then + if action.options.query == nil then + return { + status = "error", + data = "Missing argument: option.query, please refine the tool argument.", + } + end + if type(action.options.query) == "string" then + action.options.query = { action.options.query } + end + local args = { "query" } + vim.list_extend(args, action.options.query) + vim.list_extend(args, { "--pipe", "-n", tostring(action.options.count) }) + if opts.chunk_mode then + vim.list_extend(args, { "--include", "path", "chunk" }) + else + vim.list_extend(args, { "--include", "path", "document" }) + end + if action.options.project_root == "" then + 
action.options.project_root = nil + end + if action.options.project_root ~= nil then + action.options.project_root = vim.fs.normalize(action.options.project_root) + if + vim.uv.fs_stat(action.options.project_root) ~= nil + and vim.uv.fs_stat(action.options.project_root).type == "directory" + then + vim.list_extend(args, { "--project_root", action.options.project_root }) + else + return { + status = "error", + data = "INVALID PROJECT ROOT! USE THE LS COMMAND!", + } + end + end + + if opts.no_duplicate and agent.chat.refs ~= nil then + -- exclude files that has been added to the context + local existing_files = { "--exclude" } + for _, ref in pairs(agent.chat.refs) do + if ref.source == cc_common.tool_result_source then + table.insert(existing_files, ref.id) + elseif type(ref.path) == "string" then + table.insert(existing_files, ref.path) + elseif ref.bufnr then + local fname = vim.api.nvim_buf_get_name(ref.bufnr) + if fname ~= nil then + local stat = vim.uv.fs_stat(fname) + if stat and stat.type == "file" then + table.insert(existing_files, fname) + end + end + end + end + if #existing_files > 1 then + vim.list_extend(args, existing_files) + end + end + vim.list_extend(args, { "--absolute" }) + logger.info( + "CodeCompanion query tool called the runner with the following args: ", + args + ) + + job_runner.run_async(args, function(result, error) + if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.Result[] + cb({ status = "success", data = result }) + else + if type(error) == "table" then + error = cc_common.flatten_table_to_string(error) + end + cb({ + status = "error", + data = error, + }) + end + end, agent.chat.bufnr) + elseif action.command == "ls" then + job_runner.run_async({ "ls", "--pipe" }, function(result, error) + if vim.islist(result) and #result > 0 then + cb({ status = "success", data = result }) + else + if type(error) == "table" then + error = cc_common.flatten_table_to_string(error) + end + cb({ + status 
= "error", + data = error, + }) + end + end, agent.chat.bufnr) + end + end, + }, + schema = { + type = "function", + ["function"] = { + name = "vectorcode", + description = "Retrieves code documents using semantic search or lists indexed projects", + parameters = { + type = "object", + properties = { + command = { + type = "string", + enum = { "query", "ls" }, + description = "Action to perform: 'query' for semantic search or 'ls' to list projects", + }, + options = { + type = "object", + properties = { + query = { + type = "array", + items = { type = "string" }, + description = "Query messages used for the search.", + }, + count = { + type = "integer", + description = "Number of documents to retrieve, must be positive", + }, + project_root = { + type = "string", + description = "Project path to search within (must be from 'ls' results). Use empty string for the current project.", + }, + }, + required = { "query", "count", "project_root" }, + additionalProperties = false, + }, + }, + required = { "command", "options" }, + additionalProperties = false, + }, + strict = true, + }, + }, + system_prompt = function() + local guidelines = { + " - The path of a retrieved file will be wrapped in `` and `` tags. Its content will be right after the `` tag, wrapped by `` and `` tags. Do not include the ```` tags in your answers when you mention the paths.", + " - The results may also be chunks of the source code. In this case, the text chunks will be wrapped in . If the starting and ending line ranges are available, they will be wrapped in and tags. Make use of the line numbers (NOT THE XML TAGS) when you're quoting the source code.", + " - If you used the tool, tell users that they may need to wait for the results and there will be a virtual text indicator showing the tool is still running", + " - Include one single command call for VectorCode each time. You may include multiple keywords in the command", + " - VectorCode is the name of this tool. 
Do not include it in the query unless the user explicitly asks", + " - Use the `ls` command to retrieve a list of indexed project and pick one that may be relevant, unless the user explicitly mentioned 'this project' (or in other equivalent expressions)", + " - **The project root option MUST be a valid path on the filesystem. It can only be one of the results from the `ls` command or from user input**", + capping_message, + (" - If the user did not specify how many documents to retrieve, **start with %d documents**"):format( + opts.default_num + ), + " - If you decide to call VectorCode tool, do not start answering the question until you have the results. Provide answers based on the results and let the user decide whether to run the tool again", + } + vim.list_extend( + guidelines, + vim.tbl_map(function(line) + return " - " .. line + end, require("vectorcode").prompts({ "query", "ls" })) + ) + if opts.ls_on_start then + job_runner = cc_common.initialise_runner(opts.use_lsp) + if job_runner ~= nil then + local projects = job_runner.run({ "ls", "--pipe" }, -1, 0) + if vim.islist(projects) and #projects > 0 then + vim.list_extend(guidelines, { + " - The following projects are indexed by VectorCode and are available for you to search in:", + }) + vim.list_extend( + guidelines, + vim.tbl_map(function(s) + return string.format(" - %s", s["project-root"]) + end, projects) + ) + end + end + end + local root = vim.fs.root(0, { ".vectorcode", ".git" }) + if root ~= nil then + vim.list_extend(guidelines, { + string.format( + " - The current working directory is %s. Assume the user query is about this project, unless the user asked otherwise or queries from the current project fails to return useful results.", + root + ), + }) + end + return string.format( + [[### VectorCode, a repository indexing and query tool. + +1. **Purpose**: This gives you the ability to access the repository to find information that you may need to assist the user. + +2. 
**Key Points**: +%s +]], + table.concat(guidelines, "\n") + ) + end, + output = { + ---@param agent CodeCompanion.Agent + ---@param cmd table + ---@param stderr table|string + error = function(self, agent, cmd, stderr) + logger.error( + ("CodeCompanion tool with command %s thrown with the following error: %s"):format( + vim.inspect(cmd), + vim.inspect(stderr) + ) + ) + stderr = cc_common.flatten_table_to_string(stderr) + agent.chat:add_tool_output( + self, + string.format("**VectorCode Tool**: Failed with error:\n```\n%s\n```", stderr) + ) + end, + ---@param agent CodeCompanion.Agent + ---@param cmd table + ---@param stdout table + success = function(self, agent, cmd, stdout) + stdout = stdout[1] + logger.info( + ("CodeCompanion tool with command %s finished."):format(vim.inspect(cmd)) + ) + local user_message + if cmd.command == "query" then + local max_result = #stdout + if opts.max_num > 0 then + max_result = math.min(opts.max_num or 1, max_result) + end + for i, file in pairs(stdout) do + if i <= max_result then + if i == 1 then + user_message = string.format( + "**VectorCode Tool**: Retrieved %d %s(s)", + max_result, + mode + ) + if cmd.options.project_root then + user_message = user_message .. " from " .. cmd.options.project_root + end + user_message = user_message .. "\n" + else + user_message = "" + end + agent.chat:add_tool_output( + self, + cc_common.process_result(file), + user_message + ) + if not opts.chunk_mode then + -- skip referencing because there will be multiple chunks with the same path (id). + -- TODO: figure out a way to deduplicate. 
+ agent.chat.references:add({ + source = cc_common.tool_result_source, + id = file.path, + path = file.path, + opts = { visible = false }, + }) + end + end + end + elseif cmd.command == "ls" then + for i, col in pairs(stdout) do + if i == 1 then + user_message = + string.format("Fetched %s indexed project from VectorCode.", #stdout) + else + user_message = "" + end + agent.chat:add_tool_output( + self, + string.format("%s", col["project-root"]), + user_message + ) + end + end + end, + }, + } +end) diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 779493bf..543648fa 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -93,6 +93,7 @@ --- Whether to send chunks instead of full files to the LLM. Default: `false` --- > Make sure you adjust `max_num` and `default_num` accordingly. ---@field chunk_mode boolean? +---@field summarise VectorCode.CodeCompanion.SummariseOpts? ---@class VectorCode.CodeCompanion.VectoriseToolOpts: VectorCode.CodeCompanion.ToolOpts From 7316364fd15c9ce900a167cda8efb2409a5cbe79 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 10 Jun 2025 15:50:52 +0800 Subject: [PATCH 02/11] feat(nvim): add file summarization to codecompanion tool --- .../integrations/codecompanion/common.lua | 173 ++++++++- .../codecompanion/func_calling_tool.lua | 329 ------------------ lua/vectorcode/types.lua | 3 + 3 files changed, 163 insertions(+), 342 deletions(-) delete mode 100644 lua/vectorcode/integrations/codecompanion/func_calling_tool.lua diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 080984f9..27775f61 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -3,6 +3,7 @@ local job_runner local vc_config = require("vectorcode.config") local cc_config = require("codecompanion.config").config +local cc_schema = require("codecompanion.schema") local notify_opts = 
vc_config.notify_opts local logger = vc_config.logger local http_client = require("codecompanion.http") @@ -12,6 +13,58 @@ local http_client = require("codecompanion.http") ---@field adapter string|CodeCompanion.Adapter|nil ---@field threshold integer? ---@field system_prompt string +---@field timeout integer + +--- Suspends the current coroutine until a given amount of time has passed +--- or an interrupt condition is met. +--- +--- @param ms number The maximum number of milliseconds to sleep. +--- @param interrupt_fn? fun(): boolean An optional function that is checked periodically. +--- If it returns true, the sleep is interrupted. +--- @return boolean interrupted True if the sleep was interrupted, false otherwise. +local function sleep(ms, interrupt_fn) + local co = coroutine.running() + if not co then + error("sleep() must be called from within a coroutine.", 2) + end + + local main_timer = vim.uv.new_timer() + local check_timer + + local function cleanup() + main_timer:stop() + main_timer:close() + if check_timer then + check_timer:stop() + check_timer:close() + end + end + + local function resume(interrupted) + if co and coroutine.status(co) == "suspended" then + vim.schedule(function() + coroutine.resume(co, interrupted) + end) + end + end + + main_timer:start(ms, 0, function() + cleanup() + resume(false) + end) + + if interrupt_fn then + check_timer = vim.uv.new_timer() + check_timer:start(0, 10, function() + if interrupt_fn() then + cleanup() + resume(true) + end + end) + end + + return coroutine.yield() +end ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { @@ -21,18 +74,50 @@ local default_query_options = { chunk_mode = false, summarise = { enabled = false, - system_prompt = [[ -You are an experienced code analyser. -Your task is to write summaries of source code that are informative and concise. 
-The summary will serve as a source of information for others to quickly understand how the code works and how to work with the code without going through the source code -Your summary should include the following information: -- variables, functions, classes and other objects that are importable/includeable by other programs -- for a function or method, include its signature and high-level implementation details. For example, - - when summarising a sorting function, include the sorting algorithm, the parameter types and return types - - when summarising a function that makes an http request, include the network library used by the function, the parameter types and return types -- if the code contains syntax or semantics errors, include them as well. -- for anything that you quote from the source code, include the line numbers from which you're quote them. -- DO NOT include local variables and functions that are not accessible by other functions. + timeout = 5000, + system_prompt = [[You are an expert and experienced code analyzer and summarizer. Your primary task is to analyze provided source code and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code. Adhere strictly to the following formatting and content guidelines: + +Markdown Structure: + + Top-Level Header (#): The absolute file path of the source code. + + Secondary Headers (##): For each top-level symbol (e.g., functions, classes, global variables) defined directly within the source code file that are importable or includable by other programs. + + Tertiary Headers (###): For symbols nested one level deep within a secondary header's symbol (e.g., methods within a class, inner functions). + + Quaternary Headers (####): For symbols nested two levels deep (e.g., a function defined within a method of a class). 
+ + Continue this pattern, incrementing the header level for each deeper level of nesting. + +Content for Each Section: + + Descriptive Summary: Each header section (from secondary headers downwards) must contain a concise and informative summary of the symbol defined by that header. + + For Functions/Methods: Explain their purpose, parameters (including types), return values (including types), high-level implementation details, and any significant side effects or core logic. For example, if summarizing a sorting function, include the sorting algorithm used. If summarizing a function that makes an HTTP request, mention the network library employed. + + For Classes: Describe the class's role, its main responsibilities, and key characteristics. + + For Variables (global or within scope): State their purpose, type (if discernible), and initial value or common usage. + + For Modules/Files (under the top-level header): Provide an overall description of the file's purpose, its main components, and its role within the larger project (if context is available). + +General Guidelines: + + Clarity and Conciseness: Summaries should be easy to understand, avoiding jargon where possible, and as brief as possible while retaining essential information. + + Accuracy: Ensure the summary accurately reflects the code's functionality. + + Focus on Public Interface/Behavior: Prioritize describing what a function/class does and how it's used. Only include details about symbols (variables, functions, classes) that are importable/includable by other programs. DO NOT include local variables and functions that are not accessible by other functions outside their immediate scope. + + No Code Snippets: Do not include any actual code snippets in the summary. Focus solely on descriptive text. If you need to refer to a specific element for context (e.g., in an error description), describe it and provide line numbers for reference from the source code. 
+ + Syntax/Semantic Errors: If the code contains syntax or semantic errors, describe them clearly within the summary, indicating the nature of the error. + + Language Agnostic: Adapt the summary to the specific programming language of the provided source code (e.g., Python, JavaScript, Java, C++, etc.). + + Handle Edge Cases/Dependencies: If a symbol relies heavily on external dependencies or handles specific edge cases, briefly mention these if they are significant to its overall function. + + Information Source: There will be no extra information available to you. Provide the summary solely based on the provided file. ]], }, } @@ -45,6 +130,24 @@ local default_vectorise_options = {} local TOOL_RESULT_SOURCE = "VectorCodeToolResult" +---@alias ChatMessage {role: string, content:string} + +---@param adapter CodeCompanion.Adapter +---@param system_prompt string +---@param user_messages string|string[] +---@return {messages: ChatMessage[], tools:table?} +local function make_oneshot_payload(adapter, system_prompt, user_messages) + if type(user_messages) == "string" then + user_messages = { user_messages } + end + local messages = + { { role = cc_config.constants.SYSTEM_ROLE, content = system_prompt } } + for _, m in pairs(user_messages) do + table.insert(messages, { role = cc_config.constants.USER_ROLE, content = m }) + end + return { messages = adapter:map_roles(messages) } +end + return { tool_result_source = TOOL_RESULT_SOURCE, @@ -126,9 +229,13 @@ return { end, ---@param result VectorCode.QueryResult + ---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts|{}|nil + ---@param callback fun(summary:string)? 
---@return string - process_result = function(result) + process_result = function(result, summarise_opts, callback) + -- TODO: Unify the handling of summarised and non-summarised result local llm_message + if result.chunk then -- chunk mode llm_message = @@ -149,9 +256,49 @@ return { result.document ) end + + summarise_opts = + ---@cast summarise_opts VectorCode.CodeCompanion.SummariseOpts + vim.tbl_deep_extend( + "force", + default_query_options.summarise, + summarise_opts or {} + ) + + if summarise_opts.enabled and type(callback) == "function" then + ---@type CodeCompanion.Adapter + local adapter = + vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) + + local payload = + make_oneshot_payload(adapter, summarise_opts.system_prompt, llm_message) + local settings = + vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter))) + settings.opts.stream = false + + ---@type CodeCompanion.Client + local client = http_client.new({ adapter = settings }) + client:request(payload, { + ---@param _adapter CodeCompanion.Adapter + callback = function(err, data, _adapter) + if data then + local res = _adapter.handlers.chat_output(_adapter, data) + if res and res.status == "success" then + local summary = vim.trim(res.output.content or "") + if summary ~= "" then + return callback(summary) + end + end + return callback(llm_message) + end + end, + }, { silent = true }) + end return llm_message end, + async_sleep = sleep, + ---@param use_lsp boolean ---@return VectorCode.JobRunner initialise_runner = function(use_lsp) diff --git a/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua b/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua deleted file mode 100644 index 451a3a70..00000000 --- a/lua/vectorcode/integrations/codecompanion/func_calling_tool.lua +++ /dev/null @@ -1,329 +0,0 @@ ----@module "codecompanion" - -local cc_common = require("vectorcode.integrations.codecompanion.common") -local vc_config = 
require("vectorcode.config") -local check_cli_wrap = vc_config.check_cli_wrap -local logger = vc_config.logger - -local job_runner = nil - ----@param opts VectorCode.CodeCompanion.ToolOpts? ----@return CodeCompanion.Agent.Tool -return check_cli_wrap(function(opts) - opts = cc_common.get_tool_opts(opts) - assert( - type(opts.max_num) == "number" and type(opts.default_num) == "number", - string.format("Options are not correctly formatted:%s", vim.inspect(opts)) - ) - ---@type "file"|"chunk" - local mode - if opts.chunk_mode then - mode = "chunk" - else - mode = "file" - end - logger.info("Creating CodeCompanion tool with the following args:\n", opts) - local capping_message = "" - if opts.max_num > 0 then - capping_message = (" - Request for at most %d documents"):format(opts.max_num) - end - - return { - name = "vectorcode", - cmds = { - ---@param agent CodeCompanion.Agent - ---@param action table - ---@return nil|{ status: string, msg: string } - function(agent, action, _, cb) - logger.info("CodeCompanion tool called with the following arguments:\n", action) - job_runner = cc_common.initialise_runner(opts.use_lsp) - assert(job_runner ~= nil, "Jobrunner not initialised!") - assert( - type(cb) == "function", - "Please upgrade CodeCompanion.nvim to at least 13.5.0" - ) - if not (vim.list_contains({ "ls", "query" }, action.command)) then - if action.options.query ~= nil then - action.command = "query" - else - return { - status = "error", - data = "Need to specify the command (`ls` or `query`).", - } - end - end - - if action.command == "query" then - if action.options.query == nil then - return { - status = "error", - data = "Missing argument: option.query, please refine the tool argument.", - } - end - if type(action.options.query) == "string" then - action.options.query = { action.options.query } - end - local args = { "query" } - vim.list_extend(args, action.options.query) - vim.list_extend(args, { "--pipe", "-n", tostring(action.options.count) }) - if 
opts.chunk_mode then - vim.list_extend(args, { "--include", "path", "chunk" }) - else - vim.list_extend(args, { "--include", "path", "document" }) - end - if action.options.project_root == "" then - action.options.project_root = nil - end - if action.options.project_root ~= nil then - action.options.project_root = vim.fs.normalize(action.options.project_root) - if - vim.uv.fs_stat(action.options.project_root) ~= nil - and vim.uv.fs_stat(action.options.project_root).type == "directory" - then - vim.list_extend(args, { "--project_root", action.options.project_root }) - else - return { - status = "error", - data = "INVALID PROJECT ROOT! USE THE LS COMMAND!", - } - end - end - - if opts.no_duplicate and agent.chat.refs ~= nil then - -- exclude files that has been added to the context - local existing_files = { "--exclude" } - for _, ref in pairs(agent.chat.refs) do - if ref.source == cc_common.tool_result_source then - table.insert(existing_files, ref.id) - elseif type(ref.path) == "string" then - table.insert(existing_files, ref.path) - elseif ref.bufnr then - local fname = vim.api.nvim_buf_get_name(ref.bufnr) - if fname ~= nil then - local stat = vim.uv.fs_stat(fname) - if stat and stat.type == "file" then - table.insert(existing_files, fname) - end - end - end - end - if #existing_files > 1 then - vim.list_extend(args, existing_files) - end - end - vim.list_extend(args, { "--absolute" }) - logger.info( - "CodeCompanion query tool called the runner with the following args: ", - args - ) - - job_runner.run_async(args, function(result, error) - if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.Result[] - cb({ status = "success", data = result }) - else - if type(error) == "table" then - error = cc_common.flatten_table_to_string(error) - end - cb({ - status = "error", - data = error, - }) - end - end, agent.chat.bufnr) - elseif action.command == "ls" then - job_runner.run_async({ "ls", "--pipe" }, function(result, error) - 
if vim.islist(result) and #result > 0 then - cb({ status = "success", data = result }) - else - if type(error) == "table" then - error = cc_common.flatten_table_to_string(error) - end - cb({ - status = "error", - data = error, - }) - end - end, agent.chat.bufnr) - end - end, - }, - schema = { - type = "function", - ["function"] = { - name = "vectorcode", - description = "Retrieves code documents using semantic search or lists indexed projects", - parameters = { - type = "object", - properties = { - command = { - type = "string", - enum = { "query", "ls" }, - description = "Action to perform: 'query' for semantic search or 'ls' to list projects", - }, - options = { - type = "object", - properties = { - query = { - type = "array", - items = { type = "string" }, - description = "Query messages used for the search.", - }, - count = { - type = "integer", - description = "Number of documents to retrieve, must be positive", - }, - project_root = { - type = "string", - description = "Project path to search within (must be from 'ls' results). Use empty string for the current project.", - }, - }, - required = { "query", "count", "project_root" }, - additionalProperties = false, - }, - }, - required = { "command", "options" }, - additionalProperties = false, - }, - strict = true, - }, - }, - system_prompt = function() - local guidelines = { - " - The path of a retrieved file will be wrapped in `` and `` tags. Its content will be right after the `` tag, wrapped by `` and `` tags. Do not include the ```` tags in your answers when you mention the paths.", - " - The results may also be chunks of the source code. In this case, the text chunks will be wrapped in . If the starting and ending line ranges are available, they will be wrapped in and tags. 
Make use of the line numbers (NOT THE XML TAGS) when you're quoting the source code.", - " - If you used the tool, tell users that they may need to wait for the results and there will be a virtual text indicator showing the tool is still running", - " - Include one single command call for VectorCode each time. You may include multiple keywords in the command", - " - VectorCode is the name of this tool. Do not include it in the query unless the user explicitly asks", - " - Use the `ls` command to retrieve a list of indexed project and pick one that may be relevant, unless the user explicitly mentioned 'this project' (or in other equivalent expressions)", - " - **The project root option MUST be a valid path on the filesystem. It can only be one of the results from the `ls` command or from user input**", - capping_message, - (" - If the user did not specify how many documents to retrieve, **start with %d documents**"):format( - opts.default_num - ), - " - If you decide to call VectorCode tool, do not start answering the question until you have the results. Provide answers based on the results and let the user decide whether to run the tool again", - } - vim.list_extend( - guidelines, - vim.tbl_map(function(line) - return " - " .. line - end, require("vectorcode").prompts({ "query", "ls" })) - ) - if opts.ls_on_start then - job_runner = cc_common.initialise_runner(opts.use_lsp) - if job_runner ~= nil then - local projects = job_runner.run({ "ls", "--pipe" }, -1, 0) - if vim.islist(projects) and #projects > 0 then - vim.list_extend(guidelines, { - " - The following projects are indexed by VectorCode and are available for you to search in:", - }) - vim.list_extend( - guidelines, - vim.tbl_map(function(s) - return string.format(" - %s", s["project-root"]) - end, projects) - ) - end - end - end - local root = vim.fs.root(0, { ".vectorcode", ".git" }) - if root ~= nil then - vim.list_extend(guidelines, { - string.format( - " - The current working directory is %s. 
Assume the user query is about this project, unless the user asked otherwise or queries from the current project fails to return useful results.", - root - ), - }) - end - return string.format( - [[### VectorCode, a repository indexing and query tool. - -1. **Purpose**: This gives you the ability to access the repository to find information that you may need to assist the user. - -2. **Key Points**: -%s -]], - table.concat(guidelines, "\n") - ) - end, - output = { - ---@param agent CodeCompanion.Agent - ---@param cmd table - ---@param stderr table|string - error = function(self, agent, cmd, stderr) - logger.error( - ("CodeCompanion tool with command %s thrown with the following error: %s"):format( - vim.inspect(cmd), - vim.inspect(stderr) - ) - ) - stderr = cc_common.flatten_table_to_string(stderr) - agent.chat:add_tool_output( - self, - string.format("**VectorCode Tool**: Failed with error:\n```\n%s\n```", stderr) - ) - end, - ---@param agent CodeCompanion.Agent - ---@param cmd table - ---@param stdout table - success = function(self, agent, cmd, stdout) - stdout = stdout[1] - logger.info( - ("CodeCompanion tool with command %s finished."):format(vim.inspect(cmd)) - ) - local user_message - if cmd.command == "query" then - local max_result = #stdout - if opts.max_num > 0 then - max_result = math.min(opts.max_num or 1, max_result) - end - for i, file in pairs(stdout) do - if i <= max_result then - if i == 1 then - user_message = string.format( - "**VectorCode Tool**: Retrieved %d %s(s)", - max_result, - mode - ) - if cmd.options.project_root then - user_message = user_message .. " from " .. cmd.options.project_root - end - user_message = user_message .. "\n" - else - user_message = "" - end - agent.chat:add_tool_output( - self, - cc_common.process_result(file), - user_message - ) - if not opts.chunk_mode then - -- skip referencing because there will be multiple chunks with the same path (id). - -- TODO: figure out a way to deduplicate. 
- agent.chat.references:add({ - source = cc_common.tool_result_source, - id = file.path, - path = file.path, - opts = { visible = false }, - }) - end - end - end - elseif cmd.command == "ls" then - for i, col in pairs(stdout) do - if i == 1 then - user_message = - string.format("Fetched %s indexed project from VectorCode.", #stdout) - else - user_message = "" - end - agent.chat:add_tool_output( - self, - string.format("%s", col["project-root"]), - user_message - ) - end - end - end, - }, - } -end) diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 543648fa..230ab846 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -1,3 +1,5 @@ +---@module "codecompanion" + ---Type definition of the retrieval result. ---@class VectorCode.QueryResult ---@field path string Path to the file @@ -6,6 +8,7 @@ ---@field start_line integer? ---@field end_line integer? ---@field chunk_id string? +---@field summary string? ---@class VectorCode.LsResult ---@field project-root string From c007834d66da1f054fc313b4d9529d58f446f60b Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sat, 21 Jun 2025 15:29:56 +0800 Subject: [PATCH 03/11] feat(nvim): Add result summarisation to query tool --- .../integrations/codecompanion/common.lua | 107 ++---------------- .../integrations/codecompanion/query_tool.lua | 83 +++++++++++++- 2 files changed, 90 insertions(+), 100 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 27775f61..7c90a2fe 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -2,11 +2,8 @@ local job_runner local vc_config = require("vectorcode.config") -local cc_config = require("codecompanion.config").config -local cc_schema = require("codecompanion.schema") local notify_opts = vc_config.notify_opts local logger = vc_config.logger -local http_client = require("codecompanion.http") ---@class 
VectorCode.CodeCompanion.SummariseOpts ---@field enabled boolean? @@ -15,57 +12,6 @@ local http_client = require("codecompanion.http") ---@field system_prompt string ---@field timeout integer ---- Suspends the current coroutine until a given amount of time has passed ---- or an interrupt condition is met. ---- ---- @param ms number The maximum number of milliseconds to sleep. ---- @param interrupt_fn? fun(): boolean An optional function that is checked periodically. ---- If it returns true, the sleep is interrupted. ---- @return boolean interrupted True if the sleep was interrupted, false otherwise. -local function sleep(ms, interrupt_fn) - local co = coroutine.running() - if not co then - error("sleep() must be called from within a coroutine.", 2) - end - - local main_timer = vim.uv.new_timer() - local check_timer - - local function cleanup() - main_timer:stop() - main_timer:close() - if check_timer then - check_timer:stop() - check_timer:close() - end - end - - local function resume(interrupted) - if co and coroutine.status(co) == "suspended" then - vim.schedule(function() - coroutine.resume(co, interrupted) - end) - end - end - - main_timer:start(ms, 0, function() - cleanup() - resume(false) - end) - - if interrupt_fn then - check_timer = vim.uv.new_timer() - check_timer:start(0, 10, function() - if interrupt_fn() then - cleanup() - resume(true) - end - end) - end - - return coroutine.yield() -end - ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { max_num = { chunk = -1, document = -1 }, @@ -229,14 +175,17 @@ return { end, ---@param result VectorCode.QueryResult - ---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts|{}|nil - ---@param callback fun(summary:string)? 
---@return string - process_result = function(result, summarise_opts, callback) + process_result = function(result) -- TODO: Unify the handling of summarised and non-summarised result local llm_message - - if result.chunk then + if result.summary then + llm_message = string.format( + "%s%s", + result.path, + result.summary + ) + elseif result.chunk then -- chunk mode llm_message = string.format("%s%s", result.path, result.chunk) @@ -256,49 +205,9 @@ return { result.document ) end - - summarise_opts = - ---@cast summarise_opts VectorCode.CodeCompanion.SummariseOpts - vim.tbl_deep_extend( - "force", - default_query_options.summarise, - summarise_opts or {} - ) - - if summarise_opts.enabled and type(callback) == "function" then - ---@type CodeCompanion.Adapter - local adapter = - vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) - - local payload = - make_oneshot_payload(adapter, summarise_opts.system_prompt, llm_message) - local settings = - vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter))) - settings.opts.stream = false - - ---@type CodeCompanion.Client - local client = http_client.new({ adapter = settings }) - client:request(payload, { - ---@param _adapter CodeCompanion.Adapter - callback = function(err, data, _adapter) - if data then - local res = _adapter.handlers.chat_output(_adapter, data) - if res and res.status == "success" then - local summary = vim.trim(res.output.content or "") - if summary ~= "" then - return callback(summary) - end - end - return callback(llm_message) - end - end, - }, { silent = true }) - end return llm_message end, - async_sleep = sleep, - ---@param use_lsp boolean ---@return VectorCode.JobRunner initialise_runner = function(use_lsp) diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index a98b0624..c0b6d71f 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ 
b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -1,6 +1,9 @@ ---@module "codecompanion" local cc_common = require("vectorcode.integrations.codecompanion.common") +local cc_config = require("codecompanion.config").config +local cc_schema = require("codecompanion.schema") +local http_client = require("codecompanion.http") local vc_config = require("vectorcode.config") local check_cli_wrap = vc_config.check_cli_wrap local logger = vc_config.logger @@ -80,6 +83,79 @@ local filter_results = function(results, chat) return filtered_results end +--- Produce a callback function that triggers `cb` on the nth time this is called. +---@param n integer +---@param cb function +local function cb_on_count(n, cb) + local count = n + return function() + count = count - 1 + assert( + count >= 0, + string.format("This function should be called at most %d times.", n) + ) + if count == 0 then + cb() + end + end +end + +---@alias ChatMessage {role: string, content:string} + +---@param adapter CodeCompanion.Adapter +---@param system_prompt string +---@param user_messages string|string[] +---@return {messages: ChatMessage[], tools:table?} +local function make_oneshot_payload(adapter, system_prompt, user_messages) + if type(user_messages) == "string" then + user_messages = { user_messages } + end + local messages = + { { role = cc_config.constants.SYSTEM_ROLE, content = system_prompt } } + for _, m in pairs(user_messages) do + table.insert(messages, { role = cc_config.constants.USER_ROLE, content = m }) + end + return { messages = adapter:map_roles(messages) } +end + +---@param result VectorCode.QueryResult +---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts +---@param callback fun(result: VectorCode.QueryResult) +local function generate_summary(result, summarise_opts, callback) + if summarise_opts.enabled and type(callback) == "function" then + ---@type CodeCompanion.Adapter + local adapter = + 
vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) + + local payload = make_oneshot_payload( + adapter, + summarise_opts.system_prompt, + cc_common.process_result(result) + ) + local settings = + vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter))) + settings.opts.stream = false + + ---@type CodeCompanion.Client + local client = http_client.new({ adapter = settings }) + client:request(payload, { + ---@param _adapter CodeCompanion.Adapter + callback = function(err, data, _adapter) + if data then + local res = _adapter.handlers.chat_output(_adapter, data) + if res and res.status == "success" then + local summary = vim.trim(res.output.content or "") + if summary ~= "" then + result.summary = summary + end + end + end + callback(result) + end, + }, { silent = true }) + end +end + ---@param opts VectorCode.CodeCompanion.QueryToolOpts? ---@return CodeCompanion.Agent.Tool return check_cli_wrap(function(opts) @@ -181,7 +257,12 @@ return check_cli_wrap(function(opts) job_runner.run_async(args, function(result, error) if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.QueryResult[] - cb({ status = "success", data = result }) + local counted_cb = cb_on_count(#result, function() + cb({ status = "success", data = result }) + end) + for _, res in pairs(result) do + generate_summary(res, opts.summarise, counted_cb) + end else if type(error) == "table" then error = cc_common.flatten_table_to_string(error) From 24066796a8c2789987972e5a968fa5c6ea0afcec Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 14:47:54 +0800 Subject: [PATCH 04/11] feat(nvim): smash a list of results into one HUGE string and send only one request to the summariser. 
--- .../integrations/codecompanion/common.lua | 19 ++- .../integrations/codecompanion/query_tool.lua | 111 +++++++++--------- 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 7c90a2fe..35e6a1c2 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -21,11 +21,20 @@ local default_query_options = { summarise = { enabled = false, timeout = 5000, - system_prompt = [[You are an expert and experienced code analyzer and summarizer. Your primary task is to analyze provided source code and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code. Adhere strictly to the following formatting and content guidelines: + system_prompt = [[You are an expert and experienced code analyzer and summarizer. Your primary task is to analyze provided source code, which will be given as a list of XML objects, and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code. + +Input Format: +Each XML object represents either a full file or a chunk of a file, containing the following tags: +- `<path>...</path>`: The absolute file path of the source code. +- `<document>...</document>`: The full content of a source code file. This tag will not coexist with `<chunk>`. +- `<chunk>...</chunk>`: A segment of source code from a file. This tag will not coexist with `<document>`. +- `<start_line>...</start_line>` and `<end_line>...</end_line>`: These tags will be present only when a `<chunk>` tag is used, indicating the starting and ending line numbers of the chunk within its respective file. 
+ +Your goal is to process each of these XML objects. If multiple chunks belong to the same file, you must synthesize them to form a cohesive understanding of that file. Generate a single Markdown summary that combines insights from all provided objects. Markdown Structure: - Top-Level Header (#): The absolute file path of the source code. + Top-Level Header (#): The absolute or relative file path of the source code. Secondary Headers (##): For each top-level symbol (e.g., functions, classes, global variables) defined directly within the source code file that are importable or includable by other programs. @@ -49,7 +58,7 @@ Content for Each Section: General Guidelines: - Clarity and Conciseness: Summaries should be easy to understand, avoiding jargon where possible, and as brief as possible while retaining essential information. + Clarity and Conciseness: Summaries should be easy to understand, avoiding jargon where possible, and as brief as possible while retaining essential information. The full summary MUST NOT be longer than the original code input. When quoting a symbol in the code, include the line numbers where possible. Accuracy: Ensure the summary accurately reflects the code's functionality. @@ -63,7 +72,9 @@ General Guidelines: Handle Edge Cases/Dependencies: If a symbol relies heavily on external dependencies or handles specific edge cases, briefly mention these if they are significant to its overall function. - Information Source: There will be no extra information available to you. Provide the summary solely based on the provided file. + Information Source: There will be no extra information available to you. Provide the summary solely based on the provided XML objects. + + Omit meaningless results: For an xml object that contains no meaningful information, you're free to omit it, but please leave a sentence in the summary saying that you did this. 
]], }, } diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index c0b6d71f..ed100dce 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -83,6 +83,12 @@ local filter_results = function(results, chat) return filtered_results end +--- The result of the query tool should be structured in the following table +---@class VectorCode.CodeCompanion.QueryToolResult +---@field raw_results VectorCode.QueryResult[] +---@field count integer +---@field summary string|nil + --- Produce a callback function that triggers `cb` on the nth time this is called. ---@param n integer ---@param cb function @@ -118,20 +124,24 @@ local function make_oneshot_payload(adapter, system_prompt, user_messages) return { messages = adapter:map_roles(messages) } end ----@param result VectorCode.QueryResult +---@param result VectorCode.QueryResult[] ---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts ----@param callback fun(result: VectorCode.QueryResult) +---@param callback fun(summary:string) local function generate_summary(result, summarise_opts, callback) + assert(vim.islist(result), "result should be a list of VectorCode.QueryResult") if summarise_opts.enabled and type(callback) == "function" then ---@type CodeCompanion.Adapter local adapter = vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) - local payload = make_oneshot_payload( - adapter, - summarise_opts.system_prompt, - cc_common.process_result(result) - ) + local result_xml = table.concat(vim + .iter(result) + :map(function(res) + return cc_common.process_result(res) + end) + :totable()) + local payload = + make_oneshot_payload(adapter, summarise_opts.system_prompt, result_xml) local settings = vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter))) settings.opts.stream = false @@ -144,13 +154,13 @@ local function 
generate_summary(result, summarise_opts, callback) if data then local res = _adapter.handlers.chat_output(_adapter, data) if res and res.status == "success" then - local summary = vim.trim(res.output.content or "") - if summary ~= "" then - result.summary = summary + local gen_summary = vim.trim(res.output.content or "") + if gen_summary ~= "" then + return callback(gen_summary) end end end - callback(result) + return callback(result_xml) end, }, { silent = true }) end @@ -257,12 +267,20 @@ return check_cli_wrap(function(opts) job_runner.run_async(args, function(result, error) if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.QueryResult[] - local counted_cb = cb_on_count(#result, function() - cb({ status = "success", data = result }) - end) - for _, res in pairs(result) do - generate_summary(res, opts.summarise, counted_cb) + local max_result = #result + if opts.max_num > 0 then + max_result = math.min(tonumber(opts.max_num) or 1, max_result) + end + while #result > max_result do + table.remove(result) end + generate_summary(result, opts.summarise, function(s) + cb({ + status = "success", + ---@type VectorCode.CodeCompanion.QueryToolResult + data = { raw_results = result, count = #result, summary = s }, + }) + end) else if type(error) == "table" then error = cc_common.flatten_table_to_string(error) @@ -361,50 +379,33 @@ If a query returned empty or repeated results, you should avoid using these quer end, ---@param agent CodeCompanion.Agent ---@param cmd QueryToolArgs - ---@param stdout VectorCode.QueryResult[][] + ---@param stdout VectorCode.CodeCompanion.QueryToolResult[] success = function(self, agent, cmd, stdout) stdout = stdout[1] logger.info( ("CodeCompanion tool with command %s finished."):format(vim.inspect(cmd)) ) - local user_message - local max_result = #stdout - if opts.max_num > 0 then - max_result = math.min(opts.max_num or 1, max_result) - end - if opts.no_duplicate then - stdout = filter_results(stdout, 
agent.chat) - end - for i, file in pairs(stdout) do - if i <= max_result then - if i == 1 then - user_message = string.format( - "**VectorCode Tool**: Retrieved %d %s(s)", - max_result, - mode - ) - if cmd.project_root then - user_message = user_message .. " from " .. cmd.project_root - end - user_message = user_message .. "\n" - else - user_message = "" - end - agent.chat:add_tool_output( - self, - cc_common.process_result(file), - user_message - ) - if not opts.chunk_mode then - -- only add to reference if running in full document mode - local ref = { - source = cc_common.tool_result_source, - id = file.path, - path = file.path, - opts = { visible = false }, - } - agent.chat.references:add(ref) - end + agent.chat:add_tool_output( + self, + stdout.summary + or table.concat(vim + .iter(stdout.raw_results or {}) + :map(function(res) + return cc_common.process_result(res) + end) + :totable()), + string.format("**VectorCode Tool**: Retrieved %d %s(s)", stdout.count, mode) + ) + for _, file in pairs(stdout) do + if not opts.chunk_mode then + -- skip referencing because there will be multiple chunks with the same path (id). + -- TODO: figure out a way to deduplicate. 
+ agent.chat.references:add({ + source = cc_common.tool_result_source, + id = file.path, + path = file.path, + opts = { visible = false }, + }) end end end, From f0d1eb88eb59164bffe4ea40c68ac4a77f277e3f Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 14:50:07 +0800 Subject: [PATCH 05/11] fix(nvim): Move result processing before conditional summarisation --- .../integrations/codecompanion/query_tool.lua | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index ed100dce..e7b30a6c 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -129,17 +129,17 @@ end ---@param callback fun(summary:string) local function generate_summary(result, summarise_opts, callback) assert(vim.islist(result), "result should be a list of VectorCode.QueryResult") + local result_xml = table.concat(vim + .iter(result) + :map(function(res) + return cc_common.process_result(res) + end) + :totable()) if summarise_opts.enabled and type(callback) == "function" then ---@type CodeCompanion.Adapter local adapter = vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) - local result_xml = table.concat(vim - .iter(result) - :map(function(res) - return cc_common.process_result(res) - end) - :totable()) local payload = make_oneshot_payload(adapter, summarise_opts.system_prompt, result_xml) local settings = @@ -163,6 +163,8 @@ local function generate_summary(result, summarise_opts, callback) return callback(result_xml) end, }, { silent = true }) + else + callback(result_xml) end end From 7728fbcd67b5a6ff13cd25d268a335920731e578 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 15:04:15 +0800 Subject: [PATCH 06/11] refactor(nvim): Remove summary field from VectorCode.Result type --- .../integrations/codecompanion/common.lua | 8 +------- 
.../integrations/codecompanion/query_tool.lua | 19 +------------------ 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 35e6a1c2..2b7e63e2 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -190,13 +190,7 @@ return { process_result = function(result) -- TODO: Unify the handling of summarised and non-summarised result local llm_message - if result.summary then - llm_message = string.format( - "%s%s", - result.path, - result.summary - ) - elseif result.chunk then + if result.chunk then -- chunk mode llm_message = string.format("%s%s", result.path, result.chunk) diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index e7b30a6c..c117eb53 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -89,23 +89,6 @@ end ---@field count integer ---@field summary string|nil ---- Produce a callback function that triggers `cb` on the nth time this is called. 
----@param n integer ----@param cb function -local function cb_on_count(n, cb) - local count = n - return function() - count = count - 1 - assert( - count >= 0, - string.format("This function should be called at most %d times.", n) - ) - if count == 0 then - cb() - end - end -end - ---@alias ChatMessage {role: string, content:string} ---@param adapter CodeCompanion.Adapter @@ -150,7 +133,7 @@ local function generate_summary(result, summarise_opts, callback) local client = http_client.new({ adapter = settings }) client:request(payload, { ---@param _adapter CodeCompanion.Adapter - callback = function(err, data, _adapter) + callback = function(_, data, _adapter) if data then local res = _adapter.handlers.chat_output(_adapter, data) if res and res.status == "success" then From 65973500f05a41e03c4c7a21460471c843ef466d Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 15:23:38 +0800 Subject: [PATCH 07/11] feat(nvim): allow dynamically switching on/off the summarisation --- lua/vectorcode/integrations/codecompanion/common.lua | 4 +--- .../integrations/codecompanion/query_tool.lua | 12 +++++++++++- lua/vectorcode/types.lua | 6 ++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 2b7e63e2..48c6b55b 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -6,11 +6,10 @@ local notify_opts = vc_config.notify_opts local logger = vc_config.logger ---@class VectorCode.CodeCompanion.SummariseOpts ----@field enabled boolean? +---@field enabled boolean|fun(chat: CodeCompanion.Chat,results: VectorCode.QueryResult[]):boolean|nil ---@field adapter string|CodeCompanion.Adapter|nil ---@field threshold integer? 
---@field system_prompt string ----@field timeout integer ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { @@ -20,7 +19,6 @@ local default_query_options = { chunk_mode = false, summarise = { enabled = false, - timeout = 5000, system_prompt = [[You are an expert and experienced code analyzer and summarizer. Your primary task is to analyze provided source code, which will be given as a list of XML objects, and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code. Input Format: diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index c117eb53..bd5f36ef 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -89,6 +89,12 @@ end ---@field count integer ---@field summary string|nil +--- The result of the query tool should be structured in the following table +---@class VectorCode.CodeCompanion.QueryToolResult +---@field raw_results VectorCode.QueryResult[] +---@field count integer +---@field summary string|nil + ---@alias ChatMessage {role: string, content:string} ---@param adapter CodeCompanion.Adapter @@ -259,7 +265,11 @@ return check_cli_wrap(function(opts) while #result > max_result do table.remove(result) end - generate_summary(result, opts.summarise, function(s) + local summary_opts = vim.deepcopy(opts.summarise) + if type(summary_opts.enabled) == "function" then + summary_opts.enabled = summary_opts.enabled(agent.chat, result) + end + generate_summary(result, summary_opts, function(s) cb({ status = "success", ---@type VectorCode.CodeCompanion.QueryToolResult diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 230ab846..a4f01d63 100644 --- a/lua/vectorcode/types.lua 
+++ b/lua/vectorcode/types.lua @@ -107,3 +107,9 @@ ---@field collapse boolean --- Other tools that you'd like to include in `vectorcode_toolbox` ---@field extras string[] + +--- The result of the query tool should be structured in the following table +---@class VectorCode.CodeCompanion.QueryToolResult +---@field raw_results VectorCode.QueryResult[] +---@field count integer +---@field summary string|nil From 59a9ddde1e2614a6cd6840465a9ccd69c0f79bb8 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 15:34:31 +0800 Subject: [PATCH 08/11] feat(nvim): Augment result summary with user query context --- .../integrations/codecompanion/common.lua | 6 +++-- .../integrations/codecompanion/query_tool.lua | 22 ++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 48c6b55b..b4435b3d 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -6,10 +6,11 @@ local notify_opts = vc_config.notify_opts local logger = vc_config.logger ---@class VectorCode.CodeCompanion.SummariseOpts ----@field enabled boolean|fun(chat: CodeCompanion.Chat,results: VectorCode.QueryResult[]):boolean|nil +---@field enabled boolean|(fun(chat: CodeCompanion.Chat,results: VectorCode.QueryResult[]):boolean)|nil ---@field adapter string|CodeCompanion.Adapter|nil ----@field threshold integer? ---@field system_prompt string +---When set to true, include the query messages so that the LLM may make task-related summarisations. +---@field query_augmented boolean ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { @@ -19,6 +20,7 @@ local default_query_options = { chunk_mode = false, summarise = { enabled = false, + query_augmented = true, system_prompt = [[You are an expert and experienced code analyzer and summarizer. 
Your primary task is to analyze provided source code, which will be given as a list of XML objects, and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code. Input Format: diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index bd5f36ef..5e8bbedb 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -114,9 +114,10 @@ local function make_oneshot_payload(adapter, system_prompt, user_messages) end ---@param result VectorCode.QueryResult[] +---@param cmd QueryToolArgs ---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts ---@param callback fun(summary:string) -local function generate_summary(result, summarise_opts, callback) +local function generate_summary(result, summarise_opts, cmd, callback) assert(vim.islist(result), "result should be a list of VectorCode.QueryResult") local result_xml = table.concat(vim .iter(result) @@ -129,8 +130,19 @@ local function generate_summary(result, summarise_opts, callback) local adapter = vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) - local payload = - make_oneshot_payload(adapter, summarise_opts.system_prompt, result_xml) + local system_prompt = summarise_opts.system_prompt + if summarise_opts.query_augmented then + system_prompt = string.format( + [[%s + +The code provided to you is the result of a search in a codebase from the following query: %s. +When summarising the code, pay extra attention on information related to the queries. 
+ ]], + system_prompt, + table.concat(cmd.query, ", ") + ) + end + local payload = make_oneshot_payload(adapter, system_prompt, result_xml) local settings = vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter))) settings.opts.stream = false @@ -265,11 +277,11 @@ return check_cli_wrap(function(opts) while #result > max_result do table.remove(result) end - local summary_opts = vim.deepcopy(opts.summarise) + local summary_opts = vim.deepcopy(opts.summarise) or {} if type(summary_opts.enabled) == "function" then summary_opts.enabled = summary_opts.enabled(agent.chat, result) end - generate_summary(result, summary_opts, function(s) + generate_summary(result, summary_opts, action, function(s) cb({ status = "success", ---@type VectorCode.CodeCompanion.QueryToolResult From b517c761d791782b1a38432abaeacff75beb062d Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 17:31:09 +0800 Subject: [PATCH 09/11] fix(nvim): merge conflicts --- .../integrations/codecompanion/common.lua | 20 +------------------ .../integrations/codecompanion/query_tool.lua | 15 +++----------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index b4435b3d..0a8bba3f 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -87,24 +87,6 @@ local default_vectorise_options = {} local TOOL_RESULT_SOURCE = "VectorCodeToolResult" ----@alias ChatMessage {role: string, content:string} - ----@param adapter CodeCompanion.Adapter ----@param system_prompt string ----@param user_messages string|string[] ----@return {messages: ChatMessage[], tools:table?} -local function make_oneshot_payload(adapter, system_prompt, user_messages) - if type(user_messages) == "string" then - user_messages = { user_messages } - end - local messages = - { { role = cc_config.constants.SYSTEM_ROLE, content = system_prompt } 
} - for _, m in pairs(user_messages) do - table.insert(messages, { role = cc_config.constants.USER_ROLE, content = m }) - end - return { messages = adapter:map_roles(messages) } -end - return { tool_result_source = TOOL_RESULT_SOURCE, @@ -166,7 +148,7 @@ return { ) end if type(opts.max_num) == "table" then - if opts._ then + if opts.chunk_mode then opts.max_num = opts.max_num.chunk else opts.max_num = opts.max_num.document diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index 5e8bbedb..1099e2dd 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -83,18 +83,6 @@ local filter_results = function(results, chat) return filtered_results end ---- The result of the query tool should be structured in the following table ----@class VectorCode.CodeCompanion.QueryToolResult ----@field raw_results VectorCode.QueryResult[] ----@field count integer ----@field summary string|nil - ---- The result of the query tool should be structured in the following table ----@class VectorCode.CodeCompanion.QueryToolResult ----@field raw_results VectorCode.QueryResult[] ----@field count integer ----@field summary string|nil - ---@alias ChatMessage {role: string, content:string} ---@param adapter CodeCompanion.Adapter @@ -270,6 +258,9 @@ return check_cli_wrap(function(opts) job_runner.run_async(args, function(result, error) if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.QueryResult[] + if opts.no_duplicate then + result = filter_results(result, agent.chat) + end local max_result = #result if opts.max_num > 0 then max_result = math.min(tonumber(opts.max_num) or 1, max_result) From 407068b6bdb078ea4866fd74d56110c3d92b622b Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 23 Jun 2025 13:46:11 +0800 Subject: [PATCH 10/11] feat(nvim): allow customising the system prompt as a function --- 
.../integrations/codecompanion/common.lua | 9 ++------- .../integrations/codecompanion/query_tool.lua | 11 +++++++++++ lua/vectorcode/types.lua | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 0a8bba3f..ce9a6258 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -5,13 +5,6 @@ local vc_config = require("vectorcode.config") local notify_opts = vc_config.notify_opts local logger = vc_config.logger ----@class VectorCode.CodeCompanion.SummariseOpts ----@field enabled boolean|(fun(chat: CodeCompanion.Chat,results: VectorCode.QueryResult[]):boolean)|nil ----@field adapter string|CodeCompanion.Adapter|nil ----@field system_prompt string ----When set to true, include the query messages so that the LLM may make task-related summarisations. ----@field query_augmented boolean - ---@type VectorCode.CodeCompanion.QueryToolOpts local default_query_options = { max_num = { chunk = -1, document = -1 }, @@ -75,6 +68,8 @@ General Guidelines: Information Source: There will be no extra information available to you. Provide the summary solely based on the provided XML objects. Omit meaningless results: For an xml object that contains no meaningful information, you're free to omit it, but please leave a sentence in the summary saying that you did this. + + No extra reply: Your reply should solely consist of the summary. Do not say anything else. 
]], }, } diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index 1099e2dd..04a86600 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -113,12 +113,23 @@ local function generate_summary(result, summarise_opts, cmd, callback) return cc_common.process_result(res) end) :totable()) + if summarise_opts.enabled and type(callback) == "function" then ---@type CodeCompanion.Adapter local adapter = vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter)) local system_prompt = summarise_opts.system_prompt + if type(system_prompt) == "function" then + system_prompt = system_prompt( + cc_common.get_query_tool_opts().summarise.system_prompt --[[@as string]] + ) + end + + assert( + type(system_prompt) == "string", + "`system_prompt` should have been converted to a string." + ) if summarise_opts.query_augmented then system_prompt = string.format( [[%s diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index a4f01d63..01d62264 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -113,3 +113,22 @@ ---@field raw_results VectorCode.QueryResult[] ---@field count integer ---@field summary string|nil + +---@class VectorCode.CodeCompanion.SummariseOpts +---A boolean flag that controls whether summarisation should be enabled. +---This can also be a function that returns a boolean. +---In this case, you can use this option to dynamically control whether summarisation is enabled during a chat. +--- +---This function receives 2 parameters: +--- - `CodeCompanion.Chat`: the chat object; +--- - `VectorCode.QueryResult[]`: a list of query results. +---@field enabled boolean|(fun(chat: CodeCompanion.Chat, results: VectorCode.QueryResult[]):boolean)|nil +---The adapter used for the summarisation task. When set to `nil`, the adapter from the current chat will be used. 
+---@field adapter string|CodeCompanion.Adapter|nil +---The system prompt sent to the summariser model. +---When set to a function, it'll receive the default system prompt as the only parameter, +---and should return the new (full) system prompt. This allows you to customise or rewrite the system prompt. +---@field system_prompt string|(fun(original_prompt: string): string) +---When set to true, include the query messages so that the LLM may make task-related summarisations. +---This happens __after__ the `system_prompt` callback processing. +---@field query_augmented boolean From 9ff39fd9b4a286aa2b23553b0ff03f1c72ffc67d Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 23 Jun 2025 14:47:04 +0800 Subject: [PATCH 11/11] feat(nvim): add system prompt to merge chunks from the same file in result summarisation --- lua/vectorcode/integrations/codecompanion/common.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index ce9a6258..64f265cd 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -70,6 +70,8 @@ General Guidelines: Omit meaningless results: For an xml object that contains no meaningful information, you're free to omit it, but please leave a sentence in the summary saying that you did this. No extra reply: Your reply should solely consist of the summary. Do not say anything else. + + Merge chunks from the same file: When there are chunks that belong to the same file, merge their content so that they're grouped under the same top level header. ]], }, }