diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua
index 987bfcb5..64f265cd 100644
--- a/lua/vectorcode/integrations/codecompanion/common.lua
+++ b/lua/vectorcode/integrations/codecompanion/common.lua
@@ -11,6 +11,69 @@ local default_query_options = {
default_num = { chunk = 50, document = 10 },
no_duplicate = true,
chunk_mode = false,
+ summarise = {
+ enabled = false,
+ query_augmented = true,
+ system_prompt = [[You are an expert and experienced code analyzer and summarizer. Your primary task is to analyze provided source code, which will be given as a list of XML objects, and generate a comprehensive, well-structured Markdown summary. This summary will serve as a concise source of information for others to quickly understand how the code works and how to interact with it, without needing to delve into the full source code.
+
+Input Format:
+Each XML object represents either a full file or a chunk of a file, containing the following tags:
+- `...`: The absolute file path of the source code.
+- `...`: The full content of a source code file. This tag will not coexist with ``.
+- `...`: A segment of source code from a file. This tag will not coexist with ``.
+- `...` and `...`: These tags will be present only when a `` tag is used, indicating the starting and ending line numbers of the chunk within its respective file.
+
+Your goal is to process each of these XML objects. If multiple chunks belong to the same file, you must synthesize them to form a cohesive understanding of that file. Generate a single Markdown summary that combines insights from all provided objects.
+
+Markdown Structure:
+
+ Top-Level Header (#): The absolute or relative file path of the source code.
+
+ Secondary Headers (##): For each top-level symbol (e.g., functions, classes, global variables) defined directly within the source code file that are importable or includable by other programs.
+
+ Tertiary Headers (###): For symbols nested one level deep within a secondary header's symbol (e.g., methods within a class, inner functions).
+
+ Quaternary Headers (####): For symbols nested two levels deep (e.g., a function defined within a method of a class).
+
+ Continue this pattern, incrementing the header level for each deeper level of nesting.
+
+Content for Each Section:
+
+ Descriptive Summary: Each header section (from secondary headers downwards) must contain a concise and informative summary of the symbol defined by that header.
+
+ For Functions/Methods: Explain their purpose, parameters (including types), return values (including types), high-level implementation details, and any significant side effects or core logic. For example, if summarizing a sorting function, include the sorting algorithm used. If summarizing a function that makes an HTTP request, mention the network library employed.
+
+ For Classes: Describe the class's role, its main responsibilities, and key characteristics.
+
+ For Variables (global or within scope): State their purpose, type (if discernible), and initial value or common usage.
+
+ For Modules/Files (under the top-level header): Provide an overall description of the file's purpose, its main components, and its role within the larger project (if context is available).
+
+General Guidelines:
+
+ Clarity and Conciseness: Summaries should be easy to understand, avoiding jargon where possible, and as brief as possible while retaining essential information. The full summary MUST NOT be longer than the original code input. When quoting a symbol in the code, include the line numbers where possible.
+
+ Accuracy: Ensure the summary accurately reflects the code's functionality.
+
+ Focus on Public Interface/Behavior: Prioritize describing what a function/class does and how it's used. Only include details about symbols (variables, functions, classes) that are importable/includable by other programs. DO NOT include local variables and functions that are not accessible by other functions outside their immediate scope.
+
+ No Code Snippets: Do not include any actual code snippets in the summary. Focus solely on descriptive text. If you need to refer to a specific element for context (e.g., in an error description), describe it and provide line numbers for reference from the source code.
+
+ Syntax/Semantic Errors: If the code contains syntax or semantic errors, describe them clearly within the summary, indicating the nature of the error.
+
+ Language Agnostic: Adapt the summary to the specific programming language of the provided source code (e.g., Python, JavaScript, Java, C++, etc.).
+
+ Handle Edge Cases/Dependencies: If a symbol relies heavily on external dependencies or handles specific edge cases, briefly mention these if they are significant to its overall function.
+
+ Information Source: There will be no extra information available to you. Provide the summary solely based on the provided XML objects.
+
+ Omit meaningless results: For an xml object that contains no meaningful information, you're free to omit it, but please leave a sentence in the summary saying that you did this.
+
+ No extra reply: Your reply should solely consist of the summary. Do not say anything else.
+
+ Merge chunks from the same file: When there are chunks that belong to the same file, merge their content so that they're grouped under the same top level header.
+]],
+ },
}
---@type VectorCode.CodeCompanion.LsToolOpts
@@ -23,6 +86,7 @@ local TOOL_RESULT_SOURCE = "VectorCodeToolResult"
return {
tool_result_source = TOOL_RESULT_SOURCE,
+
---@param t table|string
---@return string
flatten_table_to_string = function(t)
@@ -81,7 +145,7 @@ return {
)
end
if type(opts.max_num) == "table" then
- if opts._ then
+ if opts.chunk_mode then
opts.max_num = opts.max_num.chunk
else
opts.max_num = opts.max_num.document
@@ -103,6 +167,7 @@ return {
---@param result VectorCode.QueryResult
---@return string
process_result = function(result)
+ -- TODO: Unify the handling of summarised and non-summarised result
local llm_message
if result.chunk then
-- chunk mode
diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua
index a98b0624..04a86600 100644
--- a/lua/vectorcode/integrations/codecompanion/query_tool.lua
+++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua
@@ -1,6 +1,9 @@
---@module "codecompanion"
local cc_common = require("vectorcode.integrations.codecompanion.common")
+local cc_config = require("codecompanion.config").config
+local cc_schema = require("codecompanion.schema")
+local http_client = require("codecompanion.http")
local vc_config = require("vectorcode.config")
local check_cli_wrap = vc_config.check_cli_wrap
local logger = vc_config.logger
@@ -80,6 +83,91 @@ local filter_results = function(results, chat)
return filtered_results
end
+---@alias ChatMessage {role: string, content:string}
+
+---Build the payload for a single-shot request: the system prompt first,
+---followed by each user message in list order, with the roles mapped
+---through `adapter:map_roles`.
+---@param adapter CodeCompanion.Adapter
+---@param system_prompt string
+---@param user_messages string|string[] A single message, or a list of messages.
+---@return {messages: ChatMessage[], tools:table?}
+local function make_oneshot_payload(adapter, system_prompt, user_messages)
+  if type(user_messages) == "string" then
+    user_messages = { user_messages }
+  end
+  local messages =
+    { { role = cc_config.constants.SYSTEM_ROLE, content = system_prompt } }
+  -- `ipairs` (not `pairs`) guarantees the user messages keep their list order.
+  for _, m in ipairs(user_messages) do
+    table.insert(messages, { role = cc_config.constants.USER_ROLE, content = m })
+  end
+  return { messages = adapter:map_roles(messages) }
+end
+
+---Turn query results into the text handed to the LLM, optionally summarised by a one-off LLM request.
+---
+---The results are first rendered with `cc_common.process_result` and concatenated.
+---If `summarise_opts.enabled` is truthy (and `callback` is a function), that text is sent
+---to the summariser adapter and `callback` receives the trimmed reply. `callback` receives
+---the un-summarised text instead when summarisation is disabled, or when the request yields
+---no data, a non-"success" status, or an empty reply.
+---@param result VectorCode.QueryResult[]
+---@param summarise_opts VectorCode.CodeCompanion.SummariseOpts
+---@param cmd QueryToolArgs `cmd.query` is added to the system prompt when `query_augmented` is set.
+---@param callback fun(summary:string)
+local function generate_summary(result, summarise_opts, cmd, callback)
+ assert(vim.islist(result), "result should be a list of VectorCode.QueryResult")
+ local result_xml = table.concat(vim
+ .iter(result)
+ :map(function(res)
+ return cc_common.process_result(res)
+ end)
+ :totable())
+
+ if summarise_opts.enabled and type(callback) == "function" then
+ ---@type CodeCompanion.Adapter
+ local adapter =
+ vim.deepcopy(require("codecompanion.adapters").resolve(summarise_opts.adapter))
+
+ local system_prompt = summarise_opts.system_prompt
+ -- A function-valued prompt is called with the prompt from
+ -- `cc_common.get_query_tool_opts()` and returns the prompt to use.
+ if type(system_prompt) == "function" then
+ system_prompt = system_prompt(
+ cc_common.get_query_tool_opts().summarise.system_prompt --[[@as string]]
+ )
+ end
+
+ assert(
+ type(system_prompt) == "string",
+ "`system_prompt` should have been converted to a string."
+ )
+ -- Optionally append the search queries (joined with ", ") to the prompt.
+ if summarise_opts.query_augmented then
+ system_prompt = string.format(
+ [[%s
+
+The code provided to you is the result of a search in a codebase from the following query: %s.
+When summarising the code, pay extra attention on information related to the queries.
+ ]],
+ system_prompt,
+ table.concat(cmd.query, ", ")
+ )
+ end
+ local payload = make_oneshot_payload(adapter, system_prompt, result_xml)
+ -- Request settings come from the adapter's schema defaults, with streaming turned off.
+ local settings =
+ vim.deepcopy(adapter:map_schema_to_params(cc_schema.get_default(adapter)))
+ settings.opts.stream = false
+
+ ---@type CodeCompanion.Client
+ local client = http_client.new({ adapter = settings })
+ client:request(payload, {
+ ---@param _adapter CodeCompanion.Adapter
+ callback = function(_, data, _adapter)
+ if data then
+ local res = _adapter.handlers.chat_output(_adapter, data)
+ if res and res.status == "success" then
+ local gen_summary = vim.trim(res.output.content or "")
+ if gen_summary ~= "" then
+ return callback(gen_summary)
+ end
+ end
+ end
+ -- No usable summary: fall back to the un-summarised text.
+ return callback(result_xml)
+ end,
+ }, { silent = true })
+ else
+ callback(result_xml)
+ end
+end
+
---@param opts VectorCode.CodeCompanion.QueryToolOpts?
---@return CodeCompanion.Agent.Tool
return check_cli_wrap(function(opts)
@@ -181,7 +269,27 @@ return check_cli_wrap(function(opts)
job_runner.run_async(args, function(result, error)
if vim.islist(result) and #result > 0 and result[1].path ~= nil then ---@cast result VectorCode.QueryResult[]
- cb({ status = "success", data = result })
+ if opts.no_duplicate then
+ result = filter_results(result, agent.chat)
+ end
+ local max_result = #result
+ if opts.max_num > 0 then
+ max_result = math.min(tonumber(opts.max_num) or 1, max_result)
+ end
+ while #result > max_result do
+ table.remove(result)
+ end
+ local summary_opts = vim.deepcopy(opts.summarise) or {}
+ if type(summary_opts.enabled) == "function" then
+ summary_opts.enabled = summary_opts.enabled(agent.chat, result)
+ end
+ generate_summary(result, summary_opts, action, function(s)
+ cb({
+ status = "success",
+ ---@type VectorCode.CodeCompanion.QueryToolResult
+ data = { raw_results = result, count = #result, summary = s },
+ })
+ end)
else
if type(error) == "table" then
error = cc_common.flatten_table_to_string(error)
@@ -280,50 +388,33 @@ If a query returned empty or repeated results, you should avoid using these quer
end,
---@param agent CodeCompanion.Agent
---@param cmd QueryToolArgs
- ---@param stdout VectorCode.QueryResult[][]
+ ---@param stdout VectorCode.CodeCompanion.QueryToolResult[]
success = function(self, agent, cmd, stdout)
stdout = stdout[1]
logger.info(
("CodeCompanion tool with command %s finished."):format(vim.inspect(cmd))
)
- local user_message
- local max_result = #stdout
- if opts.max_num > 0 then
- max_result = math.min(opts.max_num or 1, max_result)
- end
- if opts.no_duplicate then
- stdout = filter_results(stdout, agent.chat)
- end
- for i, file in pairs(stdout) do
- if i <= max_result then
- if i == 1 then
- user_message = string.format(
- "**VectorCode Tool**: Retrieved %d %s(s)",
- max_result,
- mode
- )
- if cmd.project_root then
- user_message = user_message .. " from " .. cmd.project_root
- end
- user_message = user_message .. "\n"
- else
- user_message = ""
- end
- agent.chat:add_tool_output(
- self,
- cc_common.process_result(file),
- user_message
- )
- if not opts.chunk_mode then
- -- only add to reference if running in full document mode
- local ref = {
- source = cc_common.tool_result_source,
- id = file.path,
- path = file.path,
- opts = { visible = false },
- }
- agent.chat.references:add(ref)
- end
+ -- `stdout` is now a single record (`raw_results`, `count`, `summary`).
+ local raw_results = stdout.raw_results or {}
+ -- One tool output carries everything: the summary when there is one,
+ -- otherwise the concatenated raw results.
+ agent.chat:add_tool_output(
+   self,
+   stdout.summary
+     or table.concat(vim
+       .iter(raw_results)
+       :map(function(res)
+         return cc_common.process_result(res)
+       end)
+       :totable()),
+   string.format("**VectorCode Tool**: Retrieved %d %s(s)", stdout.count, mode)
+ )
+ -- Iterate over the retrieved results, not over `stdout` itself: its
+ -- `count` and `summary` fields are not query results and have no `path`.
+ for _, file in ipairs(raw_results) do
+   if not opts.chunk_mode then
+     -- Only add references in full document mode: in chunk mode there
+     -- will be multiple chunks with the same path (id).
+     -- TODO: figure out a way to deduplicate.
+     agent.chat.references:add({
+       source = cc_common.tool_result_source,
+       id = file.path,
+       path = file.path,
+       opts = { visible = false },
+     })
end
end
end,
diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua
index 779493bf..01d62264 100644
--- a/lua/vectorcode/types.lua
+++ b/lua/vectorcode/types.lua
@@ -1,3 +1,5 @@
+---@module "codecompanion"
+
---Type definition of the retrieval result.
---@class VectorCode.QueryResult
---@field path string Path to the file
@@ -6,6 +8,7 @@
---@field start_line integer?
---@field end_line integer?
---@field chunk_id string?
+---@field summary string?
---@class VectorCode.LsResult
---@field project-root string
@@ -93,6 +96,7 @@
--- Whether to send chunks instead of full files to the LLM. Default: `false`
--- > Make sure you adjust `max_num` and `default_num` accordingly.
---@field chunk_mode boolean?
+---@field summarise VectorCode.CodeCompanion.SummariseOpts?
---@class VectorCode.CodeCompanion.VectoriseToolOpts: VectorCode.CodeCompanion.ToolOpts
@@ -103,3 +107,28 @@
---@field collapse boolean
--- Other tools that you'd like to include in `vectorcode_toolbox`
---@field extras string[]
+
+--- The result of the query tool should be structured in the following table
+---@class VectorCode.CodeCompanion.QueryToolResult
+---@field raw_results VectorCode.QueryResult[]
+---@field count integer
+---@field summary string|nil
+
+---@class VectorCode.CodeCompanion.SummariseOpts
+---A boolean flag that controls whether summarisation should be enabled.
+---This can also be a function that returns a boolean.
+---In this case, you can use this option to dynamically control whether summarisation is enabled during a chat.
+---
+---This function receives 2 parameters:
+--- - `CodeCompanion.Chat`: the chat object;
+--- - `VectorCode.QueryResult[]`: a list of query results.
+---@field enabled boolean|(fun(chat: CodeCompanion.Chat, results: VectorCode.QueryResult[]):boolean)|nil
+---The adapter used for the summarisation task. When set to `nil`, the adapter from the current chat will be used.
+---@field adapter string|CodeCompanion.Adapter|nil
+---The system prompt sent to the summariser model.
+---When set to a function, it'll receive the default system prompt as the only parameter,
+---and should return the new (full) system prompt. This allows you to customise or rewrite the system prompt.
+---@field system_prompt string|(fun(original_prompt: string): string)
+---When set to true, include the query messages so that the LLM may make task-related summarisations.
+---This happens __after__ the `system_prompt` callback processing
+---@field query_augmented boolean