From bfd86b9add1708281dbff9e3a4f252da4726ccba Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Fri, 13 Jun 2025 12:10:31 +0800 Subject: [PATCH 1/6] feat(cli): Return `chunk_id` in structured query result output --- docs/cli.md | 6 +++++- src/vectorcode/subcommands/query/__init__.py | 5 ++++- tests/subcommands/query/test_query.py | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index 393ee0a3..8d619b71 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -595,16 +595,20 @@ If you used `--include chunk path` parameters, the array will look like this: "chunk": "foo", "start_line": 1, "end_line": 1, + "chunk_id": "chunk_id_1" }, { "path": "path_to_another_file.py", "chunk": "bar", "start_line": 1, "end_line": 1, + "chunk_id": "chunk_id_2" } ] ``` -Keep in mind that both `start_line` and `end_line` are inclusive. +Keep in mind that both `start_line` and `end_line` are inclusive. The `chunk_id` +is a random string that can be used as a unique identifier to distinguish +between chunks. These are the same IDs used in the database. #### `vectorcode vectorise` The output is in JSON format. It contains a dictionary with the following fields: diff --git a/src/vectorcode/subcommands/query/__init__.py b/src/vectorcode/subcommands/query/__init__.py index 4f4b507a..51c3a550 100644 --- a/src/vectorcode/subcommands/query/__init__.py +++ b/src/vectorcode/subcommands/query/__init__.py @@ -114,7 +114,10 @@ async def build_query_results( assert chunk_texts is not None, ( "QueryResult does not contain `documents`!" 
) - full_result: dict[str, str | int] = {"chunk": str(chunk_texts[0])} + full_result: dict[str, str | int] = { + "chunk": str(chunk_texts[0]), + "chunk_id": identifier, + } if meta[0].get("start") is not None and meta[0].get("end") is not None: path = str(meta[0].get("path")) with open(path) as fin: diff --git a/tests/subcommands/query/test_query.py b/tests/subcommands/query/test_query.py index a6b17689..4a54de9d 100644 --- a/tests/subcommands/query/test_query.py +++ b/tests/subcommands/query/test_query.py @@ -173,6 +173,7 @@ async def test_build_query_results_chunk_mode_success(mock_collection, mock_conf "chunk": expected_chunk_content, "start_line": start_line, "end_line": end_line, + "chunk_id": identifier, } assert results[0] == expected_full_result From 82f77f76d7733140277882cb804a00805a5cdeec Mon Sep 17 00:00:00 2001 From: Davidyz Date: Fri, 13 Jun 2025 04:11:26 +0000 Subject: [PATCH 2/6] Auto generate docs --- doc/VectorCode-cli.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index 25589b47..9d675453 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -660,17 +660,21 @@ If you used `--include chunk path` parameters, the array will look like this: "chunk": "foo", "start_line": 1, "end_line": 1, + "chunk_id": "chunk_id_1" }, { "path": "path_to_another_file.py", "chunk": "bar", "start_line": 1, "end_line": 1, + "chunk_id": "chunk_id_2" } ] < -Keep in mind that both `start_line` and `end_line` are inclusive. +Keep in mind that both `start_line` and `end_line` are inclusive. The +`chunk_id` is a random string that can be used as a unique identifier to +distinguish between chunks. These are the same IDs used in the database. 
VECTORCODE VECTORISE From d20b5847a4c5c6f54af217d395470a6f82eab569 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Fri, 13 Jun 2025 20:44:29 +0800 Subject: [PATCH 3/6] feat(nvim): Deduplicate tool results using chunk_id (wip) --- .../integrations/codecompanion/common.lua | 56 ++++++++++++++++++- .../integrations/codecompanion/query_tool.lua | 10 ++-- lua/vectorcode/types.lua | 1 + 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 4bcc7f1b..0e2fd6b1 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -1,3 +1,5 @@ +---@module "codecompanion" + local job_runner local vc_config = require("vectorcode.config") local notify_opts = vc_config.notify_opts @@ -17,8 +19,10 @@ local default_ls_options = {} ---@type VectorCode.CodeCompanion.VectoriseToolOpts local default_vectorise_options = {} +local TOOL_RESULT_SOURCE = "VectorCodeToolResult" + return { - tool_result_source = "VectorCodeToolResult", + tool_result_source = TOOL_RESULT_SOURCE, ---@param t table|string ---@return string flatten_table_to_string = function(t) @@ -122,6 +126,7 @@ return { end return llm_message end, + ---@param use_lsp boolean ---@return VectorCode.JobRunner initialise_runner = function(use_lsp) @@ -145,4 +150,53 @@ return { end return job_runner end, + + ---@param results VectorCode.QueryResult[] + ---@param chat CodeCompanion.Chat + ---@return VectorCode.QueryResult[] + filter_results = function(results, chat) + local existing_refs = chat.refs + if existing_refs == nil then + return results + end + existing_refs = vim + .iter(existing_refs) + :filter( + ---@param ref CodeCompanion.Chat.Ref + function(ref) + return ref.source == TOOL_RESULT_SOURCE or ref.path or ref.bufnr + end + ) + :map( + ---@param ref CodeCompanion.Chat.Ref + function(ref) + if ref.source == TOOL_RESULT_SOURCE then + return ref.id + 
elseif ref.path then + return ref.path + elseif ref.bufnr then + return vim.api.nvim_buf_get_name(ref.bufnr) + end + end + ) + :totable() + + return vim + .iter(results) + :filter( + ---@param res VectorCode.QueryResult + function(res) + -- return true if res is not in refs + if res.chunk then + if res.chunk_id == nil then + return true + end + return not vim.tbl_contains(existing_refs, res.chunk_id) + else + return not vim.tbl_contains(existing_refs, res.path) + end + end + ) + :totable() + end, } diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index 03ac3ff8..5ac9a375 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -219,6 +219,7 @@ For example, you should include `parameter`, `arguments` and `return value` for if opts.max_num > 0 then max_result = math.min(opts.max_num or 1, max_result) end + stdout = cc_common.filter_results(stdout, agent.chat) for i, file in pairs(stdout) do if i <= max_result then if i == 1 then @@ -239,15 +240,16 @@ For example, you should include `parameter`, `arguments` and `return value` for cc_common.process_result(file), user_message ) - if not opts.chunk_mode then + if (not opts.chunk_mode) or file.chunk_id ~= nil then -- skip referencing because there will be multiple chunks with the same path (id). -- TODO: figure out a way to deduplicate. - agent.chat.references:add({ + local ref = { source = cc_common.tool_result_source, - id = file.path, + id = file.chunk_id or file.path, path = file.path, opts = { visible = false }, - }) + } + agent.chat.references:add(ref) end end end diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 7421a343..779493bf 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -5,6 +5,7 @@ ---@field chunk string? ---@field start_line integer? ---@field end_line integer? +---@field chunk_id string? 
---@class VectorCode.LsResult ---@field project-root string From 7c0dbcdedbd907b66ed45f3da9dee339b3508136 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sat, 21 Jun 2025 12:32:55 +0800 Subject: [PATCH 4/6] feat(nvim): Deduplicate tool results using in-house result tracker --- .../integrations/codecompanion/common.lua | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 0e2fd6b1..9a575f44 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -21,6 +21,11 @@ local default_vectorise_options = {} local TOOL_RESULT_SOURCE = "VectorCodeToolResult" +---@alias chat_id integer +---@alias result_id string +---@type +local result_tracker = {} + return { tool_result_source = TOOL_RESULT_SOURCE, ---@param t table|string @@ -181,22 +186,40 @@ return { ) :totable() - return vim + ---@type VectorCode.QueryResult[] + local filtered_results = vim .iter(results) :filter( ---@param res VectorCode.QueryResult function(res) - -- return true if res is not in refs + -- return true if res should be kept if res.chunk then if res.chunk_id == nil then return true end + if + result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.chunk_id] + then + return false + end return not vim.tbl_contains(existing_refs, res.chunk_id) else + if result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.path] then + return false + end return not vim.tbl_contains(existing_refs, res.path) end end ) :totable() + + for _, res in pairs(filtered_results) do + if result_tracker[chat.id] == nil then + result_tracker[chat.id] = {} + end + result_tracker[chat.id][res.chunk_id or res.path] = true + end + + return filtered_results end, } From 282141c77028d4b11aeddb2037597b474f4ac533 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sat, 21 Jun 2025 16:12:27 +0800 Subject: [PATCH 5/6] fix(nvim): 
make sure `no_duplicate` option is effective --- lua/vectorcode/integrations/codecompanion/common.lua | 6 ++---- lua/vectorcode/integrations/codecompanion/query_tool.lua | 5 ++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 9a575f44..6e8e4912 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -160,10 +160,8 @@ return { ---@param chat CodeCompanion.Chat ---@return VectorCode.QueryResult[] filter_results = function(results, chat) - local existing_refs = chat.refs - if existing_refs == nil then - return results - end + local existing_refs = chat.refs or {} + existing_refs = vim .iter(existing_refs) :filter( diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index 5ac9a375..31477bfd 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -148,6 +148,7 @@ You may include multiple keywords in the command. description = [[ Query messages used for the search. They should also contain relevant keywords. For example, you should include `parameter`, `arguments` and `return value` for the query `function`. +If a query returned empty or repeated results, you should avoid using these query keywords, unless the user instructed otherwise. 
]], }, count = { @@ -219,7 +220,9 @@ For example, you should include `parameter`, `arguments` and `return value` for if opts.max_num > 0 then max_result = math.min(opts.max_num or 1, max_result) end - stdout = cc_common.filter_results(stdout, agent.chat) + if opts.no_duplicate then + stdout = cc_common.filter_results(stdout, agent.chat) + end for i, file in pairs(stdout) do if i <= max_result then if i == 1 then From 0150ba0c82cbb8b0b6ae019497985241a373f1a1 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Sun, 22 Jun 2025 16:50:00 +0800 Subject: [PATCH 6/6] refactor(nvim): refactoring and cleanup --- .../integrations/codecompanion/common.lua | 70 ---------------- .../integrations/codecompanion/query_tool.lua | 80 +++++++++++++++++-- 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/lua/vectorcode/integrations/codecompanion/common.lua b/lua/vectorcode/integrations/codecompanion/common.lua index 6e8e4912..987bfcb5 100644 --- a/lua/vectorcode/integrations/codecompanion/common.lua +++ b/lua/vectorcode/integrations/codecompanion/common.lua @@ -21,11 +21,6 @@ local default_vectorise_options = {} local TOOL_RESULT_SOURCE = "VectorCodeToolResult" ----@alias chat_id integer ----@alias result_id string ----@type -local result_tracker = {} - return { tool_result_source = TOOL_RESULT_SOURCE, ---@param t table|string @@ -155,69 +150,4 @@ return { end return job_runner end, - - ---@param results VectorCode.QueryResult[] - ---@param chat CodeCompanion.Chat - ---@return VectorCode.QueryResult[] - filter_results = function(results, chat) - local existing_refs = chat.refs or {} - - existing_refs = vim - .iter(existing_refs) - :filter( - ---@param ref CodeCompanion.Chat.Ref - function(ref) - return ref.source == TOOL_RESULT_SOURCE or ref.path or ref.bufnr - end - ) - :map( - ---@param ref CodeCompanion.Chat.Ref - function(ref) - if ref.source == TOOL_RESULT_SOURCE then - return ref.id - elseif ref.path then - return ref.path - elseif ref.bufnr then - return 
vim.api.nvim_buf_get_name(ref.bufnr) - end - end - ) - :totable() - - ---@type VectorCode.QueryResult[] - local filtered_results = vim - .iter(results) - :filter( - ---@param res VectorCode.QueryResult - function(res) - -- return true if res should be kept - if res.chunk then - if res.chunk_id == nil then - return true - end - if - result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.chunk_id] - then - return false - end - return not vim.tbl_contains(existing_refs, res.chunk_id) - else - if result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.path] then - return false - end - return not vim.tbl_contains(existing_refs, res.path) - end - end - ) - :totable() - - for _, res in pairs(filtered_results) do - if result_tracker[chat.id] == nil then - result_tracker[chat.id] = {} - end - result_tracker[chat.id][res.chunk_id or res.path] = true - end - - return filtered_results - end, } diff --git a/lua/vectorcode/integrations/codecompanion/query_tool.lua b/lua/vectorcode/integrations/codecompanion/query_tool.lua index 31477bfd..a98b0624 100644 --- a/lua/vectorcode/integrations/codecompanion/query_tool.lua +++ b/lua/vectorcode/integrations/codecompanion/query_tool.lua @@ -9,6 +9,77 @@ local job_runner = nil ---@alias QueryToolArgs { project_root:string, count: integer, query: string[] } +---@alias chat_id integer +---@alias result_id string +---@type table<chat_id, table<result_id, boolean>> +local result_tracker = {} + +---@param results VectorCode.QueryResult[] +---@param chat CodeCompanion.Chat +---@return VectorCode.QueryResult[] +local filter_results = function(results, chat) + local existing_refs = chat.refs or {} + + existing_refs = vim + .iter(existing_refs) + :filter( + ---@param ref CodeCompanion.Chat.Ref + function(ref) + return ref.source == cc_common.tool_result_source or ref.path or ref.bufnr + end + ) + :map( + ---@param ref CodeCompanion.Chat.Ref + function(ref) + if ref.source == cc_common.tool_result_source then + return ref.id + elseif ref.path then + return ref.path + elseif
ref.bufnr then + return vim.api.nvim_buf_get_name(ref.bufnr) + end + end + ) + :totable() + + ---@type VectorCode.QueryResult[] + local filtered_results = vim + .iter(results) + :filter( + ---@param res VectorCode.QueryResult + function(res) + -- return true if res should be kept + if res.chunk then + if res.chunk_id == nil then + -- no chunk_id, always include + return true + end + if + result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.chunk_id] + then + return false + end + return not vim.tbl_contains(existing_refs, res.chunk_id) + else + if result_tracker[chat.id] ~= nil and result_tracker[chat.id][res.path] then + return false + end + return not vim.tbl_contains(existing_refs, res.path) + end + end + ) + :totable() + + for _, res in pairs(filtered_results) do + if result_tracker[chat.id] == nil then + result_tracker[chat.id] = {} + end + result_tracker[chat.id][res.chunk_id or res.path] = true + end + + return filtered_results +end + ---@param opts VectorCode.CodeCompanion.QueryToolOpts? ---@return CodeCompanion.Agent.Tool return check_cli_wrap(function(opts) @@ -221,7 +292,7 @@ If a query returned empty or repeated results, you should avoid using these quer max_result = math.min(opts.max_num or 1, max_result) end if opts.no_duplicate then - stdout = cc_common.filter_results(stdout, agent.chat) + stdout = filter_results(stdout, agent.chat) end for i, file in pairs(stdout) do if i <= max_result then @@ -243,12 +314,11 @@ If a query returned empty or repeated results, you should avoid using these quer cc_common.process_result(file), user_message ) - if (not opts.chunk_mode) or file.chunk_id ~= nil then - -- skip referencing because there will be multiple chunks with the same path (id). - -- TODO: figure out a way to deduplicate. 
+ if not opts.chunk_mode then + -- only add to reference if running in full document mode local ref = { source = cc_common.tool_result_source, - id = file.chunk_id or file.path, + id = file.path, path = file.path, opts = { visible = false }, }