diff --git a/docs/openapi.json b/docs/openapi.json index cfbed3a8c..ec0d57999 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -7648,6 +7648,14 @@ "Kubernetes is an open-source container orchestration system for automating ..." ] }, + "rag_chunks": { + "items": { + "$ref": "#/components/schemas/RAGChunk" + }, + "type": "array", + "title": "Rag Chunks", + "description": "Deprecated: List of RAG chunks used to generate the response." + }, "referenced_documents": { "items": { "$ref": "#/components/schemas/ReferencedDocument" @@ -7711,32 +7719,18 @@ ] }, "tool_calls": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array" - }, - { - "type": "null" - } - ], + "items": { + "$ref": "#/components/schemas/ToolCallSummary" + }, + "type": "array", "title": "Tool Calls", "description": "List of tool calls made during response generation" }, "tool_results": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array" - }, - { - "type": "null" - } - ], + "items": { + "$ref": "#/components/schemas/ToolResultSummary" + }, + "type": "array", "title": "Tool Results", "description": "List of tool results" } @@ -7746,7 +7740,7 @@ "response" ], "title": "QueryResponse", - "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: List of RAG chunks used to generate the response.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", + "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", "examples": [ { "available_quotas": { @@ -7979,6 +7973,45 @@ "title": "QuotaSchedulerConfiguration", "description": "Quota scheduler configuration." }, + "RAGChunk": { + "properties": { + "content": { + "type": "string", + "title": "Content", + "description": "The content of the chunk" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source", + "description": "Source document or URL" + }, + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score", + "description": "Relevance score" + } + }, + "type": "object", + "required": [ + "content" + ], + "title": "RAGChunk", + "description": "Model representing a RAG chunk used in the response." + }, "RAGInfoResponse": { "properties": { "id": { diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index e91b8afdc..88fdeed99 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -441,6 +441,7 @@ async def query_endpoint_handler_base( # pylint: disable=R0914 response=summary.llm_response, tool_calls=summary.tool_calls, tool_results=summary.tool_results, + rag_chunks=summary.rag_chunks, referenced_documents=referenced_documents, truncated=False, # TODO: implement truncation detection input_tokens=token_usage.input_tokens, diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py index 1ce31ac6d..5f0cdc6b2 100644 --- a/src/app/endpoints/query_v2.py +++ b/src/app/endpoints/query_v2.py @@ -492,7 +492,7 @@ def extract_rag_chunks_from_file_search_item( if item.results is not None: for result in item.results: rag_chunk = RAGChunk( - content=result.text, source="file_search", score=result.score + content=result.text, source=result.filename, score=result.score ) rag_chunks.append(rag_chunk) diff --git a/src/models/responses.py b/src/models/responses.py index 9b0051315..894ffb783 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -10,7 +10,7 @@ from quota.quota_exceed_error import QuotaExceedError from models.config import Action, Configuration -from utils.types import ToolCallSummary, ToolResultSummary +from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response" BAD_REQUEST_DESCRIPTION = "Invalid request format" @@ -348,9 +348,11 @@ class QueryResponse(AbstractSuccessfulResponse): Attributes: conversation_id: The optional conversation ID (UUID). response: The response. - rag_chunks: List of RAG chunks used to generate the response. + rag_chunks: Deprecated. List of RAG chunks used to generate the response. + This information is now available in tool_results under file_search_call type. referenced_documents: The URLs and titles for the documents used to generate the response. tool_calls: List of tool calls made during response generation. + tool_results: List of tool results. truncated: Whether conversation history was truncated. input_tokens: Number of tokens sent to LLM. output_tokens: Number of tokens received from LLM. @@ -370,6 +372,11 @@ class QueryResponse(AbstractSuccessfulResponse): ], ) + rag_chunks: list[RAGChunk] = Field( + default_factory=list, + description="Deprecated: List of RAG chunks used to generate the response.", + ) + referenced_documents: list[ReferencedDocument] = Field( default_factory=list, description="List of documents referenced in generating the response", diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index 47d925bb5..2ca1d6bee 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -998,8 +998,8 @@ async def test_retrieve_response_parses_referenced_documents( # Verify RAG chunks were extracted from file_search_call results assert len(_summary.rag_chunks) == 2 assert _summary.rag_chunks[0].content == "Sample text from file2.pdf" - assert _summary.rag_chunks[0].source == "file_search" + assert _summary.rag_chunks[0].source == "file2.pdf" assert _summary.rag_chunks[0].score == 0.95 assert _summary.rag_chunks[1].content == "Sample text from file3.docx" - assert _summary.rag_chunks[1].source == "file_search" + assert _summary.rag_chunks[1].source == "file3.docx" assert _summary.rag_chunks[1].score == 0.85