Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 56 additions & 23 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -7648,6 +7648,14 @@
"Kubernetes is an open-source container orchestration system for automating ..."
]
},
"rag_chunks": {
"items": {
"$ref": "#/components/schemas/RAGChunk"
},
"type": "array",
"title": "Rag Chunks",
"description": "Deprecated: List of RAG chunks used to generate the response."
},
"referenced_documents": {
"items": {
"$ref": "#/components/schemas/ReferencedDocument"
Expand Down Expand Up @@ -7711,32 +7719,18 @@
]
},
"tool_calls": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ToolCallSummary"
},
"type": "array"
},
{
"type": "null"
}
],
"items": {
"$ref": "#/components/schemas/ToolCallSummary"
},
"type": "array",
"title": "Tool Calls",
"description": "List of tool calls made during response generation"
},
"tool_results": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ToolResultSummary"
},
"type": "array"
},
{
"type": "null"
}
],
"items": {
"$ref": "#/components/schemas/ToolResultSummary"
},
"type": "array",
"title": "Tool Results",
"description": "List of tool results"
}
Expand All @@ -7746,7 +7740,7 @@
"response"
],
"title": "QueryResponse",
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: List of RAG chunks used to generate the response.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
"examples": [
{
"available_quotas": {
Expand Down Expand Up @@ -7979,6 +7973,45 @@
"title": "QuotaSchedulerConfiguration",
"description": "Quota scheduler configuration."
},
"RAGChunk": {
"properties": {
"content": {
"type": "string",
"title": "Content",
"description": "The content of the chunk"
},
"source": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Source",
"description": "Source document or URL"
},
"score": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"title": "Score",
"description": "Relevance score"
}
},
"type": "object",
"required": [
"content"
],
"title": "RAGChunk",
"description": "Model representing a RAG chunk used in the response."
},
"RAGInfoResponse": {
"properties": {
"id": {
Expand Down
1 change: 1 addition & 0 deletions src/app/endpoints/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
response=summary.llm_response,
tool_calls=summary.tool_calls,
tool_results=summary.tool_results,
rag_chunks=summary.rag_chunks,
referenced_documents=referenced_documents,
truncated=False, # TODO: implement truncation detection
input_tokens=token_usage.input_tokens,
Expand Down
2 changes: 1 addition & 1 deletion src/app/endpoints/query_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def extract_rag_chunks_from_file_search_item(
if item.results is not None:
for result in item.results:
rag_chunk = RAGChunk(
content=result.text, source="file_search", score=result.score
content=result.text, source=result.filename, score=result.score
)
rag_chunks.append(rag_chunk)

Expand Down
11 changes: 9 additions & 2 deletions src/models/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from quota.quota_exceed_error import QuotaExceedError
from models.config import Action, Configuration
from utils.types import ToolCallSummary, ToolResultSummary
from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary

SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response"
BAD_REQUEST_DESCRIPTION = "Invalid request format"
Expand Down Expand Up @@ -348,9 +348,11 @@ class QueryResponse(AbstractSuccessfulResponse):
Attributes:
conversation_id: The optional conversation ID (UUID).
response: The response.
rag_chunks: List of RAG chunks used to generate the response.
rag_chunks: Deprecated. List of RAG chunks used to generate the response.
This information is now available in tool_results under file_search_call type.
referenced_documents: The URLs and titles for the documents used to generate the response.
tool_calls: List of tool calls made during response generation.
tool_results: List of tool results.
truncated: Whether conversation history was truncated.
input_tokens: Number of tokens sent to LLM.
output_tokens: Number of tokens received from LLM.
Expand All @@ -370,6 +372,11 @@ class QueryResponse(AbstractSuccessfulResponse):
],
)

rag_chunks: list[RAGChunk] = Field(
default_factory=list,
description="Deprecated: List of RAG chunks used to generate the response.",
)

referenced_documents: list[ReferencedDocument] = Field(
default_factory=list,
description="List of documents referenced in generating the response",
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/app/endpoints/test_query_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,8 +998,8 @@ async def test_retrieve_response_parses_referenced_documents(
# Verify RAG chunks were extracted from file_search_call results
assert len(_summary.rag_chunks) == 2
assert _summary.rag_chunks[0].content == "Sample text from file2.pdf"
assert _summary.rag_chunks[0].source == "file_search"
assert _summary.rag_chunks[0].source == "file2.pdf"
assert _summary.rag_chunks[0].score == 0.95
assert _summary.rag_chunks[1].content == "Sample text from file3.docx"
assert _summary.rag_chunks[1].source == "file_search"
assert _summary.rag_chunks[1].source == "file3.docx"
assert _summary.rag_chunks[1].score == 0.85
Loading