diff --git a/docs/openapi.json b/docs/openapi.json index 693ff1b9e..154e0b263 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3664,7 +3664,7 @@ "rlsapi-v1" ], "summary": "Infer Endpoint", - "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", + "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", "operationId": "infer_endpoint_v1_infer_post", "requestBody": { "content": { @@ -4290,7 +4290,7 @@ ], "summary": "Handle A2A Jsonrpc", "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", + "operationId": "handle_a2a_jsonrpc_a2a_post", "responses": { "200": { "description": "Successful Response", @@ -4308,7 +4308,7 @@ ], "summary": "Handle A2A Jsonrpc", "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", + "operationId": "handle_a2a_jsonrpc_a2a_post", "responses": { "200": { "description": "Successful Response", @@ -5335,11 +5335,11 @@ "description": "Dimensionality of embedding vectors.", "default": 768 
}, - "vector_db_id": { + "vector_store_id": { "type": "string", "minLength": 1, - "title": "Vector DB ID", - "description": "Vector DB identification." + "title": "Vector Store ID", + "description": "Vector store identification." }, "db_path": { "type": "string", @@ -5352,7 +5352,7 @@ "type": "object", "required": [ "rag_id", - "vector_db_id", + "vector_store_id", "db_path" ], "title": "ByokRag", @@ -8431,11 +8431,23 @@ ], "title": "Doc Title", "description": "Title of the referenced document" + }, + "doc_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Doc Id", + "description": "ID of the referenced document" } }, "type": "object", "title": "ReferencedDocument", - "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." + "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc.\n doc_id: ID of the referenced doc." }, "RlsapiV1Attachment": { "properties": { diff --git a/docs/openapi.md b/docs/openapi.md index f576011e5..b681967dc 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -3200,6 +3200,8 @@ system info) and returns an LLM-generated response. Args: infer_request: The inference request containing question and context. + request: The FastAPI request object for accessing headers and state. + background_tasks: FastAPI background tasks for async Splunk event sending. auth: Authentication tuple from the configured auth provider. Returns: @@ -4184,7 +4186,7 @@ BYOK (Bring Your Own Knowledge) RAG configuration. | rag_type | string | Type of RAG database. | | embedding_model | string | Embedding model identification | | embedding_dimension | integer | Dimensionality of embedding vectors. | -| vector_db_id | string | Vector DB identification. | +| vector_store_id | string | Vector store identification. | | db_path | string | Path to RAG database. | @@ -5316,12 +5318,14 @@ Model representing a document referenced in generating a response. Attributes: doc_url: Url to the referenced doc. doc_title: Title of the referenced doc. + doc_id: ID of the referenced doc. 
| Field | Type | Description | |-------|------|-------------| | doc_url | | URL of the referenced document | | doc_title | | Title of the referenced document | +| doc_id | | ID of the referenced document | ## RlsapiV1Attachment diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index ce0c87bed..eddf30f86 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -1,8 +1,6 @@ """Handler for REST API call to provide answer to query.""" -import ast import logging -import re from datetime import UTC, datetime from typing import Annotated, Any, Optional @@ -14,7 +12,6 @@ RateLimitError, # type: ignore ) from llama_stack_client.types.model_list_response import ModelListResponse -from llama_stack_client.types.shared.interleaved_content_item import TextContentItem from sqlalchemy.exc import SQLAlchemyError import constants @@ -36,7 +33,6 @@ PromptTooLongResponse, QueryResponse, QuotaExceededResponse, - ReferencedDocument, ServiceUnavailableResponse, UnauthorizedResponse, UnprocessableEntityResponse, @@ -553,39 +549,6 @@ def is_input_shield(shield: Shield) -> bool: return _is_inout_shield(shield) or not is_output_shield(shield) -def parse_metadata_from_text_item( - text_item: TextContentItem, -) -> Optional[ReferencedDocument]: - """ - Parse a single TextContentItem to extract referenced documents. - - Args: - text_item (TextContentItem): The TextContentItem containing metadata. - - Returns: - ReferencedDocument: A ReferencedDocument object containing 'doc_url' and 'doc_title' - representing the referenced documents found in the metadata. - """ - docs: list[ReferencedDocument] = [] - if not isinstance(text_item, TextContentItem): - return docs - - metadata_blocks = re.findall( - r"Metadata:\s*({.*?})(?:\n|$)", text_item.text, re.DOTALL - ) - for block in metadata_blocks: - try: - data = ast.literal_eval(block) - url = data.get("docs_url") - title = data.get("title") - if url and title: - return ReferencedDocument(doc_url=url, doc_title=title) - logger.debug("Invalid metadata block (missing url or title): %s", block) - except (ValueError, SyntaxError) as e: - logger.debug("Failed to parse metadata block: %s | Error: %s", block, e) - return None - - def validate_attachments_metadata(attachments: list[Attachment]) -> None: """Validate the attachments metadata provided in the request. 
diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py index 64f4c5341..359fe8c3d 100644 --- a/src/app/endpoints/query_v2.py +++ b/src/app/endpoints/query_v2.py @@ -541,11 +541,11 @@ def parse_referenced_documents_from_responses_api( response: The OpenAI Response API response object Returns: - list[ReferencedDocument]: List of referenced documents with doc_url and doc_title + list[ReferencedDocument]: List of referenced documents with doc_url, doc_title and doc_id """ documents: list[ReferencedDocument] = [] - # Use a set to track unique documents by (doc_url, doc_title) tuple - seen_docs: set[tuple[Optional[str], Optional[str]]] = set() + # Use a set to track unique documents by (doc_url, doc_title, doc_id) tuple + seen_docs: set[tuple[Optional[str], Optional[str], Optional[str]]] = set() # Handle None response (e.g., when agent fails) if response is None or not response.output: @@ -560,74 +560,31 @@ def parse_referenced_documents_from_responses_api( for result in results: # Handle both object and dict access if isinstance(result, dict): - filename = result.get("filename") attributes = result.get("attributes", {}) else: - filename = getattr(result, "filename", None) attributes = getattr(result, "attributes", {}) # Try to get URL from attributes # Look for common URL fields in attributes doc_url = ( - attributes.get("link") + attributes.get("doc_url") + or attributes.get("docs_url") or attributes.get("url") - or attributes.get("doc_url") + or attributes.get("link") ) + doc_title = attributes.get("title") + doc_id = attributes.get("document_id") or attributes.get("doc_id") - # If we have at least a filename or url - if filename or doc_url: + if doc_title or doc_url: # Treat empty string as None for URL to satisfy Optional[AnyUrl] final_url = doc_url if doc_url else None - if (final_url, filename) not in seen_docs: + if (final_url, doc_title, doc_id) not in seen_docs: documents.append( - ReferencedDocument(doc_url=final_url, doc_title=filename) - ) - seen_docs.add((final_url, filename)) - - # 2. 
Parse from message content annotations - elif item_type == "message": - content = getattr(output_item, "content", None) - if isinstance(content, list): - for part in content: - # Skip if part is a string or doesn't have annotations - if isinstance(part, str): - continue - - annotations = getattr(part, "annotations", []) or [] - for annotation in annotations: - # Handle both object and dict access for annotations - if isinstance(annotation, dict): - anno_type = annotation.get("type") - anno_url = annotation.get("url") - anno_title = annotation.get("title") or annotation.get( - "filename" - ) - else: - anno_type = getattr(annotation, "type", None) - anno_url = getattr(annotation, "url", None) - anno_title = getattr(annotation, "title", None) or getattr( - annotation, "filename", None + ReferencedDocument( + doc_url=final_url, doc_title=doc_title, doc_id=doc_id ) - - if anno_type == "url_citation": - # Treat empty string as None - final_url = anno_url if anno_url else None - if (final_url, anno_title) not in seen_docs: - documents.append( - ReferencedDocument( - doc_url=final_url, doc_title=anno_title - ) - ) - seen_docs.add((final_url, anno_title)) - - elif anno_type == "file_citation": - if (None, anno_title) not in seen_docs: - documents.append( - ReferencedDocument( - doc_url=None, doc_title=anno_title - ) - ) - seen_docs.add((None, anno_title)) + ) + seen_docs.add((final_url, doc_title, doc_id)) return documents diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index 0e45e0614..d6af46a65 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -11,10 +11,9 @@ from pathlib import Path from typing import Any +import yaml from azure.core.exceptions import ClientAuthenticationError from azure.identity import ClientSecretCredential, CredentialUnavailableError - -import yaml from llama_stack.core.stack import replace_env_vars logger = logging.getLogger(__name__) @@ -115,51 +114,145 @@ def setup_azure_entra_id_token( # ============================================================================= -def construct_vector_dbs_section( +def construct_storage_backends_section( + ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] +) -> dict[str, Any]: + """Construct storage.backends section in Llama Stack configuration file. + + Builds the storage.backends section for a Llama Stack configuration by + preserving existing backends and adding new ones for each BYOK RAG. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration mapping. + byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions. + + Returns: + dict[str, Any]: The storage.backends dict with new backends added. + """ + output: dict[str, Any] = {} + + # preserve existing backends + if "storage" in ls_config and "backends" in ls_config["storage"]: + output = ls_config["storage"]["backends"].copy() + + # add new backends for each BYOK RAG + for brag in byok_rag: + vector_store_id = brag.get("vector_store_id", "") + backend_name = f"byok_{vector_store_id}_storage" + output[backend_name] = { + "type": "kv_sqlite", + "db_path": brag.get("db_path", f".llama/{vector_store_id}.db"), + } + logger.info( + "Added %s backends into storage.backends section, total backends %s", + len(byok_rag), + len(output), + ) + return output + + +def construct_vector_stores_section( ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] ) -> list[dict[str, Any]]: - """Construct vector_dbs section in Llama Stack configuration file. 
+ """Construct registered_resources.vector_stores section in Llama Stack config. - Builds the vector_dbs section for a Llama Stack configuration. + Builds the vector_stores section for a Llama Stack configuration. Parameters: ls_config (dict[str, Any]): Existing Llama Stack configuration mapping - used as the base; existing `vector_dbs` entries are preserved if - present. + used as the base; existing `registered_resources.vector_stores` entries + are preserved if present. byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions to be added to - the `vector_dbs` section. + the `vector_stores` section. Returns: - list[dict[str, Any]]: The `vector_dbs` list where each entry is a mapping with keys: - - `vector_db_id`: identifier of the vector database + list[dict[str, Any]]: The `vector_stores` list where each entry is a mapping with keys: + - `vector_store_id`: identifier of the vector store - `provider_id`: provider identifier prefixed with `"byok_"` - `embedding_model`: name of the embedding model - `embedding_dimension`: embedding vector dimensionality """ output = [] - # fill-in existing vector_dbs entries - if "vector_dbs" in ls_config: - output = ls_config["vector_dbs"] + # fill-in existing vector_stores entries from registered_resources + if "registered_resources" in ls_config: + if "vector_stores" in ls_config["registered_resources"]: + output = ls_config["registered_resources"]["vector_stores"].copy() - # append new vector_dbs entries + # append new vector_stores entries for brag in byok_rag: + vector_store_id = brag.get("vector_store_id", "") output.append( { - "vector_db_id": brag.get("vector_db_id", ""), - "provider_id": "byok_" + brag.get("vector_db_id", ""), + "vector_store_id": vector_store_id, + "provider_id": f"byok_{vector_store_id}", "embedding_model": brag.get("embedding_model", ""), "embedding_dimension": brag.get("embedding_dimension"), } ) logger.info( - "Added %s items into vector_dbs section, total items %s", + "Added %s items into registered_resources.vector_stores, total items %s", len(byok_rag), len(output), ) return output +def construct_models_section( + ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Construct registered_resources.models section with embedding models. + + Adds embedding model entries for each BYOK RAG configuration. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration mapping. + byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions. + + Returns: + list[dict[str, Any]]: The models list with embedding models added. 
+ """ + output: list[dict[str, Any]] = [] + + # preserve existing models + if "registered_resources" in ls_config: + if "models" in ls_config["registered_resources"]: + output = ls_config["registered_resources"]["models"].copy() + + # add embedding models for each BYOK RAG + for brag in byok_rag: + embedding_model = brag.get("embedding_model", "") + vector_store_id = brag.get("vector_store_id", "") + embedding_dimension = brag.get("embedding_dimension") + + # Strip sentence-transformers/ prefix if present + provider_model_id = embedding_model + if provider_model_id.startswith("sentence-transformers/"): + provider_model_id = provider_model_id[len("sentence-transformers/") :] + + # Skip if embedding model already registered + existing_model_ids = [m.get("provider_model_id") for m in output] + if provider_model_id in existing_model_ids: + continue + + output.append( + { + "model_id": f"byok_{vector_store_id}_embedding", + "model_type": "embedding", + "provider_id": "sentence-transformers", + "provider_model_id": provider_model_id, + "metadata": { + "embedding_dimension": embedding_dimension, + }, + } + ) + logger.info( + "Added embedding models into registered_resources.models, total models %s", + len(output), + ) + return output + + def construct_vector_io_providers_section( ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] ) -> list[dict[str, Any]]: @@ -179,28 +272,29 @@ def construct_vector_io_providers_section( Returns: list[dict[str, Any]]: The resulting providers/vector_io list containing the original entries (if any) plus one entry per item in `byok_rag`. - Each appended entry has `provider_id` set to "byok_<vector_db_id>", - `provider_type` set from the RAG item, and a `config` with a `kvstore` - pointing to ".llama/<vector_db_id>.db", `namespace` as None, and `type` - "sqlite". + Each appended entry has `provider_id` set to "byok_<vector_store_id>", + `provider_type` set from the RAG item, and a `config` with `persistence` + referencing the corresponding backend.
""" output = [] # fill-in existing vector_io entries if "providers" in ls_config and "vector_io" in ls_config["providers"]: - output = ls_config["providers"]["vector_io"] + output = ls_config["providers"]["vector_io"].copy() # append new vector_io entries for brag in byok_rag: + vector_store_id = brag.get("vector_store_id", "") + backend_name = f"byok_{vector_store_id}_storage" + provider_id = f"byok_{vector_store_id}" output.append( { - "provider_id": "byok_" + brag.get("vector_db_id", ""), + "provider_id": provider_id, "provider_type": brag.get("rag_type", "inline::faiss"), "config": { - "kvstore": { - "db_path": ".llama/" + brag.get("vector_db_id", "") + ".db", - "namespace": None, - "type": "sqlite", + "persistence": { + "namespace": "vector_io::faiss", + "backend": backend_name, } }, } @@ -225,14 +319,33 @@ def enrich_byok_rag(ls_config: dict[str, Any], byok_rag: list[dict[str, Any]]) - return logger.info("Enriching Llama Stack config with BYOK RAG") - ls_config["vector_dbs"] = construct_vector_dbs_section(ls_config, byok_rag) + # Add storage backends + if "storage" not in ls_config: + ls_config["storage"] = {} + ls_config["storage"]["backends"] = construct_storage_backends_section( + ls_config, byok_rag + ) + + # Add vector_io providers if "providers" not in ls_config: ls_config["providers"] = {} ls_config["providers"]["vector_io"] = construct_vector_io_providers_section( ls_config, byok_rag ) + # Add registered vector stores + if "registered_resources" not in ls_config: + ls_config["registered_resources"] = {} + ls_config["registered_resources"]["vector_stores"] = ( + construct_vector_stores_section(ls_config, byok_rag) + ) + + # Add embedding models + ls_config["registered_resources"]["models"] = construct_models_section( + ls_config, byok_rag + ) + # ============================================================================= # Main Generation Function (service/container mode only) diff --git a/src/models/config.py b/src/models/config.py index 771ed4fdf..15181ba4a 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -1480,11 +1480,11 @@ class ByokRag(ConfigurationBase): description="Dimensionality of embedding vectors.", ) - vector_db_id: str = Field( + vector_store_id: str = Field( ..., min_length=1, - title="Vector DB ID", - description="Vector DB identification.", + title="Vector Store ID", + description="Vector store identification.", ) db_path: FilePath = Field( diff --git a/src/models/responses.py b/src/models/responses.py index 9749f95f9..1ce11d6d2 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -8,8 +8,8 @@ from pydantic import AnyUrl, BaseModel, Field from pydantic_core import SchemaError -from quota.quota_exceed_error import QuotaExceedError from models.config import Action, Configuration +from quota.quota_exceed_error import QuotaExceedError from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response" @@ -369,6 +369,7 @@ class ReferencedDocument(BaseModel): Attributes: doc_url: Url to the referenced doc. doc_title: Title of the referenced doc. + doc_id: ID of the referenced doc. """ doc_url: Optional[AnyUrl] = Field( @@ -379,6 +380,8 @@ class ReferencedDocument(BaseModel): None, description="Title of the referenced document" ) + doc_id: Optional[str] = Field(None, description="ID of the referenced document") + class QueryResponse(AbstractSuccessfulResponse): """Model representing LLM response to a query. 
diff --git a/tests/configuration/run.yaml b/tests/configuration/run.yaml index 6bb776005..7a1702bc8 100644 --- a/tests/configuration/run.yaml +++ b/tests/configuration/run.yaml @@ -16,54 +16,48 @@ benchmarks: [] container_image: null datasets: [] external_providers_dir: null -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite logging: null -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null - type: sqlite providers: agents: - - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference + - provider_id: meta-reference provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default datasetio: - - config: - kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - provider_id: huggingface + - provider_id: huggingface provider_type: remote::huggingface - - config: + config: kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite - provider_id: localfs + namespace: huggingface_datasetio + backend: kv_default + - provider_id: localfs provider_type: inline::localfs - eval: - - config: + config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - provider_id: meta-reference + namespace: localfs_datasetio + backend: kv_default + eval: + - provider_id: meta-reference provider_type: inline::meta-reference + config: + kvstore: + namespace: eval_store + backend: kv_default inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["gpt-4-turbo"] + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} post_training: - config: checkpoint_format: huggingface @@ -95,15 +89,17 @@ providers: provider_id: meta-reference provider_type: inline::meta-reference tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} vector_io: - config: - kvstore: - db_path: .llama/distributions/ollama/faiss_store.db - namespace: null - type: sqlite + persistence: + namespace: vector_io::faiss + backend: kv_default provider_id: faiss provider_type: inline::faiss scoring_fns: [] @@ -115,11 +111,41 @@ server: tls_cafile: null tls_certfile: null tls_keyfile: null -shields: [] -vector_dbs: [] - -models: - - model_id: gpt-4-turbo - provider_id: openai - model_type: llm - provider_model_id: gpt-4-turbo +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default 
+registered_resources: + models: [] + shields: [] + vector_stores: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +telemetry: + enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index b900af9b5..459c991aa 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -8,14 +8,11 @@ import pytest from fastapi import HTTPException, Request, status -from llama_stack_client.types.shared.interleaved_content_item import TextContentItem -from pydantic import AnyUrl from pytest_mock import MockerFixture from app.endpoints.query import ( evaluate_model_hints, is_transcripts_enabled, - parse_metadata_from_text_item, select_model_and_provider_id, validate_attachments_metadata, ) @@ -23,7 +20,6 @@ from models.config import Action from models.database.conversations import UserConversation from models.requests import Attachment, QueryRequest -from models.responses import ReferencedDocument from utils.token_counter import TokenCounter # User ID must be proper UUID @@ -392,48 +388,6 @@ def test_validate_attachments_metadata_invalid_content_type() -> None: ) -def test_parse_metadata_from_text_item_valid(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem.""" - text = """ - Some text... - Metadata: {"docs_url": "https://redhat.com", "title": "Example Doc"} - """ - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = text - - doc = parse_metadata_from_text_item(mock_item) - - assert isinstance(doc, ReferencedDocument) - assert doc.doc_url == AnyUrl("https://redhat.com") - assert doc.doc_title == "Example Doc" - - -def test_parse_metadata_from_text_item_missing_title(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with missing title.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = """Metadata: {"docs_url": "https://redhat.com"}""" - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - -def test_parse_metadata_from_text_item_missing_url(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with missing url.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = """Metadata: {"title": "Example Doc"}""" - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - -def test_parse_metadata_from_text_item_malformed_url(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with malformed url.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = ( - """Metadata: {"docs_url": "not a valid url", "title": "Example Doc"}""" - ) - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - def test_no_tools_parameter_backward_compatibility() -> None: """Test that default behavior is unchanged when no_tools parameter is not specified.""" # This test ensures that existing code that doesn't specify no_tools continues to work diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index b4b4ec5ee..db9601ff1 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -909,14 +909,22 @@ def _create_file_search_output(mocker: MockerFixture) -> Any: # Create mock result objects with proper 
attributes matching real llama-stack response result_1 = mocker.Mock() result_1.filename = "file2.pdf" - result_1.attributes = {"url": "http://example.com/doc2"} + result_1.attributes = { + "docs_url": "http://example.com/doc2", + "title": "Title 1", + "document_id": "doc-123", + } result_1.text = "Sample text from file2.pdf" result_1.score = 0.95 result_1.file_id = "file-123" result_1.model_dump = mocker.Mock( return_value={ "filename": "file2.pdf", - "attributes": {"url": "http://example.com/doc2"}, + "attributes": { + "docs_url": "http://example.com/doc2", + "title": "Title 1", + "document_id": "doc-123", + }, "text": "Sample text from file2.pdf", "score": 0.95, "file_id": "file-123", @@ -925,14 +933,22 @@ def _create_file_search_output(mocker: MockerFixture) -> Any: result_2 = mocker.Mock() result_2.filename = "file3.docx" - result_2.attributes = {} + result_2.attributes = { + "docs_url": "http://example.com/doc3", + "title": "Title 2", + "document_id": "doc-456", + } result_2.text = "Sample text from file3.docx" result_2.score = 0.85 result_2.file_id = "file-456" result_2.model_dump = mocker.Mock( return_value={ "filename": "file3.docx", - "attributes": {}, + "attributes": { + "docs_url": "http://example.com/doc3", + "title": "Title 2", + "document_id": "doc-456", + }, "text": "Sample text from file3.docx", "score": 0.85, "file_id": "file-456", @@ -975,27 +991,20 @@ async def test_retrieve_response_parses_referenced_documents( mock_client, "model-docs", qr, token="tkn", provider_id="test-provider" ) - assert len(referenced_docs) == 4 + # Referenced documents are now extracted only from file_search_call attributes + assert len(referenced_docs) == 2 - # Verify Doc 1 (URL citation) - doc1 = next((d for d in referenced_docs if d.doc_title == "Doc 1"), None) + # Verify Title 1 (File search result with URL) + doc1 = next((d for d in referenced_docs if d.doc_title == "Title 1"), None) assert doc1 - assert str(doc1.doc_url) == "http://example.com/doc1" + assert str(doc1.doc_url) == "http://example.com/doc2" + assert doc1.doc_id == "doc-123" - # Verify file1.txt (File citation) - doc2 = next((d for d in referenced_docs if d.doc_title == "file1.txt"), None) + # Verify Title 2 (File search result with URL) + doc2 = next((d for d in referenced_docs if d.doc_title == "Title 2"), None) assert doc2 - assert doc2.doc_url is None - - # Verify file2.pdf (File search result with URL) - doc3 = next((d for d in referenced_docs if d.doc_title == "file2.pdf"), None) - assert doc3 - assert str(doc3.doc_url) == "http://example.com/doc2" - - # Verify file3.docx (File search result without URL) - doc4 = next((d for d in referenced_docs if d.doc_title == "file3.docx"), None) - assert doc4 - assert doc4.doc_url is None + assert str(doc2.doc_url) == "http://example.com/doc3" + assert doc2.doc_id == "doc-456" # Verify RAG chunks were extracted from file_search_call results assert len(_summary.rag_chunks) == 2 diff --git a/tests/unit/cache/test_postgres_cache.py b/tests/unit/cache/test_postgres_cache.py index 2855c9daa..699c93102 100644 --- a/tests/unit/cache/test_postgres_cache.py +++ b/tests/unit/cache/test_postgres_cache.py @@ -601,7 +601,7 @@ def test_insert_and_get_with_referenced_documents( inserted_json_str = sql_params[-3] assert json.loads(inserted_json_str) == [ - {"doc_url": "http://example.com/", "doc_title": "Test Doc"} + {"doc_url": "http://example.com/", "doc_title": "Test Doc", "doc_id": None} ] # Simulate the database returning that data @@ -612,7 +612,7 @@ def 
test_insert_and_get_with_referenced_documents( "bar", "start_time", "end_time", - [{"doc_url": "http://example.com/", "doc_title": "Test Doc"}], + [{"doc_url": "http://example.com/", "doc_title": "Test Doc", "doc_id": None}], None, # tool_calls None, # tool_results ) diff --git a/tests/unit/models/config/test_byok_rag.py b/tests/unit/models/config/test_byok_rag.py index d66373479..e8cf2f54f 100644 --- a/tests/unit/models/config/test_byok_rag.py +++ b/tests/unit/models/config/test_byok_rag.py @@ -20,7 +20,7 @@ def test_byok_rag_configuration_default_values() -> None: byok_rag = ByokRag( # pyright: ignore[reportCallIssue] rag_id="rag_id", - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path="tests/configuration/rag.txt", ) assert byok_rag is not None @@ -28,7 +28,7 @@ def test_byok_rag_configuration_default_values() -> None: assert byok_rag.rag_type == DEFAULT_RAG_TYPE assert byok_rag.embedding_model == DEFAULT_EMBEDDING_MODEL assert byok_rag.embedding_dimension == DEFAULT_EMBEDDING_DIMENSION - assert byok_rag.vector_db_id == "vector_db_id" + assert byok_rag.vector_store_id == "vector_store_id" assert byok_rag.db_path == Path("tests/configuration/rag.txt") @@ -38,7 +38,7 @@ def test_byok_rag_configuration_nondefault_values() -> None: Verify that ByokRag class accepts and stores non-default configuration values. Asserts that rag_id, rag_type, embedding_model, embedding_dimension, and - vector_db_id match the provided inputs and that db_path is converted to a + vector_store_id match the provided inputs and that db_path is converted to a Path. """ @@ -47,7 +47,7 @@ def test_byok_rag_configuration_nondefault_values() -> None: rag_type="rag_type", embedding_model="embedding_model", embedding_dimension=1024, - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path=Path("tests/configuration/rag.txt"), ) assert byok_rag is not None @@ -55,7 +55,7 @@ def test_byok_rag_configuration_nondefault_values() -> None: assert byok_rag.rag_type == "rag_type" assert byok_rag.embedding_model == "embedding_model" assert byok_rag.embedding_dimension == 1024 - assert byok_rag.vector_db_id == "vector_db_id" + assert byok_rag.vector_store_id == "vector_store_id" assert byok_rag.db_path == Path("tests/configuration/rag.txt") @@ -68,7 +68,7 @@ def test_byok_rag_configuration_wrong_dimension() -> None: rag_type="rag_type", embedding_model="embedding_model", embedding_dimension=-1024, - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path=Path("tests/configuration/rag.txt"), ) @@ -84,7 +84,7 @@ def test_byok_rag_configuration_empty_rag_id() -> None: rag_type="rag_type", embedding_model="embedding_model", embedding_dimension=1024, - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path=Path("tests/configuration/rag.txt"), ) @@ -107,7 +107,7 @@ def test_byok_rag_configuration_empty_rag_type() -> None: rag_type="", embedding_model="embedding_model", embedding_dimension=1024, - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path=Path("tests/configuration/rag.txt"), ) @@ -123,12 +123,12 @@ def test_byok_rag_configuration_empty_embedding_model() -> None: rag_type="rag_type", embedding_model="", embedding_dimension=1024, - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path=Path("tests/configuration/rag.txt"), ) -def test_byok_rag_configuration_empty_vector_db_id() -> None: +def test_byok_rag_configuration_empty_vector_store_id() -> None: """Test the ByokRag constructor.""" with pytest.raises( @@ -139,6 
+139,6 @@ def test_byok_rag_configuration_empty_vector_db_id() -> None: rag_type="rag_type", embedding_model="embedding_model", embedding_dimension=1024, - vector_db_id="", + vector_store_id="", db_path=Path("tests/configuration/rag.txt"), ) diff --git a/tests/unit/models/config/test_dump_configuration.py b/tests/unit/models/config/test_dump_configuration.py index c7e1e2ec5..e96613ab8 100644 --- a/tests/unit/models/config/test_dump_configuration.py +++ b/tests/unit/models/config/test_dump_configuration.py @@ -815,7 +815,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: byok_rag=[ ByokRag( rag_id="rag_id", - vector_db_id="vector_db_id", + vector_store_id="vector_store_id", db_path="tests/configuration/rag.txt", ), ], @@ -934,7 +934,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: "embedding_model": "sentence-transformers/all-mpnet-base-v2", "rag_id": "rag_id", "rag_type": "inline::faiss", - "vector_db_id": "vector_db_id", + "vector_store_id": "vector_store_id", }, ], "quota_handlers": { diff --git a/tests/unit/test_llama_stack_configuration.py b/tests/unit/test_llama_stack_configuration.py index f6cef3a98..4b058910f 100644 --- a/tests/unit/test_llama_stack_configuration.py +++ b/tests/unit/test_llama_stack_configuration.py @@ -8,8 +8,10 @@ from llama_stack_configuration import ( generate_configuration, - construct_vector_dbs_section, + construct_vector_stores_section, construct_vector_io_providers_section, + construct_storage_backends_section, + construct_models_section, ) from models.config import ( Configuration, @@ -20,55 +22,59 @@ ) # ============================================================================= -# Test construct_vector_dbs_section +# Test construct_vector_stores_section # ============================================================================= -def test_construct_vector_dbs_section_empty() -> None: +def test_construct_vector_stores_section_empty() -> None: """Test with no BYOK RAG config.""" ls_config: dict[str, Any] = {} byok_rag: list[dict[str, Any]] = [] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 0 -def test_construct_vector_dbs_section_preserves_existing() -> None: - """Test preserves existing vector_dbs entries.""" +def test_construct_vector_stores_section_preserves_existing() -> None: + """Test preserves existing vector_stores entries.""" ls_config = { - "vector_dbs": [ - {"vector_db_id": "existing", "provider_id": "existing_provider"}, - ] + "registered_resources": { + "vector_stores": [ + {"vector_store_id": "existing", "provider_id": "existing_provider"}, + ] + } } byok_rag: list[dict[str, Any]] = [] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["vector_db_id"] == "existing" + assert output[0]["vector_store_id"] == "existing" -def test_construct_vector_dbs_section_adds_new() -> None: +def test_construct_vector_stores_section_adds_new() -> None: """Test adds new BYOK RAG entries.""" ls_config: dict[str, Any] = {} byok_rag = [ { "rag_id": "rag1", - "vector_db_id": "db1", + "vector_store_id": "store1", "embedding_model": "test-model", "embedding_dimension": 512, }, ] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["vector_db_id"] == "db1" - assert output[0]["provider_id"] == "byok_db1" + 
assert output[0]["vector_store_id"] == "store1" + assert output[0]["provider_id"] == "byok_store1" assert output[0]["embedding_model"] == "test-model" assert output[0]["embedding_dimension"] == 512 -def test_construct_vector_dbs_section_merge() -> None: +def test_construct_vector_stores_section_merge() -> None: """Test merges existing and new entries.""" - ls_config = {"vector_dbs": [{"vector_db_id": "existing"}]} - byok_rag = [{"vector_db_id": "new_db"}] - output = construct_vector_dbs_section(ls_config, byok_rag) + ls_config = { + "registered_resources": {"vector_stores": [{"vector_store_id": "existing"}]} + } + byok_rag = [{"vector_store_id": "new_store"}] + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 2 @@ -99,14 +105,120 @@ def test_construct_vector_io_providers_section_adds_new() -> None: ls_config: dict[str, Any] = {"providers": {}} byok_rag = [ { - "vector_db_id": "db1", + "vector_store_id": "store1", "rag_type": "inline::faiss", }, ] output = construct_vector_io_providers_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["provider_id"] == "byok_db1" + assert output[0]["provider_id"] == "byok_store1" assert output[0]["provider_type"] == "inline::faiss" + assert output[0]["config"]["persistence"]["backend"] == "byok_store1_storage" + assert output[0]["config"]["persistence"]["namespace"] == "vector_io::faiss" + + +# ============================================================================= +# Test construct_storage_backends_section +# ============================================================================= + + +def test_construct_storage_backends_section_empty() -> None: + """Test with no BYOK RAG config.""" + ls_config: dict[str, Any] = {} + byok_rag: list[dict[str, Any]] = [] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_storage_backends_section_preserves_existing() -> None: + """Test preserves existing backends.""" + ls_config = { + "storage": { + "backends": { + "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kv.db"} + } + } + } + byok_rag: list[dict[str, Any]] = [] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 1 + assert "kv_default" in output + + +def test_construct_storage_backends_section_adds_new() -> None: + """Test adds new BYOK RAG backend entries.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_store_id": "store1", + "db_path": "/path/to/store1.db", + }, + ] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 1 + assert "byok_store1_storage" in output + assert output["byok_store1_storage"]["type"] == "kv_sqlite" + assert output["byok_store1_storage"]["db_path"] == "/path/to/store1.db" + + +# ============================================================================= +# Test construct_models_section +# ============================================================================= + + +def test_construct_models_section_empty() -> None: + """Test with no BYOK RAG config.""" + ls_config: dict[str, Any] = {} + byok_rag: list[dict[str, Any]] = [] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_models_section_preserves_existing() -> None: + """Test preserves existing models.""" + ls_config = { + "registered_resources": { + "models": [{"model_id": "existing", "model_type": "llm"}] + } + } + byok_rag: list[dict[str, Any]] = [] + output = construct_models_section(ls_config, 
byok_rag) + assert len(output) == 1 + assert output[0]["model_id"] == "existing" + + +def test_construct_models_section_adds_embedding_model() -> None: + """Test adds embedding model from BYOK RAG.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_store_id": "store1", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + }, + ] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 1 + assert output[0]["model_id"] == "byok_store1_embedding" + assert output[0]["model_type"] == "embedding" + assert output[0]["provider_id"] == "sentence-transformers" + assert output[0]["provider_model_id"] == "all-mpnet-base-v2" + assert output[0]["metadata"]["embedding_dimension"] == 768 + + +def test_construct_models_section_strips_prefix() -> None: + """Test strips sentence-transformers/ prefix from embedding model.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_store_id": "store1", + "embedding_model": "sentence-transformers//usr/path/model", + "embedding_dimension": 768, + }, + ] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 1 + assert output[0]["provider_model_id"] == "/usr/path/model" # ============================================================================= @@ -164,10 +276,11 @@ def test_generate_configuration_with_byok(tmp_path: Path) -> None: "byok_rag": [ { "rag_id": "rag1", - "vector_db_id": "db1", + "vector_store_id": "store1", "embedding_model": "test-model", "embedding_dimension": 256, "rag_type": "inline::faiss", + "db_path": "/tmp/store1.db", }, ], } @@ -178,5 +291,19 @@ def test_generate_configuration_with_byok(tmp_path: Path) -> None: with open(outfile, encoding="utf-8") as f: result = yaml.safe_load(f) - db_ids = [db["vector_db_id"] for db in result["vector_dbs"]] - assert "db1" in db_ids + # Check registered_resources.vector_stores + store_ids = [ + s["vector_store_id"] for s in result["registered_resources"]["vector_stores"] + ] + assert "store1" in store_ids + + # Check storage.backends + assert "byok_store1_storage" in result["storage"]["backends"] + + # Check providers.vector_io + provider_ids = [p["provider_id"] for p in result["providers"]["vector_io"]] + assert "byok_store1" in provider_ids + + # Check registered_resources.models for embedding model + model_ids = [m["model_id"] for m in result["registered_resources"]["models"]] + assert "byok_store1_embedding" in model_ids
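
Taken together, the four `construct_*` helpers wire a BYOK RAG entry into the new Llama Stack config layout. As a sketch (assembled by hand from the functions above and the values used in `test_generate_configuration_with_byok`, not copied from a generated run.yaml), the enriched sections for a single entry would look like this:

```python
# Enriched config sections produced by enrich_byok_rag for one BYOK RAG entry
# with vector_store_id="store1", rag_type="inline::faiss",
# embedding_model="test-model", embedding_dimension=256,
# db_path="/tmp/store1.db".
enriched_sections = {
    "storage": {
        "backends": {
            # construct_storage_backends_section: one kv_sqlite backend per entry
            "byok_store1_storage": {
                "type": "kv_sqlite",
                "db_path": "/tmp/store1.db",
            },
        },
    },
    "providers": {
        "vector_io": [
            # construct_vector_io_providers_section: provider referencing the backend
            {
                "provider_id": "byok_store1",
                "provider_type": "inline::faiss",
                "config": {
                    "persistence": {
                        "namespace": "vector_io::faiss",
                        "backend": "byok_store1_storage",
                    },
                },
            },
        ],
    },
    "registered_resources": {
        "vector_stores": [
            # construct_vector_stores_section
            {
                "vector_store_id": "store1",
                "provider_id": "byok_store1",
                "embedding_model": "test-model",
                "embedding_dimension": 256,
            },
        ],
        "models": [
            # construct_models_section ("test-model" carries no
            # sentence-transformers/ prefix, so it is kept as-is)
            {
                "model_id": "byok_store1_embedding",
                "model_type": "embedding",
                "provider_id": "sentence-transformers",
                "provider_model_id": "test-model",
                "metadata": {"embedding_dimension": 256},
            },
        ],
    },
}
```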