From 9d2fd3ac3f822de8c279c0c13516396cfed4622f Mon Sep 17 00:00:00 2001 From: nishika26 Date: Thu, 11 Dec 2025 16:49:04 +0530 Subject: [PATCH 01/11] rearranging endpoints for swagger and redocs --- backend/app/api/docs/openapi_config.py | 135 ++++++++++++++++++ backend/app/api/routes/collection_job.py | 2 +- backend/app/api/routes/collections.py | 2 +- backend/app/api/routes/credentials.py | 2 +- backend/app/api/routes/cron.py | 2 +- .../app/api/routes/doc_transformation_job.py | 2 +- backend/app/api/routes/documents.py | 2 +- backend/app/api/routes/evaluation.py | 2 +- backend/app/api/routes/fine_tuning.py | 2 +- backend/app/api/routes/login.py | 2 +- backend/app/api/routes/model_evaluation.py | 2 +- backend/app/api/routes/onboarding.py | 2 +- backend/app/api/routes/organization.py | 2 +- backend/app/api/routes/project.py | 2 +- backend/app/api/routes/responses.py | 2 +- backend/app/api/routes/threads.py | 2 +- backend/app/api/routes/users.py | 2 +- backend/app/main.py | 25 ++++ 18 files changed, 176 insertions(+), 16 deletions(-) create mode 100644 backend/app/api/docs/openapi_config.py diff --git a/backend/app/api/docs/openapi_config.py b/backend/app/api/docs/openapi_config.py new file mode 100644 index 000000000..6de881b2d --- /dev/null +++ b/backend/app/api/docs/openapi_config.py @@ -0,0 +1,135 @@ +""" +OpenAPI schema customization for ReDoc documentation. + +This module contains tag metadata and custom OpenAPI extensions +for organizing and enhancing the API documentation. +""" + +# Tag metadata for organizing endpoints in documentation +tags_metadata = [ + { + "name": "Onboarding", + "description": "Getting started with the platform", + }, + { + "name": "Documents", + "description": "Document upload, transformation, and management operations", + }, + { + "name": "Collections", + "description": "Collection creation, deletion, and management for vector stores and assistants", + }, + { + "name": "Config Management", + "description": "Configuration management operations", + }, + { + "name": "LLM", + "description": "Large Language Model inference and interaction endpoints", + }, + { + "name": "Evaluation", + "description": "Dataset upload, running evaluations, listing datasets as well as evaluations", + }, + { + "name": "Fine Tuning", + "description": "Fine tuning LLM for specific use cases by providing labelled dataset", + }, + { + "name": "Model Evaluation", + "description": "Fine tuned model performance evaluation and benchmarking", + }, + { + "name": "Responses", + "description": "OpenAI Responses API integration for managing LLM conversations", + }, + { + "name": "OpenAI Conversations", + "description": "OpenAI conversation management and interaction", + }, + { + "name": "Users", + "description": "User account management and operations", + }, + { + "name": "Organizations", + "description": "Organization management and settings", + }, + { + "name": "Projects", + "description": "Project management operations", + }, + { + "name": "API Keys", + "description": "API key generation and management", + }, + { + "name": "Credentials", + "description": "Credential management and authentication", + }, + {"name": "Login", "description": "User authentication and login operations"}, + { + "name": "Assistants", + "description": "[**Deprecated**] OpenAI Assistant creation and management. This feature will be removed in a future version.", + }, + { + "name": "Threads", + "description": "[**Deprecated**] Conversation thread management for assistants. 
This feature will be removed in a future version.", + }, +] + +# ReDoc-specific extension: x-tagGroups for hierarchical organization +# This creates collapsible groups in the ReDoc sidebar +tag_groups = [ + {"name": "Get Started", "tags": ["Onboarding"]}, + { + "name": "Capabilities", + "tags": [ + "Documents", + "Collections", + "Config Management", + "LLM", + "Evaluation", + "Fine Tuning", + "Model Evaluation", + "Responses", + "OpenAI Conversations", + "Assistants", + "Threads", + ], + }, + { + "name": "Administration", + "tags": [ + "Users", + "Organizations", + "Projects", + "API Keys", + "Credentials", + "Login", + ], + }, +] + + +def customize_openapi_schema(openapi_schema: dict) -> dict: + """ + Add custom OpenAPI extensions to the schema. + + Args: + openapi_schema: The base OpenAPI schema from FastAPI + + Returns: + The customized OpenAPI schema with x-tagGroups and other extensions + """ + openapi_schema["x-tagGroups"] = tag_groups + deprecated_tags = ["Assistants", "Threads"] + + for _, path_item in openapi_schema.get("paths", {}).items(): + for method, operation in path_item.items(): + if method in ["get", "post", "put", "delete", "patch"]: + operation_tags = operation.get("tags", []) + if any(tag in deprecated_tags for tag in operation_tags): + operation["x-badges"] = [{"name": "Deprecated", "color": "orange"}] + + return openapi_schema diff --git a/backend/app/api/routes/collection_job.py b/backend/app/api/routes/collection_job.py index 5636ed8f4..c16fa6586 100644 --- a/backend/app/api/routes/collection_job.py +++ b/backend/app/api/routes/collection_job.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/collections", tags=["collections"]) +router = APIRouter(prefix="/collections", tags=["Collections"]) @router.get( diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py index 78a40ae7e..625a86d66 100644 --- a/backend/app/api/routes/collections.py +++ b/backend/app/api/routes/collections.py @@ -35,7 +35,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/collections", tags=["collections"]) +router = APIRouter(prefix="/collections", tags=["Collections"]) collection_callback_router = APIRouter() diff --git a/backend/app/api/routes/credentials.py b/backend/app/api/routes/credentials.py index e2f43cd85..57cf99750 100644 --- a/backend/app/api/routes/credentials.py +++ b/backend/app/api/routes/credentials.py @@ -17,7 +17,7 @@ from app.utils import APIResponse logger = logging.getLogger(__name__) -router = APIRouter(prefix="/credentials", tags=["credentials"]) +router = APIRouter(prefix="/credentials", tags=["Credentials"]) @router.post( diff --git a/backend/app/api/routes/cron.py b/backend/app/api/routes/cron.py index a9e7b66ed..f04e11885 100644 --- a/backend/app/api/routes/cron.py +++ b/backend/app/api/routes/cron.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(tags=["cron"]) +router = APIRouter(tags=["Cron"]) @router.get( diff --git a/backend/app/api/routes/doc_transformation_job.py b/backend/app/api/routes/doc_transformation_job.py index dd5f98827..e3636446c 100644 --- a/backend/app/api/routes/doc_transformation_job.py +++ b/backend/app/api/routes/doc_transformation_job.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/documents/transformation", tags=["documents"]) +router = APIRouter(prefix="/documents/transformation", tags=["Documents"]) @router.get( diff --git a/backend/app/api/routes/documents.py 
b/backend/app/api/routes/documents.py index ec046aacb..24d295299 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/documents", tags=["documents"]) +router = APIRouter(prefix="/documents", tags=["Documents"]) doctransformation_callback_router = APIRouter() diff --git a/backend/app/api/routes/evaluation.py b/backend/app/api/routes/evaluation.py index 058950d65..bc8ff2b03 100644 --- a/backend/app/api/routes/evaluation.py +++ b/backend/app/api/routes/evaluation.py @@ -45,7 +45,7 @@ "text/plain", # Some systems report CSV as text/plain } -router = APIRouter(tags=["evaluation"]) +router = APIRouter(tags=["Evaluation"]) def _dataset_to_response(dataset) -> DatasetUploadResponse: diff --git a/backend/app/api/routes/fine_tuning.py b/backend/app/api/routes/fine_tuning.py index 33e14cbe7..252d42561 100644 --- a/backend/app/api/routes/fine_tuning.py +++ b/backend/app/api/routes/fine_tuning.py @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/fine_tuning", tags=["fine_tuning"]) +router = APIRouter(prefix="/fine_tuning", tags=["Fine Tuning"]) OPENAI_TO_INTERNAL_STATUS = { diff --git a/backend/app/api/routes/login.py b/backend/app/api/routes/login.py index 357285c07..0ca6b8f61 100644 --- a/backend/app/api/routes/login.py +++ b/backend/app/api/routes/login.py @@ -18,7 +18,7 @@ verify_password_reset_token, ) -router = APIRouter(tags=["login"]) +router = APIRouter(tags=["Login"]) @router.post("/login/access-token") diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index b38172ddf..efd81a86e 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -30,7 +30,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(prefix="/model_evaluation", tags=["model_evaluation"]) +router = APIRouter(prefix="/model_evaluation", tags=["Model Evaluation"]) def attach_prediction_file_url(model_obj, storage): diff --git a/backend/app/api/routes/onboarding.py b/backend/app/api/routes/onboarding.py index f081c4010..9502a91d4 100644 --- a/backend/app/api/routes/onboarding.py +++ b/backend/app/api/routes/onboarding.py @@ -8,7 +8,7 @@ from app.models import OnboardingRequest, OnboardingResponse, User from app.utils import APIResponse, load_description -router = APIRouter(tags=["onboarding"]) +router = APIRouter(tags=["Onboarding"]) @router.post( diff --git a/backend/app/api/routes/organization.py b/backend/app/api/routes/organization.py index 29f6f62aa..8526b6755 100644 --- a/backend/app/api/routes/organization.py +++ b/backend/app/api/routes/organization.py @@ -20,7 +20,7 @@ from app.utils import APIResponse logger = logging.getLogger(__name__) -router = APIRouter(prefix="/organizations", tags=["organizations"]) +router = APIRouter(prefix="/organizations", tags=["Organizations"]) # Retrieve organizations diff --git a/backend/app/api/routes/project.py b/backend/app/api/routes/project.py index 7be763a75..075e42b9c 100644 --- a/backend/app/api/routes/project.py +++ b/backend/app/api/routes/project.py @@ -17,7 +17,7 @@ from app.utils import APIResponse logger = logging.getLogger(__name__) -router = APIRouter(prefix="/projects", tags=["projects"]) +router = APIRouter(prefix="/projects", tags=["Projects"]) # Retrieve projects diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py index c84659ee0..4635c9c4a 100644 --- 
a/backend/app/api/routes/responses.py +++ b/backend/app/api/routes/responses.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -router = APIRouter(tags=["responses"]) +router = APIRouter(tags=["Responses"]) @router.post("/responses", response_model=APIResponse[ResponseJobStatus]) diff --git a/backend/app/api/routes/threads.py b/backend/app/api/routes/threads.py index 9a38b8637..b42830d7d 100644 --- a/backend/app/api/routes/threads.py +++ b/backend/app/api/routes/threads.py @@ -18,7 +18,7 @@ from app.core.langfuse.langfuse import LangfuseTracer logger = logging.getLogger(__name__) -router = APIRouter(tags=["threads"]) +router = APIRouter(tags=["Threads"]) class StartThreadRequest(BaseModel): diff --git a/backend/app/api/routes/users.py b/backend/app/api/routes/users.py index 5dd83fa34..55a017409 100644 --- a/backend/app/api/routes/users.py +++ b/backend/app/api/routes/users.py @@ -27,7 +27,7 @@ from app.core.exception_handlers import HTTPException logger = logging.getLogger(__name__) -router = APIRouter(prefix="/users", tags=["users"]) +router = APIRouter(prefix="/users", tags=["Users"]) @router.get( diff --git a/backend/app/main.py b/backend/app/main.py index 0b4734845..bfa219e7a 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -2,8 +2,10 @@ from fastapi import FastAPI from fastapi.routing import APIRoute +from fastapi.openapi.utils import get_openapi from asgi_correlation_id.middleware import CorrelationIdMiddleware from app.api.main import api_router +from app.api.docs.openapi_config import tags_metadata, customize_openapi_schema from app.core.config import settings from app.core.exception_handlers import register_exception_handlers from app.core.middleware import http_request_logger @@ -25,8 +27,31 @@ def custom_generate_unique_id(route: APIRoute) -> str: title=settings.PROJECT_NAME, openapi_url=f"{settings.API_V1_STR}/openapi.json", generate_unique_id_function=custom_generate_unique_id, + description="**Responsible AI for the development sector**", ) + +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema + + openapi_schema = get_openapi( + title=app.title, + version="0.5.0", + openapi_version=app.openapi_version, + description=app.description, + routes=app.routes, + tags=tags_metadata, + ) + + openapi_schema = customize_openapi_schema(openapi_schema) + + app.openapi_schema = openapi_schema + return app.openapi_schema + + +app.openapi = custom_openapi + app.middleware("http")(http_request_logger) app.add_middleware(CorrelationIdMiddleware) From 763e79881cb90976e85f143d8f71c92cd3919834 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Thu, 11 Dec 2025 17:27:23 +0530 Subject: [PATCH 02/11] fixing onboarding file --- backend/app/api/docs/onboarding/onboarding.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/app/api/docs/onboarding/onboarding.md b/backend/app/api/docs/onboarding/onboarding.md index b6816c60a..ef1b33018 100644 --- a/backend/app/api/docs/onboarding/onboarding.md +++ b/backend/app/api/docs/onboarding/onboarding.md @@ -20,9 +20,14 @@ ## 🔑 Credentials (Optional) - If provided, the given credentials will be **encrypted** and stored as project credentials. -- The `credential` parameter accepts a list of one or more credentials (e.g., an OpenAI key, Langfuse credentials, etc.). +- The `credentials` parameter accepts a list of one or more credentials (e.g., an OpenAI key, Langfuse credentials, etc.). - If omitted, the project will be created **without credentials**. 
- We’ve also included a list of the providers currently supported by kaapi. + + ### Supported Providers + - openai + - langfuse + ### Example: For sending multiple credentials - ``` "credentials": [ @@ -40,9 +45,6 @@ } ] ``` - ### Supported Providers - - openai - - langfuse --- ## 🔄 Transactional Guarantee From 1851982a77b0f3e8de4c9f663d2f5dc4b344ebea Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 09:53:57 +0530 Subject: [PATCH 03/11] refactoring existing docs --- backend/app/api/docs/collections/create.md | 2 +- backend/app/api/docs/collections/list.md | 2 +- .../api/docs/evaluation/create_evaluation.md | 105 +++++++----------- .../app/api/docs/evaluation/delete_dataset.md | 15 --- .../app/api/docs/evaluation/get_dataset.md | 21 +--- .../app/api/docs/evaluation/get_evaluation.md | 55 ++------- .../app/api/docs/evaluation/list_datasets.md | 18 +-- .../api/docs/evaluation/list_evaluations.md | 24 +--- .../app/api/docs/evaluation/upload_dataset.md | 55 ++++----- backend/app/api/docs/llm/llm_call.md | 2 +- backend/app/models/collection.py | 6 +- 11 files changed, 85 insertions(+), 220 deletions(-) diff --git a/backend/app/api/docs/collections/create.md b/backend/app/api/docs/collections/create.md index aef52efaa..5f12fbcb4 100644 --- a/backend/app/api/docs/collections/create.md +++ b/backend/app/api/docs/collections/create.md @@ -7,7 +7,7 @@ pipeline: * Create an OpenAI [Vector Store](https://platform.openai.com/docs/api-reference/vector-stores) based on those file(s). -* [To be deprecated] Attach the Vector Store to an OpenAI +* [Deprecated] Attach the Vector Store to an OpenAI [Assistant](https://platform.openai.com/docs/api-reference/assistants). Use parameters in the request body relevant to an Assistant to flesh out its configuration. Note that an assistant will only be created when you pass both diff --git a/backend/app/api/docs/collections/list.md b/backend/app/api/docs/collections/list.md index cabcd7c61..4224a5b83 100644 --- a/backend/app/api/docs/collections/list.md +++ b/backend/app/api/docs/collections/list.md @@ -3,4 +3,4 @@ not deleted If a vector store was created - `llm_service_name` and `llm_service_id` in the response denote the name of the vector store (eg. 'openai vector store') and its id. -[To be deprecated] If an assistant was created, `llm_service_name` and `llm_service_id` in the response denotes the name of the model used in the assistant (eg. 'gpt-4o') and assistant id. +[Deprecated] If an assistant was created, `llm_service_name` and `llm_service_id` in the response denotes the name of the model used in the assistant (eg. 'gpt-4o') and assistant id. diff --git a/backend/app/api/docs/evaluation/create_evaluation.md b/backend/app/api/docs/evaluation/create_evaluation.md index 313ad0079..176751024 100644 --- a/backend/app/api/docs/evaluation/create_evaluation.md +++ b/backend/app/api/docs/evaluation/create_evaluation.md @@ -1,80 +1,59 @@ -Start an evaluation using OpenAI Batch API. +Start an evaluation run using the OpenAI Batch API. -This endpoint: -1. Fetches the dataset from database and validates it has Langfuse dataset ID -2. Creates an EvaluationRun record in the database -3. Fetches dataset items from Langfuse -4. Builds JSONL for batch processing (config is used as-is) -5. Creates a batch job via the generic batch infrastructure -6. Returns the evaluation run details with batch_job_id +Evaluations allow you to systematically test LLM configurations against +predefined datasets with automatic progress tracking and result collection. 
+ +**Key Features:** +* Fetches dataset items from Langfuse for evaluation +* Creates batch processing job via OpenAI Batch API +* Asynchronous processing with automatic progress tracking (checks every 60s) +* Supports configuration from direct parameters or existing assistants +* Stores results for comparison and analysis +* Provider-agnostic configuration - params are passed through to OpenAI as-is -The batch will be processed asynchronously by Celery Beat (every 60s). -Use GET /evaluations/{evaluation_id} to check progress. -## Request Body +**How it works:** +1. Validates dataset exists and has associated Langfuse dataset ID +2. Creates an EvaluationRun record in the database +3. Fetches all dataset items from Langfuse +4. Generates JSONL batch file with your configuration +5. Submits batch job to OpenAI +6. Returns evaluation run details with batch_job_id for tracking + +Use `GET /evaluations/{evaluation_id}` to monitor progress and retrieve results. -- **dataset_id** (required): ID of the evaluation dataset (from /evaluations/datasets) -- **experiment_name** (required): Name for this evaluation experiment/run -- **config** (optional): Configuration dict that will be used as-is in JSONL generation. Can include any OpenAI Responses API parameters like: - - model: str (e.g., "gpt-4o", "gpt-5") - - instructions: str - - tools: list (e.g., [{"type": "file_search", "vector_store_ids": [...]}]) - - reasoning: dict (e.g., {"effort": "low"}) - - text: dict (e.g., {"verbosity": "low"}) - - temperature: float - - include: list (e.g., ["file_search_call.results"]) - - Note: "input" will be added automatically from the dataset -- **assistant_id** (optional): Assistant ID to fetch configuration from. If provided, configuration will be fetched from the assistant in the database. Config can be passed as empty dict {} when using assistant_id. 
-## Example with config +**Example: Using Direct Configuration** ```json { - "dataset_id": 123, - "experiment_name": "test_run", - "config": { - "model": "gpt-4.1", - "instructions": "You are a helpful FAQ assistant.", - "tools": [ - { - "type": "file_search", - "vector_store_ids": ["vs_12345"], - "max_num_results": 3 - } - ], - "include": ["file_search_call.results"] - } + "dataset_id": 123, + "experiment_name": "gpt4_file_search_test", + "config": { + "model": "gpt-4o", + "instructions": "You are a helpful FAQ assistant for farmers.", + "tools": [ + { + "type": "file_search", + "vector_store_ids": ["vs_abc123"], + "max_num_results": 5 + } + ], + "temperature": 0.7, + "include": ["file_search_call.results"] + } } ``` -## Example with assistant_id +**Example: Using Existing Assistant** ```json { - "dataset_id": 123, - "experiment_name": "test_run", - "config": {}, - "assistant_id": "asst_xyz" + "dataset_id": 123, + "experiment_name": "production_assistant_eval", + "config": {}, + "assistant_id": "asst_xyz789" } ``` -## Returns - -EvaluationRunPublic with batch details and status: -- id: Evaluation run ID -- run_name: Name of the evaluation run -- dataset_name: Name of the dataset used -- dataset_id: ID of the dataset used -- config: Configuration used for the evaluation -- batch_job_id: ID of the batch job processing this evaluation -- status: Current status (pending, running, completed, failed) -- total_items: Total number of items being evaluated -- completed_items: Number of items completed so far -- results: Evaluation results (when completed) -- error_message: Error message if failed - -## Error Responses - -- **404**: Dataset or assistant not found or not accessible -- **400**: Missing required credentials (OpenAI or Langfuse), dataset missing Langfuse ID, or config missing required fields -- **500**: Failed to configure API clients or start batch evaluation +**Note:** When using `assistant_id`, configuration is fetched from the assistant in the database. You can pass `config` as an empty object `{}`. diff --git a/backend/app/api/docs/evaluation/delete_dataset.md b/backend/app/api/docs/evaluation/delete_dataset.md index 461c30fce..630015d37 100644 --- a/backend/app/api/docs/evaluation/delete_dataset.md +++ b/backend/app/api/docs/evaluation/delete_dataset.md @@ -1,18 +1,3 @@ Delete a dataset by ID. This will remove the dataset record from the database. The CSV file in object store (if exists) will remain for audit purposes, but the dataset will no longer be accessible for creating new evaluations. - -## Path Parameters - -- **dataset_id**: ID of the dataset to delete - -## Returns - -Success message with deleted dataset details: -- message: Confirmation message -- dataset_id: ID of the deleted dataset - -## Error Responses - -- **404**: Dataset not found or not accessible to your organization/project -- **400**: Dataset cannot be deleted (e.g., has active evaluation runs) diff --git a/backend/app/api/docs/evaluation/get_dataset.md b/backend/app/api/docs/evaluation/get_dataset.md index 02e1e73aa..a1a27276a 100644 --- a/backend/app/api/docs/evaluation/get_dataset.md +++ b/backend/app/api/docs/evaluation/get_dataset.md @@ -1,22 +1,3 @@ Get details of a specific dataset by ID. -Retrieves comprehensive information about a dataset including metadata, object store URL, and Langfuse integration details. 
- -## Path Parameters - -- **dataset_id**: ID of the dataset to retrieve - -## Returns - -DatasetUploadResponse with dataset details: -- dataset_id: Unique identifier for the dataset -- dataset_name: Name of the dataset (sanitized) -- total_items: Total number of items including duplication -- original_items: Number of original items before duplication -- duplication_factor: Factor by which items were duplicated -- langfuse_dataset_id: ID of the dataset in Langfuse -- object_store_url: URL to the CSV file in object storage - -## Error Responses - -- **404**: Dataset not found or not accessible to your organization/project +Returns comprehensive dataset information including metadata (ID, name, item counts, duplication factor), Langfuse integration details (dataset ID), and the object store URL for the CSV file. diff --git a/backend/app/api/docs/evaluation/get_evaluation.md b/backend/app/api/docs/evaluation/get_evaluation.md index 97b094497..1e3186d2c 100644 --- a/backend/app/api/docs/evaluation/get_evaluation.md +++ b/backend/app/api/docs/evaluation/get_evaluation.md @@ -1,37 +1,12 @@ -Get the current status of a specific evaluation run. +Get the current status and results of a specific evaluation run by the evaluation ID along with some optional query parameters listed below. -Retrieves comprehensive information about an evaluation run including its current processing status, results (if completed), and error details (if failed). +Returns comprehensive evaluation information including processing status, configuration, progress metrics, and detailed scores with Q&A context when requested. You can check this endpoint periodically to get to know the evaluation progress. Evaluations are processed asynchronously with status checks every 60 seconds. -## Path Parameters +**Query Parameters:** +* `get_trace_info` (optional, default: false) - Include Langfuse trace scores with Q&A context. Data is fetched from Langfuse on first request and cached for subsequent calls. Only available for completed evaluations. +* `resync_score` (optional, default: false) - Clear cached scores and re-fetch from Langfuse. Useful when evaluators have been updated. Requires `get_trace_info=true`. -- **evaluation_id**: ID of the evaluation run - -## Query Parameters - -- **get_trace_info** (optional, default: false): If true, fetch and include Langfuse trace scores with Q&A context. On first request, data is fetched from Langfuse and cached in the score column. Subsequent requests return cached data. Only available for completed evaluations. - -- **resync_score** (optional, default: false): If true, clear cached scores and re-fetch from Langfuse. Useful when new evaluators have been added or scores have been updated. Requires get_trace_info=true. 
- -## Returns - -EvaluationRunPublic with current status and results: -- id: Evaluation run ID -- run_name: Name of the evaluation run -- dataset_name: Name of the dataset used -- dataset_id: ID of the dataset used -- config: Configuration used for the evaluation -- batch_job_id: ID of the batch job processing this evaluation -- status: Current status (pending, running, completed, failed) -- total_items: Total number of items being evaluated -- completed_items: Number of items completed so far -- score: Evaluation scores (when get_trace_info=true and status=completed) -- error_message: Error message if failed -- created_at: Timestamp when the evaluation was created -- updated_at: Timestamp when the evaluation was last updated - -## Score Format - -When `get_trace_info=true` and evaluation is completed, the `score` field contains: +**Score Format** (`get_trace_info=true`): ```json { @@ -74,16 +49,8 @@ When `get_trace_info=true` and evaluation is completed, the `score` field contai } ``` -**Notes:** -- Only complete scores are included (scores where all traces have been rated) -- Numeric values are rounded to 2 decimal places -- NUMERIC scores show `avg` and `std` in summary -- CATEGORICAL scores show `distribution` counts in summary - -## Usage - -Use this endpoint to poll for evaluation progress. The evaluation is processed asynchronously by Celery Beat (every 60s), so you should poll periodically to check if the status has changed to "completed" or "failed". - -## Error Responses - -- **404**: Evaluation run not found or not accessible to this organization/project +**Score Details:** +* NUMERIC scores include average (`avg`) and standard deviation (`std`) in summary +* CATEGORICAL scores include distribution counts in summary +* Only complete scores are included (all traces have been rated) +* Numeric values are rounded to 2 decimal places diff --git a/backend/app/api/docs/evaluation/list_datasets.md b/backend/app/api/docs/evaluation/list_datasets.md index bd5576efc..e315db1d0 100644 --- a/backend/app/api/docs/evaluation/list_datasets.md +++ b/backend/app/api/docs/evaluation/list_datasets.md @@ -1,19 +1,3 @@ List all datasets for the current organization and project. -Returns a paginated list of dataset records ordered by most recent first. - -## Query Parameters - -- **limit**: Maximum number of datasets to return (default 50, max 100) -- **offset**: Number of datasets to skip for pagination (default 0) - -## Returns - -List of DatasetUploadResponse objects, each containing: -- dataset_id: Unique identifier for the dataset -- dataset_name: Name of the dataset (sanitized) -- total_items: Total number of items including duplication -- original_items: Number of original items before duplication -- duplication_factor: Factor by which items were duplicated -- langfuse_dataset_id: ID of the dataset in Langfuse -- object_store_url: URL to the CSV file in object storage +Returns a paginated list of datasets ordered by most recent first. Each dataset includes metadata (ID, name, item counts, duplication factor), Langfuse integration details, and object store URL. diff --git a/backend/app/api/docs/evaluation/list_evaluations.md b/backend/app/api/docs/evaluation/list_evaluations.md index 64c667726..24ab51623 100644 --- a/backend/app/api/docs/evaluation/list_evaluations.md +++ b/backend/app/api/docs/evaluation/list_evaluations.md @@ -1,25 +1,3 @@ List all evaluation runs for the current organization and project. -Returns a paginated list of evaluation runs ordered by most recent first. 
Each evaluation run represents a batch processing job evaluating a dataset against a specific configuration. - -## Query Parameters - -- **limit**: Maximum number of runs to return (default 50) -- **offset**: Number of runs to skip (for pagination, default 0) - -## Returns - -List of EvaluationRunPublic objects, each containing: -- id: Evaluation run ID -- run_name: Name of the evaluation run -- dataset_name: Name of the dataset used -- dataset_id: ID of the dataset used -- config: Configuration used for the evaluation -- batch_job_id: ID of the batch job processing this evaluation -- status: Current status (pending, running, completed, failed) -- total_items: Total number of items being evaluated -- completed_items: Number of items completed so far -- results: Evaluation results (when completed) -- error_message: Error message if failed -- created_at: Timestamp when the evaluation was created -- updated_at: Timestamp when the evaluation was last updated +Returns a paginated list of evaluation runs ordered by most recent first. Each run includes metadata (ID, name, dataset info, timestamps), configuration details, batch job ID, status tracking (pending/running/completed/failed), progress metrics (total/completed items), and results when available. diff --git a/backend/app/api/docs/evaluation/upload_dataset.md b/backend/app/api/docs/evaluation/upload_dataset.md index b73902860..90bf52511 100644 --- a/backend/app/api/docs/evaluation/upload_dataset.md +++ b/backend/app/api/docs/evaluation/upload_dataset.md @@ -1,42 +1,33 @@ -Upload a CSV file containing Golden Q&A pairs. +Upload a CSV file containing golden Q&A pairs for evaluation. -This endpoint: -1. Sanitizes the dataset name (removes spaces, special characters) -2. Validates and parses the CSV file -3. Uploads CSV to object store (if credentials configured) -4. Uploads dataset to Langfuse (for immediate use) -5. Stores metadata in database +Datasets allow you to store reusable question-answer pairs for systematic LLM testing with automatic validation, duplication for statistical significance, and Langfuse integration. Response includes dataset ID, sanitized name, item counts, Langfuse dataset ID, and object store URL. -## Dataset Name +**Key Features:** +* Validates CSV format and required columns (question, answer) +* Automatic dataset name sanitization for Langfuse compatibility +* Optional item duplication for statistical significance (1-5x, default: 5x) +* Uploads to object store and syncs with Langfuse +* Skips rows with missing values automatically -- Will be sanitized for Langfuse compatibility -- Spaces replaced with underscores -- Special characters removed -- Converted to lowercase -- Example: "My Dataset 01!" 
becomes "my_dataset_01" -## CSV Format +**CSV Format Requirements:** +* Required columns: `question`, `answer` +* Additional columns are allowed (will be ignored) +* Missing values in required columns are automatically skipped -- Must contain 'question' and 'answer' columns -- Can have additional columns (will be ignored) -- Missing values in 'question' or 'answer' rows will be skipped -## Duplication Factor +**Dataset Name Sanitization:** -- Minimum: 1 (no duplication) -- Maximum: 5 -- Default: 5 -- Each item in the dataset will be duplicated this many times -- Used to ensure statistical significance in evaluation results +Your dataset name will be automatically sanitized for Langfuse compatibility: +* Spaces → underscores +* Special characters removed +* Converted to lowercase +* Example: `"My Dataset 01!"` → `"my_dataset_01"` -## Example CSV -``` -question,answer -"What is the capital of France?","Paris" -"What is 2+2?","4" -``` +**Duplication Factor:** -## Returns - -DatasetUploadResponse with dataset_id, object_store_url, and Langfuse details (dataset_name in response will be the sanitized version) +Control how many times each Q&A pair is duplicated (1-5x, default: 5): +* Higher duplication = better statistical significance +* Useful for batch evaluation reliability +* `1` = no duplication (original dataset only) diff --git a/backend/app/api/docs/llm/llm_call.md b/backend/app/api/docs/llm/llm_call.md index 86513bc06..a284ab6a1 100644 --- a/backend/app/api/docs/llm/llm_call.md +++ b/backend/app/api/docs/llm/llm_call.md @@ -3,7 +3,7 @@ Make an LLM API call using either a stored configuration or an ad-hoc configurat This endpoint initiates an asynchronous LLM call job. The request is queued for processing, and results are delivered via the callback URL when complete. -### Request Fields +### Key Parameters **`query`** (required) - Query parameters for this LLM call: - `input` (required, string, min 1 char): User question/prompt/query diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py index e09f56226..f5dee0f6e 100644 --- a/backend/app/models/collection.py +++ b/backend/app/models/collection.py @@ -62,7 +62,7 @@ class AssistantOptions(SQLModel): model: Optional[str] = Field( default=None, description=( - "**[To Be Deprecated]** " + "**[Deprecated]** " "OpenAI model to attach to this assistant. The model " "must be compatable with the assistants API; see the " "OpenAI [model documentation](https://platform.openai.com/docs/models/compare) for more." @@ -72,7 +72,7 @@ class AssistantOptions(SQLModel): instructions: Optional[str] = Field( default=None, description=( - "**[To Be Deprecated]** " + "**[Deprecated]** " "Assistant instruction. Sometimes referred to as the " '"system" prompt.' ), @@ -80,7 +80,7 @@ class AssistantOptions(SQLModel): temperature: float = Field( default=1e-6, description=( - "**[To Be Deprecated]** " + "**[Deprecated]** " "Model temperature. The default is slightly " "greater-than zero because it is [unknown how OpenAI " "handles zero](https://community.openai.com/t/clarifications-on-setting-temperature-0/886447/5)." 
From 258bbfc441036705aeeea161c692a102c6d561df Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 10:48:49 +0530 Subject: [PATCH 04/11] adding left endpoints docs --- backend/app/api/docs/api_keys/create.md | 3 +++ backend/app/api/docs/api_keys/delete.md | 3 +++ backend/app/api/docs/api_keys/list.md | 3 +++ backend/app/api/docs/credentials/create.md | 3 +++ .../app/api/docs/credentials/delete_all.md | 3 +++ .../api/docs/credentials/delete_provider.md | 3 +++ .../app/api/docs/credentials/get_provider.md | 3 +++ backend/app/api/docs/credentials/list.md | 3 +++ backend/app/api/docs/credentials/update.md | 3 +++ .../app/api/docs/model_evaluation/evaluate.md | 3 +++ .../docs/model_evaluation/get_top_model.md | 3 +++ .../docs/model_evaluation/list_by_document.md | 3 +++ .../api/docs/openai_conversation/delete.md | 3 +++ .../app/api/docs/openai_conversation/get.md | 3 +++ .../openai_conversation/get_by_ancestor_id.md | 3 +++ .../openai_conversation/get_by_response_id.md | 3 +++ .../app/api/docs/openai_conversation/list.md | 3 +++ backend/app/api/docs/organization/create.md | 3 +++ backend/app/api/docs/organization/delete.md | 3 +++ backend/app/api/docs/organization/get.md | 3 +++ backend/app/api/docs/organization/list.md | 3 +++ backend/app/api/docs/organization/update.md | 3 +++ backend/app/api/docs/projects/create.md | 3 +++ backend/app/api/docs/projects/delete.md | 3 +++ backend/app/api/docs/projects/get.md | 3 +++ backend/app/api/docs/projects/list.md | 3 +++ backend/app/api/docs/projects/update.md | 3 +++ .../app/api/docs/responses/create_async.md | 3 +++ backend/app/api/docs/responses/create_sync.md | 3 +++ backend/app/api/routes/api_keys.py | 20 ++++--------------- backend/app/api/routes/credentials.py | 19 +++++++----------- backend/app/api/routes/model_evaluation.py | 11 +++++++--- backend/app/api/routes/openai_conversation.py | 12 +++++++++-- backend/app/api/routes/organization.py | 12 +++++++---- backend/app/api/routes/project.py | 11 +++++++--- backend/app/api/routes/responses.py | 19 +++++++++++++++--- 36 files changed, 148 insertions(+), 43 deletions(-) create mode 100644 backend/app/api/docs/api_keys/create.md create mode 100644 backend/app/api/docs/api_keys/delete.md create mode 100644 backend/app/api/docs/api_keys/list.md create mode 100644 backend/app/api/docs/credentials/create.md create mode 100644 backend/app/api/docs/credentials/delete_all.md create mode 100644 backend/app/api/docs/credentials/delete_provider.md create mode 100644 backend/app/api/docs/credentials/get_provider.md create mode 100644 backend/app/api/docs/credentials/list.md create mode 100644 backend/app/api/docs/credentials/update.md create mode 100644 backend/app/api/docs/model_evaluation/evaluate.md create mode 100644 backend/app/api/docs/model_evaluation/get_top_model.md create mode 100644 backend/app/api/docs/model_evaluation/list_by_document.md create mode 100644 backend/app/api/docs/openai_conversation/delete.md create mode 100644 backend/app/api/docs/openai_conversation/get.md create mode 100644 backend/app/api/docs/openai_conversation/get_by_ancestor_id.md create mode 100644 backend/app/api/docs/openai_conversation/get_by_response_id.md create mode 100644 backend/app/api/docs/openai_conversation/list.md create mode 100644 backend/app/api/docs/organization/create.md create mode 100644 backend/app/api/docs/organization/delete.md create mode 100644 backend/app/api/docs/organization/get.md create mode 100644 backend/app/api/docs/organization/list.md create mode 100644 
backend/app/api/docs/organization/update.md create mode 100644 backend/app/api/docs/projects/create.md create mode 100644 backend/app/api/docs/projects/delete.md create mode 100644 backend/app/api/docs/projects/get.md create mode 100644 backend/app/api/docs/projects/list.md create mode 100644 backend/app/api/docs/projects/update.md create mode 100644 backend/app/api/docs/responses/create_async.md create mode 100644 backend/app/api/docs/responses/create_sync.md diff --git a/backend/app/api/docs/api_keys/create.md b/backend/app/api/docs/api_keys/create.md new file mode 100644 index 000000000..2c2317b96 --- /dev/null +++ b/backend/app/api/docs/api_keys/create.md @@ -0,0 +1,3 @@ +Create a new API key for programmatic access to the platform. + +The raw API key is returned **only once during creation**. Store it securely as it cannot be retrieved again. Only the key prefix will be visible in subsequent requests for security reasons. diff --git a/backend/app/api/docs/api_keys/delete.md b/backend/app/api/docs/api_keys/delete.md new file mode 100644 index 000000000..3b87b398c --- /dev/null +++ b/backend/app/api/docs/api_keys/delete.md @@ -0,0 +1,3 @@ +Delete an API key by its ID. + +Permanently revokes the API key. Any requests using this key will fail immediately after deletion. diff --git a/backend/app/api/docs/api_keys/list.md b/backend/app/api/docs/api_keys/list.md new file mode 100644 index 000000000..3f2d04da6 --- /dev/null +++ b/backend/app/api/docs/api_keys/list.md @@ -0,0 +1,3 @@ +List all API keys for the current project. + +Returns paginated list of API keys with key prefix for security. The full key is only shown during creation and cannot be retrieved afterward. diff --git a/backend/app/api/docs/credentials/create.md b/backend/app/api/docs/credentials/create.md new file mode 100644 index 000000000..41f89eb70 --- /dev/null +++ b/backend/app/api/docs/credentials/create.md @@ -0,0 +1,3 @@ +Create new credentials for the current organization and project. + +Credentials are encrypted and stored securely for provider integrations (OpenAI, Langfuse, etc.). Only one credential per provider is allowed per organization-project combination. diff --git a/backend/app/api/docs/credentials/delete_all.md b/backend/app/api/docs/credentials/delete_all.md new file mode 100644 index 000000000..b7ec61f68 --- /dev/null +++ b/backend/app/api/docs/credentials/delete_all.md @@ -0,0 +1,3 @@ +Delete all credentials for current organization and project. + +Permanently removes all provider credentials from the current organization and project. This is a hard delete operation. diff --git a/backend/app/api/docs/credentials/delete_provider.md b/backend/app/api/docs/credentials/delete_provider.md new file mode 100644 index 000000000..fca18ea6b --- /dev/null +++ b/backend/app/api/docs/credentials/delete_provider.md @@ -0,0 +1,3 @@ +Delete credentials for a specific provider. + +Permanently removes credentials for a specific provider from the current organization and project. diff --git a/backend/app/api/docs/credentials/get_provider.md b/backend/app/api/docs/credentials/get_provider.md new file mode 100644 index 000000000..2f3a76920 --- /dev/null +++ b/backend/app/api/docs/credentials/get_provider.md @@ -0,0 +1,3 @@ +Get credentials for a specific provider. + +Retrieves decrypted credentials for a specific provider (e.g., `openai`, `langfuse`) for the current organization and project. 
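The credential documents above map onto the provider-scoped routes shown later in this patch (`/credentials` and `/credentials/provider/{provider}`). A minimal client sketch follows; only routes visible in this series are used, while the `/api/v1` mount point and the auth header are assumptions.

```python
import requests  # hypothetical client code, not part of the patch

BASE = "https://kaapi.example.com/api/v1"        # assumed mount point
HEADERS = {"Authorization": "Bearer <api-key>"}  # auth scheme assumed

# List every credential stored for the current organization and project.
print(requests.get(f"{BASE}/credentials/", headers=HEADERS).json())

# Fetch the decrypted credential for one provider (e.g. openai or langfuse).
print(requests.get(f"{BASE}/credentials/provider/openai", headers=HEADERS).json())

# Hard-delete the credential for a single provider.
requests.delete(f"{BASE}/credentials/provider/openai", headers=HEADERS)
```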
diff --git a/backend/app/api/docs/credentials/list.md b/backend/app/api/docs/credentials/list.md new file mode 100644 index 000000000..c660229bc --- /dev/null +++ b/backend/app/api/docs/credentials/list.md @@ -0,0 +1,3 @@ +Get all credentials for current organization and project. + +Returns list of all provider credentials associated with your organization and project. diff --git a/backend/app/api/docs/credentials/update.md b/backend/app/api/docs/credentials/update.md new file mode 100644 index 000000000..0377f0e4b --- /dev/null +++ b/backend/app/api/docs/credentials/update.md @@ -0,0 +1,3 @@ +Update credentials for a specific provider. + +Updates existing provider credentials for the current organization and project. Provider and credential fields must be provided. diff --git a/backend/app/api/docs/model_evaluation/evaluate.md b/backend/app/api/docs/model_evaluation/evaluate.md new file mode 100644 index 000000000..d4c276cec --- /dev/null +++ b/backend/app/api/docs/model_evaluation/evaluate.md @@ -0,0 +1,3 @@ +Start evaluations for one or more fine-tuned models. + +For each fine-tuning job ID provided, this endpoint fetches the fine-tuned model and test data, then queues a background task that runs predictions on the test set and computes evaluation scores (Matthews correlation coefficient). Returns created or active evaluation records. diff --git a/backend/app/api/docs/model_evaluation/get_top_model.md b/backend/app/api/docs/model_evaluation/get_top_model.md new file mode 100644 index 000000000..5b24e6988 --- /dev/null +++ b/backend/app/api/docs/model_evaluation/get_top_model.md @@ -0,0 +1,3 @@ +Get the top-performing model for a specific document. + +Returns the best model trained on the given document, ranked by Matthews correlation coefficient (MCC) across all evaluations. Includes prediction data file URL if available. diff --git a/backend/app/api/docs/model_evaluation/list_by_document.md b/backend/app/api/docs/model_evaluation/list_by_document.md new file mode 100644 index 000000000..2325d6a4e --- /dev/null +++ b/backend/app/api/docs/model_evaluation/list_by_document.md @@ -0,0 +1,3 @@ +Get all model evaluations for a specific document. + +Returns list of all evaluation records for models trained on the given document within the current project, including prediction data file URLs. diff --git a/backend/app/api/docs/openai_conversation/delete.md b/backend/app/api/docs/openai_conversation/delete.md new file mode 100644 index 000000000..12ef5b568 --- /dev/null +++ b/backend/app/api/docs/openai_conversation/delete.md @@ -0,0 +1,3 @@ +Delete a conversation by its ID. + +Performs soft delete by marking the conversation as deleted. The conversation remains in the database but is hidden from listings. diff --git a/backend/app/api/docs/openai_conversation/get.md b/backend/app/api/docs/openai_conversation/get.md new file mode 100644 index 000000000..69b3413ad --- /dev/null +++ b/backend/app/api/docs/openai_conversation/get.md @@ -0,0 +1,3 @@ +Get a single conversation by its ID. + +Returns conversation details for the specified conversation ID within the current project. diff --git a/backend/app/api/docs/openai_conversation/get_by_ancestor_id.md b/backend/app/api/docs/openai_conversation/get_by_ancestor_id.md new file mode 100644 index 000000000..2b1aa1cc0 --- /dev/null +++ b/backend/app/api/docs/openai_conversation/get_by_ancestor_id.md @@ -0,0 +1,3 @@ +Get a conversation by its ancestor response ID. 
+ +Retrieves conversation details using the ancestor response ID for conversation chain lookup. diff --git a/backend/app/api/docs/openai_conversation/get_by_response_id.md b/backend/app/api/docs/openai_conversation/get_by_response_id.md new file mode 100644 index 000000000..d75da7988 --- /dev/null +++ b/backend/app/api/docs/openai_conversation/get_by_response_id.md @@ -0,0 +1,3 @@ +Get a conversation by its OpenAI response ID. + +Retrieves conversation details using the OpenAI Responses API response ID for lookup. diff --git a/backend/app/api/docs/openai_conversation/list.md b/backend/app/api/docs/openai_conversation/list.md new file mode 100644 index 000000000..253cf3af7 --- /dev/null +++ b/backend/app/api/docs/openai_conversation/list.md @@ -0,0 +1,3 @@ +List all conversations in the current project. + +Returns paginated list of conversations with total count metadata for the current project. diff --git a/backend/app/api/docs/organization/create.md b/backend/app/api/docs/organization/create.md new file mode 100644 index 000000000..7f7e284ad --- /dev/null +++ b/backend/app/api/docs/organization/create.md @@ -0,0 +1,3 @@ +Create a new organization. + +Creates a new organization with the specified name and details. diff --git a/backend/app/api/docs/organization/delete.md b/backend/app/api/docs/organization/delete.md new file mode 100644 index 000000000..e0841c04a --- /dev/null +++ b/backend/app/api/docs/organization/delete.md @@ -0,0 +1,3 @@ +Delete an organization. + +Permanently deletes an organization and all associated data. diff --git a/backend/app/api/docs/organization/get.md b/backend/app/api/docs/organization/get.md new file mode 100644 index 000000000..c64242d3e --- /dev/null +++ b/backend/app/api/docs/organization/get.md @@ -0,0 +1,3 @@ +Get organization details by ID. + +Returns details for a specific organization. diff --git a/backend/app/api/docs/organization/list.md b/backend/app/api/docs/organization/list.md new file mode 100644 index 000000000..95943bab2 --- /dev/null +++ b/backend/app/api/docs/organization/list.md @@ -0,0 +1,3 @@ +List all organizations. + +Returns paginated list of all organizations in the system. diff --git a/backend/app/api/docs/organization/update.md b/backend/app/api/docs/organization/update.md new file mode 100644 index 000000000..77391d32c --- /dev/null +++ b/backend/app/api/docs/organization/update.md @@ -0,0 +1,3 @@ +Update organization details. + +Updates name and other details for an existing organization. diff --git a/backend/app/api/docs/projects/create.md b/backend/app/api/docs/projects/create.md new file mode 100644 index 000000000..ead92670d --- /dev/null +++ b/backend/app/api/docs/projects/create.md @@ -0,0 +1,3 @@ +Create a new project. + +Creates a new project within an organization with the specified name and configuration. diff --git a/backend/app/api/docs/projects/delete.md b/backend/app/api/docs/projects/delete.md new file mode 100644 index 000000000..8afee4da9 --- /dev/null +++ b/backend/app/api/docs/projects/delete.md @@ -0,0 +1,3 @@ +Delete a project. + +Permanently deletes a project and all associated data including documents, collections, and configurations. diff --git a/backend/app/api/docs/projects/get.md b/backend/app/api/docs/projects/get.md new file mode 100644 index 000000000..02f2b6d5c --- /dev/null +++ b/backend/app/api/docs/projects/get.md @@ -0,0 +1,3 @@ +Get project details by ID. + +Returns details for a specific project including name, organization, and configuration. 
diff --git a/backend/app/api/docs/projects/list.md b/backend/app/api/docs/projects/list.md new file mode 100644 index 000000000..911f7d1dc --- /dev/null +++ b/backend/app/api/docs/projects/list.md @@ -0,0 +1,3 @@ +List all projects. + +Returns paginated list of all projects across all organizations. diff --git a/backend/app/api/docs/projects/update.md b/backend/app/api/docs/projects/update.md new file mode 100644 index 000000000..869dfd92f --- /dev/null +++ b/backend/app/api/docs/projects/update.md @@ -0,0 +1,3 @@ +Update project details. + +Updates name and configuration for an existing project. diff --git a/backend/app/api/docs/responses/create_async.md b/backend/app/api/docs/responses/create_async.md new file mode 100644 index 000000000..d97f9ae41 --- /dev/null +++ b/backend/app/api/docs/responses/create_async.md @@ -0,0 +1,3 @@ +Create an asynchronous OpenAI Responses API call. + +Processes requests using Celery for background execution. Returns job status immediately and delivers results via callback when completed. diff --git a/backend/app/api/docs/responses/create_sync.md b/backend/app/api/docs/responses/create_sync.md new file mode 100644 index 000000000..11b4c7b60 --- /dev/null +++ b/backend/app/api/docs/responses/create_sync.md @@ -0,0 +1,3 @@ +Create a synchronous OpenAI Responses API call. + +Synchronous endpoint for immediate responses with Langfuse tracing integration. Useful for benchmarking and testing. diff --git a/backend/app/api/routes/api_keys.py b/backend/app/api/routes/api_keys.py index d1821a356..bac5a3463 100644 --- a/backend/app/api/routes/api_keys.py +++ b/backend/app/api/routes/api_keys.py @@ -4,7 +4,7 @@ from app.api.deps import SessionDep, AuthContextDep from app.crud.api_key import APIKeyCrud from app.models import APIKeyPublic, APIKeyCreateResponse, Message -from app.utils import APIResponse +from app.utils import APIResponse, load_description from app.api.permissions import Permission, require_permission router = APIRouter(prefix="/apikeys", tags=["API Keys"]) @@ -15,6 +15,7 @@ response_model=APIResponse[APIKeyCreateResponse], status_code=201, dependencies=[Depends(require_permission(Permission.SUPERUSER))], + description=load_description("api_keys/create.md"), ) def create_api_key_route( project_id: int, @@ -22,12 +23,6 @@ def create_api_key_route( current_user: AuthContextDep, session: SessionDep, ): - """ - Create a new API key for the project and user, Restricted to Superuser. - - The raw API key is returned only once during creation. - Store it securely as it cannot be retrieved again. - """ api_key_crud = APIKeyCrud(session=session, project_id=project_id) raw_key, api_key = api_key_crud.create( user_id=user_id, @@ -47,6 +42,7 @@ def create_api_key_route( "/", response_model=APIResponse[list[APIKeyPublic]], dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))], + description=load_description("api_keys/list.md"), ) def list_api_keys_route( current_user: AuthContextDep, @@ -54,12 +50,6 @@ def list_api_keys_route( skip: int = Query(0, ge=0, description="Number of records to skip"), limit: int = Query(100, ge=1, le=100, description="Maximum records to return"), ): - """ - List all API keys for the current project. - - Returns key prefix for security - the full key is only shown during creation. - Supports pagination via skip and limit parameters. 
- """ crud = APIKeyCrud(session, current_user.project.id) api_keys = crud.read_all(skip=skip, limit=limit) @@ -70,15 +60,13 @@ def list_api_keys_route( "/{key_id}", response_model=APIResponse[Message], dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))], + description=load_description("api_keys/delete.md"), ) def delete_api_key_route( key_id: UUID, current_user: AuthContextDep, session: SessionDep, ): - """ - Delete an API key by its ID. - """ api_key_crud = APIKeyCrud(session=session, project_id=current_user.project.id) api_key_crud.delete(key_id=key_id) diff --git a/backend/app/api/routes/credentials.py b/backend/app/api/routes/credentials.py index 57cf99750..c502b1e2d 100644 --- a/backend/app/api/routes/credentials.py +++ b/backend/app/api/routes/credentials.py @@ -14,7 +14,7 @@ update_creds_for_org, ) from app.models import CredsCreate, CredsPublic, CredsUpdate, UserProjectOrg -from app.utils import APIResponse +from app.utils import APIResponse, load_description logger = logging.getLogger(__name__) router = APIRouter(prefix="/credentials", tags=["Credentials"]) @@ -23,8 +23,7 @@ @router.post( "/", response_model=APIResponse[list[CredsPublic]], - summary="Create new credentials for the current organization and project", - description="Creates new credentials for the caller's organization and project. Each organization can have different credentials for different providers and projects. Only one credential per provider is allowed per organization-project combination.", + description=load_description("credentials/create.md"), ) def create_new_credential( *, @@ -53,8 +52,7 @@ def create_new_credential( @router.get( "/", response_model=APIResponse[list[CredsPublic]], - summary="Get all credentials for current org and project", - description="Retrieves all provider credentials associated with the caller's organization and project.", + description=load_description("credentials/list.md"), ) def read_credential( *, @@ -75,8 +73,7 @@ def read_credential( @router.get( "/provider/{provider}", response_model=APIResponse[dict], - summary="Get specific provider credentials for current org and project", - description="Retrieves credentials for a specific provider (e.g., 'openai', 'anthropic') for the caller's organization and project.", + description=load_description("credentials/get_provider.md"), ) def read_provider_credential( *, @@ -100,8 +97,7 @@ def read_provider_credential( @router.patch( "/", response_model=APIResponse[list[CredsPublic]], - summary="Update credentials for current org and project", - description="Updates credentials for a specific provider of the caller's organization and project.", + description=load_description("credentials/update.md"), ) def update_credential( *, @@ -133,7 +129,7 @@ def update_credential( @router.delete( "/provider/{provider}", response_model=APIResponse[dict], - summary="Delete specific provider credentials for current org and project", + description=load_description("credentials/delete_provider.md"), ) def delete_provider_credential( *, @@ -157,8 +153,7 @@ def delete_provider_credential( @router.delete( "/", response_model=APIResponse[dict], - summary="Delete all credentials for current org and project", - description="Removes all credentials for the caller's organization and project. 
This is a hard delete operation that permanently removes credentials from the database.", + description=load_description("credentials/delete_all.md"), ) def delete_all_credentials( *, diff --git a/backend/app/api/routes/model_evaluation.py b/backend/app/api/routes/model_evaluation.py index efd81a86e..2a14c881d 100644 --- a/backend/app/api/routes/model_evaluation.py +++ b/backend/app/api/routes/model_evaluation.py @@ -4,7 +4,6 @@ from fastapi import APIRouter, HTTPException, BackgroundTasks from sqlmodel import Session -from openai import OpenAI from app.crud import ( fetch_by_id, @@ -24,7 +23,7 @@ from app.core.db import engine from app.core.cloud import get_cloud_storage from app.core.finetune.evaluation import ModelEvaluator -from app.utils import get_openai_client, APIResponse +from app.utils import get_openai_client, APIResponse, load_description from app.api.deps import CurrentUserOrgProject, SessionDep @@ -112,7 +111,11 @@ def run_model_evaluation( ) -@router.post("/evaluate_models/", response_model=APIResponse) +@router.post( + "/evaluate_models/", + response_model=APIResponse, + description=load_description("model_evaluation/evaluate.md"), +) def evaluate_models( request: ModelEvaluationCreate, background_tasks: BackgroundTasks, @@ -196,6 +199,7 @@ def evaluate_models( "/{document_id}/top_model", response_model=APIResponse[ModelEvaluationPublic], response_model_exclude_none=True, + description=load_description("model_evaluation/get_top_model.md"), ) def get_top_model_by_doc_id( document_id: UUID, @@ -223,6 +227,7 @@ def get_top_model_by_doc_id( "/{document_id}", response_model=APIResponse[list[ModelEvaluationPublic]], response_model_exclude_none=True, + description=load_description("model_evaluation/list_by_document.md"), ) def get_evaluations_by_doc_id( document_id: UUID, diff --git a/backend/app/api/routes/openai_conversation.py b/backend/app/api/routes/openai_conversation.py index 71f0c7304..b206bf5bd 100644 --- a/backend/app/api/routes/openai_conversation.py +++ b/backend/app/api/routes/openai_conversation.py @@ -16,7 +16,7 @@ UserProjectOrg, OpenAIConversationPublic, ) -from app.utils import APIResponse +from app.utils import APIResponse, load_description router = APIRouter(prefix="/openai-conversation", tags=["OpenAI Conversations"]) @@ -25,6 +25,7 @@ "/{conversation_id}", response_model=APIResponse[OpenAIConversationPublic], summary="Get a single conversation by its ID", + description=load_description("openai_conversation/get.md"), ) def get_conversation_route( conversation_id: int = Path(..., description="The conversation ID to fetch"), @@ -48,6 +49,7 @@ def get_conversation_route( "/response/{response_id}", response_model=APIResponse[OpenAIConversationPublic], summary="Get a conversation by its OpenAI response ID", + description=load_description("openai_conversation/get_by_response_id.md"), ) def get_conversation_by_response_id_route( response_id: str = Path(..., description="The OpenAI response ID to fetch"), @@ -72,6 +74,7 @@ def get_conversation_by_response_id_route( "/ancestor/{ancestor_response_id}", response_model=APIResponse[OpenAIConversationPublic], summary="Get a conversation by its ancestor response ID", + description=load_description("openai_conversation/get_by_ancestor_id.md"), ) def get_conversation_by_ancestor_id_route( ancestor_response_id: str = Path( @@ -98,6 +101,7 @@ def get_conversation_by_ancestor_id_route( "/", response_model=APIResponse[list[OpenAIConversationPublic]], summary="List all conversations in the current project", + 
description=load_description("openai_conversation/list.md"), ) def list_conversations_route( session: Session = Depends(get_db), @@ -126,7 +130,11 @@ def list_conversations_route( ) -@router.delete("/{conversation_id}", response_model=APIResponse) +@router.delete( + "/{conversation_id}", + response_model=APIResponse, + description=load_description("openai_conversation/delete.md"), +) def delete_conversation_route( conversation_id: Annotated[int, Path(description="Conversation ID to delete")], session: Session = Depends(get_db), diff --git a/backend/app/api/routes/organization.py b/backend/app/api/routes/organization.py index 8526b6755..a25873a6d 100644 --- a/backend/app/api/routes/organization.py +++ b/backend/app/api/routes/organization.py @@ -1,9 +1,9 @@ import logging -from typing import Any, List +from typing import List from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import func -from sqlmodel import Session, select +from sqlmodel import select from app.models import ( Organization, @@ -12,12 +12,11 @@ OrganizationPublic, ) from app.api.deps import ( - CurrentUser, SessionDep, get_current_active_superuser, ) from app.crud.organization import create_organization, get_organization_by_id -from app.utils import APIResponse +from app.utils import APIResponse, load_description logger = logging.getLogger(__name__) router = APIRouter(prefix="/organizations", tags=["Organizations"]) @@ -28,6 +27,7 @@ "/", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[List[OrganizationPublic]], + description=load_description("organization/list.md"), ) def read_organizations(session: SessionDep, skip: int = 0, limit: int = 100): count_statement = select(func.count()).select_from(Organization) @@ -44,6 +44,7 @@ def read_organizations(session: SessionDep, skip: int = 0, limit: int = 100): "/", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[OrganizationPublic], + description=load_description("organization/create.md"), ) def create_new_organization(*, session: SessionDep, org_in: OrganizationCreate): new_org = create_organization(session=session, org_create=org_in) @@ -54,6 +55,7 @@ def create_new_organization(*, session: SessionDep, org_in: OrganizationCreate): "/{org_id}", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[OrganizationPublic], + description=load_description("organization/get.md"), ) def read_organization(*, session: SessionDep, org_id: int): """ @@ -71,6 +73,7 @@ def read_organization(*, session: SessionDep, org_id: int): "/{org_id}", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[OrganizationPublic], + description=load_description("organization/update.md"), ) def update_organization( *, session: SessionDep, org_id: int, org_in: OrganizationUpdate @@ -100,6 +103,7 @@ def update_organization( dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[None], include_in_schema=False, + description=load_description("organization/delete.md"), ) def delete_organization(session: SessionDep, org_id: int): org = get_organization_by_id(session=session, org_id=org_id) diff --git a/backend/app/api/routes/project.py b/backend/app/api/routes/project.py index 075e42b9c..79c6a1314 100644 --- a/backend/app/api/routes/project.py +++ b/backend/app/api/routes/project.py @@ -1,9 +1,9 @@ import logging -from typing import Any, List +from typing import List from fastapi import APIRouter, Depends, HTTPException, Query from sqlalchemy import func 
-from sqlmodel import Session, select +from sqlmodel import select from app.models import Project, ProjectCreate, ProjectUpdate, ProjectPublic from app.api.deps import ( @@ -14,7 +14,7 @@ create_project, get_project_by_id, ) -from app.utils import APIResponse +from app.utils import APIResponse, load_description logger = logging.getLogger(__name__) router = APIRouter(prefix="/projects", tags=["Projects"]) @@ -25,6 +25,7 @@ "/", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[List[ProjectPublic]], + description=load_description("projects/list.md"), ) def read_projects( session: SessionDep, @@ -45,6 +46,7 @@ def read_projects( "/", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[ProjectPublic], + description=load_description("projects/create.md"), ) def create_new_project(*, session: SessionDep, project_in: ProjectCreate): project = create_project(session=session, project_create=project_in) @@ -55,6 +57,7 @@ def create_new_project(*, session: SessionDep, project_in: ProjectCreate): "/{project_id}", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[ProjectPublic], + description=load_description("projects/get.md"), ) def read_project(*, session: SessionDep, project_id: int): """ @@ -72,6 +75,7 @@ def read_project(*, session: SessionDep, project_id: int): "/{project_id}", dependencies=[Depends(get_current_active_superuser)], response_model=APIResponse[ProjectPublic], + description=load_description("projects/update.md"), ) def update_project(*, session: SessionDep, project_id: int, project_in: ProjectUpdate): project = get_project_by_id(session=session, project_id=project_id) @@ -96,6 +100,7 @@ def update_project(*, session: SessionDep, project_id: int, project_in: ProjectU "/{project_id}", dependencies=[Depends(get_current_active_superuser)], include_in_schema=False, + description=load_description("projects/delete.md"), ) def delete_project(session: SessionDep, project_id: int): project = get_project_by_id(session=session, project_id=project_id) diff --git a/backend/app/api/routes/responses.py b/backend/app/api/routes/responses.py index 4635c9c4a..c5ebcaa64 100644 --- a/backend/app/api/routes/responses.py +++ b/backend/app/api/routes/responses.py @@ -19,14 +19,23 @@ from app.services.response.jobs import start_job from app.services.response.response import get_file_search_results from app.services.response.callbacks import get_additional_data -from app.utils import APIResponse, get_openai_client, handle_openai_error, mask_string +from app.utils import ( + APIResponse, + get_openai_client, + handle_openai_error, + load_description, +) logger = logging.getLogger(__name__) router = APIRouter(tags=["Responses"]) -@router.post("/responses", response_model=APIResponse[ResponseJobStatus]) +@router.post( + "/responses", + response_model=APIResponse[ResponseJobStatus], + description=load_description("responses/create_async.md"), +) async def responses( request: ResponsesAPIRequest, _session: Session = Depends(get_db), @@ -56,7 +65,11 @@ async def responses( return APIResponse.success_response(data=response) -@router.post("/responses/sync", response_model=APIResponse[CallbackResponse]) +@router.post( + "/responses/sync", + response_model=APIResponse[CallbackResponse], + description=load_description("responses/create_sync.md"), +) async def responses_sync( request: ResponsesSyncAPIRequest, _session: Session = Depends(get_db), From ee5baf7e6a8206cf0ad439c3eafcb74f0d3e6c05 Mon Sep 17 00:00:00 2001 From: nishika26 
Date: Fri, 12 Dec 2025 11:20:19 +0530 Subject: [PATCH 05/11] pr review and shifting api version to env example --- backend/app/api/docs/evaluation/upload_dataset.md | 2 +- backend/app/api/docs/onboarding/onboarding.md | 4 ++-- backend/app/core/config.py | 1 + backend/app/main.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/app/api/docs/evaluation/upload_dataset.md b/backend/app/api/docs/evaluation/upload_dataset.md index 90bf52511..87a1137f0 100644 --- a/backend/app/api/docs/evaluation/upload_dataset.md +++ b/backend/app/api/docs/evaluation/upload_dataset.md @@ -5,7 +5,7 @@ Datasets allow you to store reusable question-answer pairs for systematic LLM te **Key Features:** * Validates CSV format and required columns (question, answer) * Automatic dataset name sanitization for Langfuse compatibility -* Optional item duplication for statistical significance (1-5x, default: 5x) +* Optional item duplication for statistical significance (1-5x, default: 1x) * Uploads to object store and syncs with Langfuse * Skips rows with missing values automatically diff --git a/backend/app/api/docs/onboarding/onboarding.md b/backend/app/api/docs/onboarding/onboarding.md index ef1b33018..3596dd376 100644 --- a/backend/app/api/docs/onboarding/onboarding.md +++ b/backend/app/api/docs/onboarding/onboarding.md @@ -25,8 +25,8 @@ - We’ve also included a list of the providers currently supported by kaapi. ### Supported Providers - - openai - - langfuse + - openai + - langfuse ### Example: For sending multiple credentials - ``` diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 515874af5..50ececf15 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -42,6 +42,7 @@ class Settings(BaseSettings): ] = "development" PROJECT_NAME: str + API_VERSION: str = "0.5.0" SENTRY_DSN: HttpUrl | None = None POSTGRES_SERVER: str POSTGRES_PORT: int = 5432 diff --git a/backend/app/main.py b/backend/app/main.py index bfa219e7a..47a27f371 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -37,7 +37,7 @@ def custom_openapi(): openapi_schema = get_openapi( title=app.title, - version="0.5.0", + version=settings.API_VERSION, openapi_version=app.openapi_version, description=app.description, routes=app.routes, From 88bbf4aefaa9bfaf680d715d3eb5cf29fc0e110d Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 11:22:30 +0530 Subject: [PATCH 06/11] adding api version to env --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 7c2cd9f02..50daff4c9 100644 --- a/.env.example +++ b/.env.example @@ -16,6 +16,7 @@ ENVIRONMENT=development PROJECT_NAME="Kaapi" STACK_NAME=Kaapi +API_VERSION=0.5.0 #Backend SECRET_KEY=changethis From f97c2ef3c766b58515ab485fa7344b025d5900eb Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 11:31:30 +0530 Subject: [PATCH 07/11] coderabbit pr review --- backend/app/api/docs/evaluation/upload_dataset.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/api/docs/evaluation/upload_dataset.md b/backend/app/api/docs/evaluation/upload_dataset.md index 87a1137f0..3ebfc108d 100644 --- a/backend/app/api/docs/evaluation/upload_dataset.md +++ b/backend/app/api/docs/evaluation/upload_dataset.md @@ -27,7 +27,7 @@ Your dataset name will be automatically sanitized for Langfuse compatibility: **Duplication Factor:** -Control how many times each Q&A pair is duplicated (1-5x, default: 5): +Control how many times each Q&A pair is duplicated (1-5x, default: 1): 
* Higher duplication = better statistical significance * Useful for batch evaluation reliability * `1` = no duplication (original dataset only) From ef4ca7a628106921636e6dcedae4ab89124713d2 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 11:50:03 +0530 Subject: [PATCH 08/11] coderabbit pr review --- backend/app/api/docs/api_keys/list.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/api/docs/api_keys/list.md b/backend/app/api/docs/api_keys/list.md index 3f2d04da6..a4678e6f0 100644 --- a/backend/app/api/docs/api_keys/list.md +++ b/backend/app/api/docs/api_keys/list.md @@ -1,3 +1,3 @@ List all API keys for the current project. -Returns paginated list of API keys with key prefix for security. The full key is only shown during creation and cannot be retrieved afterward. +Returns a paginated list of API keys with key prefix for security. The full key is only shown during creation and cannot be retrieved afterward. From d8f37af5c381e09a4296f27a406c77295fd434bc Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 15:01:33 +0530 Subject: [PATCH 09/11] rephrasing the docs a little --- backend/app/api/docs/collections/create.md | 8 ++------ backend/app/api/docs/collections/delete.md | 2 +- backend/app/api/docs/collections/info.md | 2 +- backend/app/api/docs/collections/list.md | 5 ++--- backend/app/api/docs/config/create.md | 2 +- backend/app/api/docs/credentials/create.md | 2 +- backend/app/api/docs/credentials/delete_all.md | 2 +- backend/app/api/docs/documents/delete.md | 2 +- backend/app/api/docs/documents/info.md | 4 +++- backend/app/api/docs/documents/job_info.md | 4 +++- backend/app/api/docs/documents/job_list.md | 4 +++- backend/app/api/docs/documents/list.md | 4 +++- .../app/api/docs/documents/permanent_delete.md | 1 + backend/app/api/docs/documents/upload.md | 2 +- .../api/docs/evaluation/create_evaluation.md | 17 ++--------------- .../app/api/docs/evaluation/delete_dataset.md | 2 +- .../app/api/docs/evaluation/upload_dataset.md | 2 +- backend/app/api/docs/fine_tuning/retrieve.md | 4 +++- backend/app/api/docs/onboarding/onboarding.md | 4 ++-- .../app/api/docs/openai_conversation/delete.md | 2 +- backend/app/api/docs/organization/update.md | 2 +- backend/app/api/docs/projects/create.md | 2 +- backend/app/api/docs/projects/get.md | 2 +- backend/app/api/docs/projects/update.md | 2 +- backend/app/api/docs/responses/create_async.md | 2 +- 25 files changed, 39 insertions(+), 46 deletions(-) diff --git a/backend/app/api/docs/collections/create.md b/backend/app/api/docs/collections/create.md index 5f12fbcb4..c3a5f4400 100644 --- a/backend/app/api/docs/collections/create.md +++ b/backend/app/api/docs/collections/create.md @@ -1,12 +1,8 @@ Setup and configure the document store that is pertinent to the RAG pipeline: -* Make OpenAI - [File](https://platform.openai.com/docs/api-reference/files)'s from - documents stored in the cloud (see the `documents` interface). -* Create an OpenAI [Vector - Store](https://platform.openai.com/docs/api-reference/vector-stores) - based on those file(s). +* Create a vector store from the document IDs you received after uploading your + documents through the Documents module. * [Deprecated] Attach the Vector Store to an OpenAI [Assistant](https://platform.openai.com/docs/api-reference/assistants). 
Use parameters in the request body relevant to an Assistant to flesh out diff --git a/backend/app/api/docs/collections/delete.md b/backend/app/api/docs/collections/delete.md index c7f0f2c7a..8cb213d51 100644 --- a/backend/app/api/docs/collections/delete.md +++ b/backend/app/api/docs/collections/delete.md @@ -1,6 +1,6 @@ Remove a collection from the platform. This is a two step process: -1. Delete all OpenAI resources that were allocated: file(s), the Vector +1. Delete all resources that were allocated: file(s), the Vector Store, and the Assistant. 2. Delete the collection entry from the kaapi database. diff --git a/backend/app/api/docs/collections/info.md b/backend/app/api/docs/collections/info.md index ad862ac52..a5f398044 100644 --- a/backend/app/api/docs/collections/info.md +++ b/backend/app/api/docs/collections/info.md @@ -1,4 +1,4 @@ Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization, timestamps, and associated LLM service details (`llm_service_id` and `llm_service_name`). -Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that, documents returned are not only stored by the AI platform, but also by OpenAI. +Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that, documents returned are not only stored by the AI platform, but also by Vector store provider. diff --git a/backend/app/api/docs/collections/list.md b/backend/app/api/docs/collections/list.md index 4224a5b83..bb28e0b6a 100644 --- a/backend/app/api/docs/collections/list.md +++ b/backend/app/api/docs/collections/list.md @@ -1,6 +1,5 @@ -List _active_ collections -- collections that have been created but -not deleted +List all _active_ collections that have been created and are not deleted -If a vector store was created - `llm_service_name` and `llm_service_id` in the response denote the name of the vector store (eg. 'openai vector store') and its id. +If a vector store was created - `llm_service_name` and `llm_service_id` in the response denotes the name of the vector store (eg. 'openai vector store') and its id respectively. [Deprecated] If an assistant was created, `llm_service_name` and `llm_service_id` in the response denotes the name of the model used in the assistant (eg. 'gpt-4o') and assistant id. diff --git a/backend/app/api/docs/config/create.md b/backend/app/api/docs/config/create.md index d3c8ff15e..fd193024a 100644 --- a/backend/app/api/docs/config/create.md +++ b/backend/app/api/docs/config/create.md @@ -11,7 +11,7 @@ Configurations allow you to store and manage reusable LLM parameters * Provider-agnostic storage - params are passed through to the provider as-is -**Example for the config blob: OpenAI Responses API with File Search** +**Example for the config blob: OpenAI Responses API with File Search -** ```json "config_blob": { diff --git a/backend/app/api/docs/credentials/create.md b/backend/app/api/docs/credentials/create.md index 41f89eb70..139a3c85c 100644 --- a/backend/app/api/docs/credentials/create.md +++ b/backend/app/api/docs/credentials/create.md @@ -1,3 +1,3 @@ -Create new credentials for the current organization and project. +Persist new credentials for the current organization and project. 
Credentials are encrypted and stored securely for provider integrations (OpenAI, Langfuse, etc.). Only one credential per provider is allowed per organization-project combination. diff --git a/backend/app/api/docs/credentials/delete_all.md b/backend/app/api/docs/credentials/delete_all.md index b7ec61f68..c0eaa8a54 100644 --- a/backend/app/api/docs/credentials/delete_all.md +++ b/backend/app/api/docs/credentials/delete_all.md @@ -1,3 +1,3 @@ Delete all credentials for current organization and project. -Permanently removes all provider credentials from the current organization and project. This is a hard delete operation. +Permanently removes all provider credentials from the current organization and project. diff --git a/backend/app/api/docs/documents/delete.md b/backend/app/api/docs/documents/delete.md index e62c95ad9..ff7af99b6 100644 --- a/backend/app/api/docs/documents/delete.md +++ b/backend/app/api/docs/documents/delete.md @@ -1,4 +1,4 @@ -Perform a soft delete of the document. A soft delete makes the +Perform a delete of the document. This makes the document invisible. It does not delete the document from cloud storage or its information from the database. diff --git a/backend/app/api/docs/documents/info.md b/backend/app/api/docs/documents/info.md index 527a2308d..c9b4b04dd 100644 --- a/backend/app/api/docs/documents/info.md +++ b/backend/app/api/docs/documents/info.md @@ -1 +1,3 @@ -Retrieve all information about a given document. If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the retrieved document. If you don't set it to true, the URL will not be included in the response. +Retrieve all information about a given document. + +If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the retrieved document. If you don't set it to true, the URL will not be included in the response. diff --git a/backend/app/api/docs/documents/job_info.md b/backend/app/api/docs/documents/job_info.md index c70e42bfa..387623bd4 100644 --- a/backend/app/api/docs/documents/job_info.md +++ b/backend/app/api/docs/documents/job_info.md @@ -1 +1,3 @@ -Get the status and details of a document transformation job. If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the transformed document if the job has been successful. If you don't set it to true, the URL will not be included in the response. +Get the status and details of a document transformation job. + +If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the transformed document if the job has been successful. If you don't set it to true, the URL will not be included in the response. diff --git a/backend/app/api/docs/documents/job_list.md b/backend/app/api/docs/documents/job_list.md index f85ca99ad..1b0a1e44a 100644 --- a/backend/app/api/docs/documents/job_list.md +++ b/backend/app/api/docs/documents/job_list.md @@ -1 +1,3 @@ -Get the status and details of multiple document transformation jobs by IDs. If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the transformed document for successful jobs. If you don't set it to true, the URL will not be included in the response. +Get the status and details of multiple document transformation jobs by IDs. 
+ +If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the transformed document for successful jobs. If you don't set it to true, the URL will not be included in the response. diff --git a/backend/app/api/docs/documents/list.md b/backend/app/api/docs/documents/list.md index 110e931c9..1b1771db9 100644 --- a/backend/app/api/docs/documents/list.md +++ b/backend/app/api/docs/documents/list.md @@ -1 +1,3 @@ -List documents uploaded to the AI platform. If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the retrieved documents. If you don't set it to true, the URL will not be included in the response. +List documents uploaded to the AI platform. + +If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the retrieved documents. If you don't set it to true, the URL will not be included in the response. diff --git a/backend/app/api/docs/documents/permanent_delete.md b/backend/app/api/docs/documents/permanent_delete.md index b179b1fe7..2a6479803 100644 --- a/backend/app/api/docs/documents/permanent_delete.md +++ b/backend/app/api/docs/documents/permanent_delete.md @@ -1,6 +1,7 @@ This operation marks the document as deleted in the database while retaining its metadata. However, the actual file is permanently deleted from cloud storage (e.g., S3) and cannot be recovered. Only the database record remains for reference purposes. + If the document is part of an active collection, those collections will be deleted using the collections delete interface. Noteably, this means all OpenAI Vector Store's and Assistant's to which this document diff --git a/backend/app/api/docs/documents/upload.md b/backend/app/api/docs/documents/upload.md index cc4ad9bf5..2ff8031fa 100644 --- a/backend/app/api/docs/documents/upload.md +++ b/backend/app/api/docs/documents/upload.md @@ -13,6 +13,6 @@ The following (source_format → target_format) transformations are supported: ### Transformers -Available transformer names and their implementations, default transformer is zerox: +Available transformer names and their implementations, default transformer is zerox for now: - `zerox` diff --git a/backend/app/api/docs/evaluation/create_evaluation.md b/backend/app/api/docs/evaluation/create_evaluation.md index 176751024..1df41ead9 100644 --- a/backend/app/api/docs/evaluation/create_evaluation.md +++ b/backend/app/api/docs/evaluation/create_evaluation.md @@ -4,24 +4,11 @@ Evaluations allow you to systematically test LLM configurations against predefined datasets with automatic progress tracking and result collection. **Key Features:** -* Fetches dataset items from Langfuse for evaluation -* Creates batch processing job via OpenAI Batch API +* Fetches dataset items from Langfuse and creates batch processing job via OpenAI Batch API * Asynchronous processing with automatic progress tracking (checks every 60s) * Supports configuration from direct parameters or existing assistants * Stores results for comparison and analysis -* Provider-agnostic configuration - params are passed through to OpenAI as-is - - -**How it works:** -1. Validates dataset exists and has associated Langfuse dataset ID -2. Creates an EvaluationRun record in the database -3. Fetches all dataset items from Langfuse -4. Generates JSONL batch file with your configuration -5. Submits batch job to OpenAI -6. 
Returns evaluation run details with batch_job_id for tracking - -Use `GET /evaluations/{evaluation_id}` to monitor progress and retrieve results. - +* Note that you can use `GET /evaluations/{evaluation_id}` to monitor progress and retrieve results of evaluation. **Example: Using Direct Configuration** diff --git a/backend/app/api/docs/evaluation/delete_dataset.md b/backend/app/api/docs/evaluation/delete_dataset.md index 630015d37..d50802e82 100644 --- a/backend/app/api/docs/evaluation/delete_dataset.md +++ b/backend/app/api/docs/evaluation/delete_dataset.md @@ -1,3 +1,3 @@ Delete a dataset by ID. -This will remove the dataset record from the database. The CSV file in object store (if exists) will remain for audit purposes, but the dataset will no longer be accessible for creating new evaluations. +This will remove the dataset record from the database. The CSV file in object store (if exists) will remain there for audit purposes, but the dataset will no longer be accessible for creating new evaluations. diff --git a/backend/app/api/docs/evaluation/upload_dataset.md b/backend/app/api/docs/evaluation/upload_dataset.md index 3ebfc108d..f6da68657 100644 --- a/backend/app/api/docs/evaluation/upload_dataset.md +++ b/backend/app/api/docs/evaluation/upload_dataset.md @@ -1,6 +1,6 @@ Upload a CSV file containing golden Q&A pairs for evaluation. -Datasets allow you to store reusable question-answer pairs for systematic LLM testing with automatic validation, duplication for statistical significance, and Langfuse integration. Response includes dataset ID, sanitized name, item counts, Langfuse dataset ID, and object store URL. +Datasets allow you to store reusable question-answer pairs for systematic LLM testing with automatic validation, duplication for statistical significance, and Langfuse integration. Response includes dataset ID, sanitized name, item counts, Langfuse dataset ID, and object store URL (the cloud storage location where your CSV file is stored). **Key Features:** * Validates CSV format and required columns (question, answer) diff --git a/backend/app/api/docs/fine_tuning/retrieve.md b/backend/app/api/docs/fine_tuning/retrieve.md index 8dd93a841..95710eaf5 100644 --- a/backend/app/api/docs/fine_tuning/retrieve.md +++ b/backend/app/api/docs/fine_tuning/retrieve.md @@ -2,4 +2,6 @@ Refreshes the status of a fine-tuning job by retrieving the latest information f If there are any changes in status, fine-tuned model, or error message, the local job record is updated accordingly. Returns the latest state of the job. -OpenAI’s job status is retrieved using their [Fine-tuning Job Retrieve API](https://platform.openai.com/docs/api-reference/fine_tuning/retrieve). +When a job is completed and updated in the database, model evaluation for that fine-tuned model will start automatically. + +OpenAI's job status is retrieved using their [Fine-tuning Job Retrieve API](https://platform.openai.com/docs/api-reference/fine_tuning/retrieve). diff --git a/backend/app/api/docs/onboarding/onboarding.md b/backend/app/api/docs/onboarding/onboarding.md index 3596dd376..58eeb7379 100644 --- a/backend/app/api/docs/onboarding/onboarding.md +++ b/backend/app/api/docs/onboarding/onboarding.md @@ -25,8 +25,8 @@ - We’ve also included a list of the providers currently supported by kaapi. 
### Supported Providers - - openai - - langfuse + - **LLM:** openai + - **Observability:** langfuse ### Example: For sending multiple credentials - ``` diff --git a/backend/app/api/docs/openai_conversation/delete.md b/backend/app/api/docs/openai_conversation/delete.md index 12ef5b568..905c0b9d2 100644 --- a/backend/app/api/docs/openai_conversation/delete.md +++ b/backend/app/api/docs/openai_conversation/delete.md @@ -1,3 +1,3 @@ Delete a conversation by its ID. -Performs soft delete by marking the conversation as deleted. The conversation remains in the database but is hidden from listings. +Performs a delete by marking the conversation as deleted. The conversation remains in the database but is hidden from listings. diff --git a/backend/app/api/docs/organization/update.md b/backend/app/api/docs/organization/update.md index 77391d32c..388c3eccd 100644 --- a/backend/app/api/docs/organization/update.md +++ b/backend/app/api/docs/organization/update.md @@ -1,3 +1,3 @@ Update organization details. -Updates name and other details for an existing organization. +Updates name and description for an existing organization. diff --git a/backend/app/api/docs/projects/create.md b/backend/app/api/docs/projects/create.md index ead92670d..b7397baac 100644 --- a/backend/app/api/docs/projects/create.md +++ b/backend/app/api/docs/projects/create.md @@ -1,3 +1,3 @@ Create a new project. -Creates a new project within an organization with the specified name and configuration. +Creates a new project within an organization with the specified name and description. diff --git a/backend/app/api/docs/projects/get.md b/backend/app/api/docs/projects/get.md index 02f2b6d5c..69f7e1378 100644 --- a/backend/app/api/docs/projects/get.md +++ b/backend/app/api/docs/projects/get.md @@ -1,3 +1,3 @@ Get project details by ID. -Returns details for a specific project including name, organization, and configuration. +Returns details for a specific project including name, organization, and description. diff --git a/backend/app/api/docs/projects/update.md b/backend/app/api/docs/projects/update.md index 869dfd92f..021ae15ce 100644 --- a/backend/app/api/docs/projects/update.md +++ b/backend/app/api/docs/projects/update.md @@ -1,3 +1,3 @@ Update project details. -Updates name and configuration for an existing project. +Updates name and description for an existing project. diff --git a/backend/app/api/docs/responses/create_async.md b/backend/app/api/docs/responses/create_async.md index d97f9ae41..f2dabda5c 100644 --- a/backend/app/api/docs/responses/create_async.md +++ b/backend/app/api/docs/responses/create_async.md @@ -1,3 +1,3 @@ Create an asynchronous OpenAI Responses API call. -Processes requests using Celery for background execution. Returns job status immediately and delivers results via callback when completed. +Processes requests with background execution. Returns job status immediately and delivers results via callback given in the request body when completed. 
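The route modules in this series import `load_description` from `app.utils`, but the helper itself is not shown in these patches. A minimal sketch of what such a helper might look like, assuming the markdown files live under `backend/app/api/docs/` and the argument is a path relative to that directory:

```python
# Hypothetical sketch only -- the real load_description lives in app/utils.py
# and may differ; paths assume the backend/app/api/docs/ layout used in this series.
from functools import lru_cache
from pathlib import Path

# If defined in app/utils.py, the docs directory resolves to app/api/docs/.
DOCS_DIR = Path(__file__).parent / "api" / "docs"


@lru_cache(maxsize=None)
def load_description(relative_path: str) -> str:
    """Return the markdown body used as an endpoint's OpenAPI description.

    Example: load_description("credentials/create.md") reads
    app/api/docs/credentials/create.md and returns its contents.
    """
    return (DOCS_DIR / relative_path).read_text(encoding="utf-8")
```

Caching the contents means each markdown file is read once per process rather than on every schema build; whether the actual helper does this is not shown in the series.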
From 4fa355dc019616370bd79d7c3d4562474395061f Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 12 Dec 2025 15:12:10 +0530 Subject: [PATCH 10/11] coderabbit review fixes --- backend/app/api/docs/documents/upload.md | 2 +- backend/app/api/docs/evaluation/create_evaluation.md | 2 +- backend/app/api/docs/evaluation/upload_dataset.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/api/docs/documents/upload.md b/backend/app/api/docs/documents/upload.md index 2ff8031fa..cc4ad9bf5 100644 --- a/backend/app/api/docs/documents/upload.md +++ b/backend/app/api/docs/documents/upload.md @@ -13,6 +13,6 @@ The following (source_format → target_format) transformations are supported: ### Transformers -Available transformer names and their implementations, default transformer is zerox for now: +Available transformer names and their implementations, default transformer is zerox: - `zerox` diff --git a/backend/app/api/docs/evaluation/create_evaluation.md b/backend/app/api/docs/evaluation/create_evaluation.md index 1df41ead9..b0c2ba236 100644 --- a/backend/app/api/docs/evaluation/create_evaluation.md +++ b/backend/app/api/docs/evaluation/create_evaluation.md @@ -8,7 +8,7 @@ predefined datasets with automatic progress tracking and result collection. * Asynchronous processing with automatic progress tracking (checks every 60s) * Supports configuration from direct parameters or existing assistants * Stores results for comparison and analysis -* Note that you can use `GET /evaluations/{evaluation_id}` to monitor progress and retrieve results of evaluation. +* Use `GET /evaluations/{evaluation_id}` to monitor progress and retrieve results of evaluation. **Example: Using Direct Configuration** diff --git a/backend/app/api/docs/evaluation/upload_dataset.md b/backend/app/api/docs/evaluation/upload_dataset.md index f6da68657..f4dcae356 100644 --- a/backend/app/api/docs/evaluation/upload_dataset.md +++ b/backend/app/api/docs/evaluation/upload_dataset.md @@ -27,7 +27,7 @@ Your dataset name will be automatically sanitized for Langfuse compatibility: **Duplication Factor:** -Control how many times each Q&A pair is duplicated (1-5x, default: 1): +Control how many times each Q&A pair is duplicated (1-5x, default: 1x): * Higher duplication = better statistical significance * Useful for batch evaluation reliability * `1` = no duplication (original dataset only) From cbfff1001305e30ee36d5c1e99811eabb4af10fd Mon Sep 17 00:00:00 2001 From: nishika26 Date: Tue, 16 Dec 2025 14:28:28 +0530 Subject: [PATCH 11/11] renaming ai platform to kaapi everywhere --- backend/app/api/docs/collections/info.md | 2 +- backend/app/api/docs/collections/job_info.md | 2 +- backend/app/api/docs/documents/list.md | 2 +- backend/app/api/docs/documents/upload.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/app/api/docs/collections/info.md b/backend/app/api/docs/collections/info.md index a5f398044..576046bd0 100644 --- a/backend/app/api/docs/collections/info.md +++ b/backend/app/api/docs/collections/info.md @@ -1,4 +1,4 @@ Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization, timestamps, and associated LLM service details (`llm_service_id` and `llm_service_name`). -Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. 
Note that, documents returned are not only stored by the AI platform, but also by Vector store provider. +Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that documents returned are stored not only by Kaapi, but also by the vector store provider. diff --git a/backend/app/api/docs/collections/job_info.md b/backend/app/api/docs/collections/job_info.md index ef5589c2c..8ddbf0694 100644 --- a/backend/app/api/docs/collections/job_info.md +++ b/backend/app/api/docs/collections/job_info.md @@ -1,4 +1,4 @@ -Retrieve information about a collection job by the collection job ID. This endpoint provides detailed status and metadata for a specific collection job in the AI platform. It is especially useful for: +Retrieve information about a collection job by the collection job ID. This endpoint provides detailed status and metadata for a specific collection job in Kaapi. It is especially useful for: * Fetching the collection job object, including the collection job ID, the current status, and the associated collection details. diff --git a/backend/app/api/docs/documents/list.md b/backend/app/api/docs/documents/list.md index 1b1771db9..b11d47866 100644 --- a/backend/app/api/docs/documents/list.md +++ b/backend/app/api/docs/documents/list.md @@ -1,3 +1,3 @@ -List documents uploaded to the AI platform. +List documents uploaded to Kaapi. If you set the ``include_url`` parameter to true, a signed URL will be included in the response, which is a clickable link to access the retrieved documents. If you don't set it to true, the URL will not be included in the response. diff --git a/backend/app/api/docs/documents/upload.md b/backend/app/api/docs/documents/upload.md index cc4ad9bf5..e667015f5 100644 --- a/backend/app/api/docs/documents/upload.md +++ b/backend/app/api/docs/documents/upload.md @@ -1,4 +1,4 @@ -Upload a document to the AI platform. +Upload a document to Kaapi. - If only a file is provided, the document will be uploaded and stored, and its ID will be returned. - If a target format is specified, a transformation job will also be created to transform document into target format in the background. The response will include both the uploaded document details and information about the transformation job.
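With the schema version now coming from `settings.API_VERSION` and the endpoint descriptions loaded from markdown, a small regression check can confirm the wiring end to end. A minimal sketch, assuming the module layout shown in this series (`app.main`, `app.core.config`) and that the test runs with the backend's environment configured:

```python
# Illustrative test sketch, not part of the patch series; import paths are
# assumptions based on the file layout shown above.
from app.core.config import settings
from app.main import app


def test_openapi_version_comes_from_settings() -> None:
    schema = app.openapi()  # FastAPI builds and caches the OpenAPI schema
    assert schema["info"]["version"] == settings.API_VERSION
```

With `API_VERSION` read through `Settings`, bumping the value in `.env` (or the environment) is enough to change the version reported by both Swagger and ReDoc without touching code.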