From c069209fbe84b8aeb592d233bdf04e2e65d91523 Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Mon, 17 Nov 2025 09:59:18 -0600 Subject: [PATCH 1/5] Add LiteLLM proxy provider support - Add new LiteLLMProvider class using pydantic-ai's native LiteLLMProvider - Register litellm_proxy provider in PROVIDER_REGISTRY - Support OpenAI-compatible API endpoints via LiteLLM proxy - Enable use of multiple LLM providers through a unified proxy interface - Upgrade pydantic-ai-slim to 1.0.1 (required for LiteLLM provider support) - Add check_litellm_proxy_running helper for robust proxy validation - Validate proxy is running before creating models (consistent with OllamaProvider) --- codebase_rag/providers/base.py | 25 +++++++++++++ codebase_rag/providers/litellm.py | 59 +++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 codebase_rag/providers/litellm.py diff --git a/codebase_rag/providers/base.py b/codebase_rag/providers/base.py index d525c1e27..4666c95be 100644 --- a/codebase_rag/providers/base.py +++ b/codebase_rag/providers/base.py @@ -159,6 +159,14 @@ def create_model(self, model_id: str, **kwargs: Any) -> OpenAIModel: return OpenAIModel(model_id, provider=provider, **kwargs) # type: ignore +# Import LiteLLM provider +try: + from .litellm import LiteLLMProvider + + _litellm_available = True +except ImportError: + _litellm_available = False + # Provider registry PROVIDER_REGISTRY: dict[str, type[ModelProvider]] = { "google": GoogleProvider, @@ -166,6 +174,10 @@ def create_model(self, model_id: str, **kwargs: Any) -> OpenAIModel: "ollama": OllamaProvider, } +# Add LiteLLM if available +if _litellm_available: + PROVIDER_REGISTRY["litellm_proxy"] = LiteLLMProvider + def get_provider(provider_name: str, **config: Any) -> ModelProvider: """Factory function to create a provider instance.""" @@ -199,3 +211,16 @@ def check_ollama_running(endpoint: str = "http://localhost:11434") -> bool: return bool(response.status_code == 200) except (httpx.RequestError, httpx.TimeoutException): return False + + +def check_litellm_proxy_running(endpoint: str = "http://localhost:4000") -> bool: + """Check if LiteLLM proxy is running and accessible.""" + try: + # LiteLLM proxy health endpoint + base_url = endpoint.rstrip("/v1").rstrip("/") + health_url = urljoin(base_url, "/health") + with httpx.Client(timeout=5.0) as client: + response = client.get(health_url) + return bool(response.status_code == 200) + except (httpx.RequestError, httpx.TimeoutException): + return False diff --git a/codebase_rag/providers/litellm.py b/codebase_rag/providers/litellm.py new file mode 100644 index 000000000..6db829c9d --- /dev/null +++ b/codebase_rag/providers/litellm.py @@ -0,0 +1,59 @@ +"""LiteLLM provider using pydantic-ai's native LiteLLMProvider.""" + +from typing import Any + +from loguru import logger +from pydantic_ai.models.openai import OpenAIChatModel +from pydantic_ai.providers.litellm import LiteLLMProvider as PydanticLiteLLMProvider + +from .base import ModelProvider, check_litellm_proxy_running + + +class LiteLLMProvider(ModelProvider): + def __init__( + self, + api_key: str | None = None, + endpoint: str = "http://localhost:4000/v1", + **kwargs: Any, + ) -> None: + super().__init__(**kwargs) + self.api_key = api_key + self.endpoint = endpoint + + @property + def provider_name(self) -> str: + return "litellm_proxy" + + def validate_config(self) -> None: + if not self.endpoint: + raise ValueError( + "LiteLLM provider requires endpoint. 
" + "Set ORCHESTRATOR_ENDPOINT or CYPHER_ENDPOINT in .env file." + ) + + # Check if LiteLLM proxy is running + base_url = self.endpoint.rstrip("/v1").rstrip("/") + if not check_litellm_proxy_running(base_url): + raise ValueError( + f"LiteLLM proxy server not responding at {base_url}. " + f"Make sure LiteLLM proxy is running." + ) + + def create_model(self, model_id: str, **kwargs: Any) -> OpenAIChatModel: + """Create OpenAI-compatible model for LiteLLM proxy. + + Args: + model_id: Model identifier (e.g., "openai/gpt-3.5-turbo", "anthropic/claude-3") + **kwargs: Additional arguments passed to OpenAIChatModel + + Returns: + OpenAIChatModel configured to use the LiteLLM proxy + """ + self.validate_config() + + logger.info(f"Creating LiteLLM proxy model: {model_id} at {self.endpoint}") + + # Use pydantic-ai's native LiteLLMProvider + provider = PydanticLiteLLMProvider(api_key=self.api_key, api_base=self.endpoint) + + return OpenAIChatModel(model_id, provider=provider, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index 6be0c2979..16ff6de3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.12" dependencies = [ "loguru>=0.7.3", "mcp>=1.21.1", - "pydantic-ai-slim[google,openai,vertexai]>=0.2.18", + "pydantic-ai-slim[google,openai,vertexai]>=1.0.1", "pydantic-settings>=2.0.0", "pymgclient>=1.4.0", "python-dotenv>=1.1.0", From 256132e489c878496636225cadca3089412ce03f Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Mon, 17 Nov 2025 10:49:20 -0600 Subject: [PATCH 2/5] fix(providers): resolve litellm_proxy provider registration issues - Upgrade pydantic-ai-slim to 1.18.0 which includes LiteLLM provider support - Fix circular import in providers/base.py by moving LiteLLM import after registry definition - Fix circular import in providers/litellm.py by using local import for check_litellm_proxy_running - Add debug logging when LiteLLM provider is not available --- codebase_rag/providers/base.py | 18 ++++++++---------- codebase_rag/providers/litellm.py | 5 ++++- pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/codebase_rag/providers/base.py b/codebase_rag/providers/base.py index 4666c95be..9e15cceb7 100644 --- a/codebase_rag/providers/base.py +++ b/codebase_rag/providers/base.py @@ -159,14 +159,6 @@ def create_model(self, model_id: str, **kwargs: Any) -> OpenAIModel: return OpenAIModel(model_id, provider=provider, **kwargs) # type: ignore -# Import LiteLLM provider -try: - from .litellm import LiteLLMProvider - - _litellm_available = True -except ImportError: - _litellm_available = False - # Provider registry PROVIDER_REGISTRY: dict[str, type[ModelProvider]] = { "google": GoogleProvider, @@ -174,9 +166,15 @@ def create_model(self, model_id: str, **kwargs: Any) -> OpenAIModel: "ollama": OllamaProvider, } -# Add LiteLLM if available -if _litellm_available: +# Import LiteLLM provider after base classes are defined to avoid circular import +try: + from .litellm import LiteLLMProvider + PROVIDER_REGISTRY["litellm_proxy"] = LiteLLMProvider + _litellm_available = True +except ImportError as e: + logger.debug(f"LiteLLM provider not available: {e}") + _litellm_available = False def get_provider(provider_name: str, **config: Any) -> ModelProvider: diff --git a/codebase_rag/providers/litellm.py b/codebase_rag/providers/litellm.py index 6db829c9d..d984e7d2c 100644 --- a/codebase_rag/providers/litellm.py +++ b/codebase_rag/providers/litellm.py @@ -6,7 +6,7 @@ from pydantic_ai.models.openai import OpenAIChatModel from 
pydantic_ai.providers.litellm import LiteLLMProvider as PydanticLiteLLMProvider -from .base import ModelProvider, check_litellm_proxy_running +from .base import ModelProvider class LiteLLMProvider(ModelProvider): @@ -32,6 +32,9 @@ def validate_config(self) -> None: ) # Check if LiteLLM proxy is running + # Import locally to avoid circular import + from .base import check_litellm_proxy_running + base_url = self.endpoint.rstrip("/v1").rstrip("/") if not check_litellm_proxy_running(base_url): raise ValueError( diff --git a/pyproject.toml b/pyproject.toml index 16ff6de3d..28c8ea85e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.12" dependencies = [ "loguru>=0.7.3", "mcp>=1.21.1", - "pydantic-ai-slim[google,openai,vertexai]>=1.0.1", + "pydantic-ai-slim[google,openai,vertexai]>=1.18.0", "pydantic-settings>=2.0.0", "pymgclient>=1.4.0", "python-dotenv>=1.1.0", From 37c014bc0e1d67e73012154fba1a088daabcf4cf Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Mon, 17 Nov 2025 12:28:16 -0600 Subject: [PATCH 3/5] fix(litellm): improve health check to support authenticated proxies --- codebase_rag/providers/base.py | 38 +++++++++++++++++++++++++++---- codebase_rag/providers/litellm.py | 4 ++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/codebase_rag/providers/base.py b/codebase_rag/providers/base.py index 9e15cceb7..a94b4c283 100644 --- a/codebase_rag/providers/base.py +++ b/codebase_rag/providers/base.py @@ -211,14 +211,42 @@ def check_ollama_running(endpoint: str = "http://localhost:11434") -> bool: return False -def check_litellm_proxy_running(endpoint: str = "http://localhost:4000") -> bool: - """Check if LiteLLM proxy is running and accessible.""" +def check_litellm_proxy_running( + endpoint: str = "http://localhost:4000", api_key: str | None = None +) -> bool: + """Check if LiteLLM proxy is running and accessible. 
+ + Args: + endpoint: Base URL of the LiteLLM proxy server + api_key: Optional API key for authenticated proxies + + Returns: + True if the proxy is accessible, False otherwise + """ try: - # LiteLLM proxy health endpoint base_url = endpoint.rstrip("/v1").rstrip("/") + + # Try health endpoint first (works for unauthenticated proxies) health_url = urljoin(base_url, "/health") + headers = {} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + with httpx.Client(timeout=5.0) as client: - response = client.get(health_url) - return bool(response.status_code == 200) + response = client.get(health_url, headers=headers) + + # If health endpoint works, we're good + if response.status_code == 200: + return True + + # If health endpoint fails (401, 404, 405, 500, etc.), + # try the models endpoint as a fallback when we have an API key + if api_key: + models_url = urljoin(base_url, "/v1/models") + response = client.get(models_url, headers=headers) + # Accept 200 (success) - server is up and API key works + return bool(response.status_code == 200) + + return False except (httpx.RequestError, httpx.TimeoutException): return False diff --git a/codebase_rag/providers/litellm.py b/codebase_rag/providers/litellm.py index d984e7d2c..9095bfe52 100644 --- a/codebase_rag/providers/litellm.py +++ b/codebase_rag/providers/litellm.py @@ -36,10 +36,10 @@ def validate_config(self) -> None: from .base import check_litellm_proxy_running base_url = self.endpoint.rstrip("/v1").rstrip("/") - if not check_litellm_proxy_running(base_url): + if not check_litellm_proxy_running(base_url, api_key=self.api_key): raise ValueError( f"LiteLLM proxy server not responding at {base_url}. " - f"Make sure LiteLLM proxy is running." + f"Make sure LiteLLM proxy is running and API key is valid." ) def create_model(self, model_id: str, **kwargs: Any) -> OpenAIChatModel: From 6fbf7022834c3239b4f9f189356554914abbc74f Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Mon, 17 Nov 2025 17:53:14 -0600 Subject: [PATCH 4/5] docs(env): add LiteLLM provider example to .env.example --- .env.example | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.env.example b/.env.example index 44ef9b936..ea49b73ef 100644 --- a/.env.example +++ b/.env.example @@ -45,6 +45,17 @@ CYPHER_ENDPOINT=http://localhost:11434/v1 # CYPHER_MODEL=gemini-2.5-flash # CYPHER_API_KEY=your-google-api-key +# Example 5: LiteLLM with custom provider +# ORCHESTRATOR_PROVIDER=litellm_proxy +# ORCHESTRATOR_MODEL=gpt-oss:120b +# ORCHESTRATOR_ENDPOINT=http://litellm:4000/v1 +# ORCHESTRATOR_API_KEY=sk-your-litellm-key + +# CYPHER_PROVIDER=litellm_proxy +# CYPHER_MODEL=openrouter/gpt-oss:120b +# CYPHER_ENDPOINT=http://litellm:4000/v1 +# CYPHER_API_KEY=sk-your-litellm-key + # Memgraph settings MEMGRAPH_HOST=localhost MEMGRAPH_PORT=7687 From e10b98cc47eb3bb7292badacfc1cfd254bc166a1 Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Sat, 27 Dec 2025 17:46:32 -0600 Subject: [PATCH 5/5] docs(env): update example number for LiteLLM custom provider --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index ea49b73ef..7edceedd0 100644 --- a/.env.example +++ b/.env.example @@ -45,7 +45,7 @@ CYPHER_ENDPOINT=http://localhost:11434/v1 # CYPHER_MODEL=gemini-2.5-flash # CYPHER_API_KEY=your-google-api-key -# Example 5: LiteLLM with custom provider +# Example 6: LiteLLM with custom provider # ORCHESTRATOR_PROVIDER=litellm_proxy # ORCHESTRATOR_MODEL=gpt-oss:120b # ORCHESTRATOR_ENDPOINT=http://litellm:4000/v1
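
Usage sketch (not part of the patches above): a minimal example of how the new litellm_proxy provider could be exercised through the registry that PATCH 1/5 and 2/5 modify. The names used here (get_provider, create_model, check_litellm_proxy_running, and the LiteLLMProvider keyword arguments) come from the diffs; the endpoint, API key, and model id are placeholder values, and the assumption that get_provider forwards its **config kwargs to the provider constructor is inferred from the registry factory rather than confirmed by these patches.

    # Assumed usage based on the patched provider registry; the endpoint,
    # API key, and model id below are placeholders, not values from the patches.
    from codebase_rag.providers.base import (
        check_litellm_proxy_running,
        get_provider,
    )

    endpoint = "http://localhost:4000/v1"
    api_key = "sk-your-litellm-key"

    # Optional pre-flight probe; LiteLLMProvider.validate_config() performs
    # the same check internally before a model is created.
    if not check_litellm_proxy_running("http://localhost:4000", api_key=api_key):
        raise RuntimeError("LiteLLM proxy is not reachable")

    # PROVIDER_REGISTRY maps "litellm_proxy" to LiteLLMProvider once the
    # import in providers/base.py succeeds.
    provider = get_provider("litellm_proxy", api_key=api_key, endpoint=endpoint)
    model = provider.create_model("openai/gpt-3.5-turbo")  # an OpenAIChatModel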