1 change: 1 addition & 0 deletions .github/workflows/e2e_tests.yaml
@@ -110,6 +110,7 @@ jobs:
grep -A 3 "llama_stack:" lightspeed-stack.yaml

- name: Docker Login for quay access
if: matrix.mode == 'server'
env:
QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }}
QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_DOWNSTREAM_TOKEN }}
4 changes: 4 additions & 0 deletions Containerfile
@@ -85,6 +85,10 @@ RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs jq
RUN mkdir -p /opt/app-root/src/.llama/storage /opt/app-root/src/.llama/providers.d && \
chown -R 1001:1001 /opt/app-root/src/.llama

# Create Hugging Face cache directory for embedding models
RUN mkdir -p /opt/app-root/src/.cache/huggingface && \
chown -R 1001:1001 /opt/app-root/src/.cache

# Add executables from .venv to system PATH
ENV PATH="/app-root/.venv/bin:$PATH"

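Context for the new cache directory: the embedding model pinned in run.yaml (nomic-ai/nomic-embed-text-v1.5) is downloaded at runtime into the Hugging Face cache, and the image runs as UID 1001, so that directory must already exist and be writable. A minimal sketch of where the download lands by default, assuming huggingface_hub is installed and HOME is /opt/app-root/src inside the container:

import os

from huggingface_hub import snapshot_download

# Default cache root is ~/.cache/huggingface unless HF_HOME overrides it;
# with HOME=/opt/app-root/src this matches the directory created above.
print(os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface")))

# Embedding model referenced in run.yaml; its files are stored under that cache.
snapshot_download("nomic-ai/nomic-embed-text-v1.5")
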
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
# Used by authentication/k8s integration
"kubernetes>=30.1.0",
# Used to call Llama Stack APIs
"llama-stack==0.3.5",
"llama-stack-client==0.3.5",
"llama-stack==0.4.2",
"llama-stack-client==0.4.2",
Contributor: are we sure there are no other dependencies?

Contributor Author: can we resolve this comment?

# Used by Logger
"rich>=14.0.0",
# Used by JWK token auth handler
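On the open question about other dependencies: a quick way to confirm that only these two pins reference llama-stack is to scan the dependency list. A throwaway sketch, assuming Python 3.11+ for the stdlib tomllib module:

import tomllib

# Hypothetical check, not part of the repository: list every dependency
# string in pyproject.toml that mentions "llama".
with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

pins = [dep for dep in pyproject["project"]["dependencies"] if "llama" in dep.lower()]
print(pins)  # expected after this PR: ['llama-stack==0.4.2', 'llama-stack-client==0.4.2']
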
9 changes: 1 addition & 8 deletions run.yaml
@@ -9,7 +9,6 @@ apis:
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io

@@ -137,11 +136,7 @@ storage:
namespace: prompts
backend: kv_default
registered_resources:
models:
- model_id: gpt-4o-mini
provider_id: openai
model_type: llm
provider_model_id: gpt-4o-mini
models: []
shields:
- shield_id: llama-guard
provider_id: llama-guard
@@ -160,5 +155,3 @@
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard
telemetry:
enabled: true
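
The run.yaml diff removes the telemetry API entry, the top-level telemetry block, and the statically registered gpt-4o-mini model (registered_resources.models is now empty). A small verification sketch, assuming PyYAML is available, that could be run against the updated file:

import yaml

# Not part of the PR: sanity-check the updated run.yaml shape.
with open("run.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

assert config["registered_resources"]["models"] == []
assert "telemetry" not in config["apis"]
assert "telemetry" not in config
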
45 changes: 22 additions & 23 deletions src/app/endpoints/a2a.py
@@ -7,18 +7,17 @@
from datetime import datetime, timezone
from typing import Annotated, Any, AsyncIterator, MutableMapping, Optional

from fastapi import APIRouter, Depends, HTTPException, Request, status
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStream,
)
from llama_stack_client import APIConnectionError
from starlette.responses import Response, StreamingResponse

from a2a.server.agent_execution import AgentExecutor, RequestContext
from a2a.server.apps import A2AStarletteApplication
from a2a.server.events import EventQueue
from a2a.server.request_handlers import DefaultRequestHandler
from a2a.server.tasks import TaskStore
from a2a.server.tasks.task_updater import TaskUpdater
from a2a.types import (
AgentCapabilities,
AgentCard,
AgentSkill,
AgentProvider,
AgentCapabilities,
AgentSkill,
Artifact,
Message,
Part,
@@ -28,27 +27,27 @@
TaskStatusUpdateEvent,
TextPart,
)
from a2a.server.agent_execution import AgentExecutor, RequestContext
from a2a.server.events import EventQueue
from a2a.server.request_handlers import DefaultRequestHandler
from a2a.server.tasks import TaskStore
from a2a.server.tasks.task_updater import TaskUpdater
from a2a.server.apps import A2AStarletteApplication
from a2a.utils import new_agent_text_message, new_task
from fastapi import APIRouter, Depends, HTTPException, Request, status
from llama_stack_api.openai_responses import (
OpenAIResponseObjectStream,
)
from llama_stack_client import APIConnectionError
from starlette.responses import Response, StreamingResponse

from authentication.interface import AuthTuple
from authentication import get_auth_dependency
from authorization.middleware import authorize
from configuration import configuration
from a2a_storage import A2AStorageFactory, A2AContextStore
from models.config import Action
from models.requests import QueryRequest
from a2a_storage import A2AContextStore, A2AStorageFactory
from app.endpoints.query import (
select_model_and_provider_id,
evaluate_model_hints,
select_model_and_provider_id,
)
from app.endpoints.streaming_query_v2 import retrieve_response
from authentication import get_auth_dependency
from authentication.interface import AuthTuple
from authorization.middleware import authorize
from client import AsyncLlamaStackClientHolder
from configuration import configuration
from models.config import Action
from models.requests import QueryRequest
from utils.mcp_headers import mcp_headers_dependency
from utils.responses import extract_text_from_response_output_item
from version import __version__
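Aside from the import reordering, the functional change in this hunk is that OpenAIResponseObjectStream now comes from llama_stack_api.openai_responses instead of llama_stack.apis.agents.openai_responses. If code ever had to run against both layouts, a fallback import would be one option (hypothetical, not used in this PR):

# Hypothetical compatibility shim: prefer the 0.4.x module path and fall
# back to the pre-0.4 location when it is not importable.
try:
    from llama_stack_api.openai_responses import OpenAIResponseObjectStream
except ImportError:  # older llama-stack releases
    from llama_stack.apis.agents.openai_responses import OpenAIResponseObjectStream
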
9 changes: 4 additions & 5 deletions src/app/endpoints/conversations_v3.py
@@ -7,7 +7,6 @@
from llama_stack_client import (
APIConnectionError,
APIStatusError,
NOT_GIVEN,
)
from sqlalchemy.exc import SQLAlchemyError

@@ -332,10 +331,10 @@ async def get_conversation_endpoint_handler(
# Use Conversations API to retrieve conversation items
conversation_items_response = await client.conversations.items.list(
conversation_id=llama_stack_conv_id,
after=NOT_GIVEN,
include=NOT_GIVEN,
limit=NOT_GIVEN,
order=NOT_GIVEN,
after=None,
include=None,
limit=None,
order=None,
Contributor comment on lines +334 to +337:

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# First, let's see the file and the specific lines in question
cat -n src/app/endpoints/conversations_v3.py | sed -n '320,350p'

🏁 Script executed:

# Check if NOT_GIVEN is used elsewhere in this file
rg -n "NOT_GIVEN" src/app/endpoints/conversations_v3.py

🏁 Script executed:

# Search for NOT_GIVEN usage patterns across the codebase
rg -n "NOT_GIVEN" --type py | head -20

🏁 Script executed:

# Look for llama-stack-client imports and usage patterns
rg -n "from llama_stack_client" --type py | head -10

🏁 Script executed:

# Check if there's a requirements or dependencies file
fd -e txt -e toml -e cfg -e lock | grep -E "(requirements|pyproject|setup|lock)"

🏁 Script executed:

# Check pyproject.toml for llama-stack-client version
cat pyproject.toml | grep -A 5 -B 5 "llama"

🏁 Script executed:

# Broader search for NOT_GIVEN across all files
rg -n "NOT_GIVEN" --type py

🏁 Script executed:

# Check if there are any test files for conversations endpoint to see expected patterns
find . -name "*conversations*" -type f | head -20

🌐 Web query:

llama-stack-client 0.4.2 conversations items list API parameters None handling

💡 Result:

For llama-stack-client==0.4.2 (uploaded Jan 16, 2026) [1], the Conversations → Items → List call is a paginated “list” response (it includes first_id, last_id, has_more), and those pagination IDs can legitimately be None when absent/empty. [3]

"None handling" (what to do in practice)

  • For request parameters (query/body): if you want a parameter to be omitted, don’t pass it at all (rather than passing None). Passing None typically means “send an explicit null / explicit value” and can change server-side validation/behavior.
  • For response fields: None can mean either the JSON field was missing or it was explicitly null. The client documents how to distinguish those cases using the Pydantic model’s .model_fields_set. [2]

If you paste the exact Python call you’re making (including the kwargs you’re passing as None) and the error/traceback, I can map it to the specific parameter(s) that should be omitted vs explicitly set.


🏁 Script executed:

# Check test files to see how conversations.items.list is properly called
cat -n tests/unit/app/endpoints/test_conversations_v2.py | head -100

🏁 Script executed:

# Check test_conversations_v3 if it exists to see pattern
ls -la tests/unit/app/endpoints/test_conversations* 2>/dev/null

🏁 Script executed:

# Search for how other endpoints call list methods with optional parameters
rg -n "\.list\(" src/app/endpoints/ --type py -A 3 | head -40

🏁 Script executed:

# Check if there's any usage of omitting parameters instead of passing None
rg -n "conversations\.items\.list" --type py -A 5

🌐 Web query:

llama-stack-client 0.4.2 NOT_GIVEN sentinel optional parameters conversations

💡 Result:

In llama-stack-client==0.4.2 (released Jan 16, 2026) (pypi.org), many SDK method signatures use a Stainless-style sentinel:

  • param: T | NotGiven = NOT_GIVEN
  • Meaning:
    • leave it as NOT_GIVEN (the default) ⇒ the field is omitted from the request body/query entirely
    • pass None (when the type allows Optional[...]) ⇒ the field is sent as JSON null (i.e., explicitly set/cleared)

This pattern is typical of Stainless-generated Python SDKs. (pypi.org)

Practical use with conversations

You normally don’t need to import/use NOT_GIVEN—just don’t pass the optional kwarg(s):

conv = client.conversations.create()

Creating a conversation like this is the standard usage pattern. (git.kvant.cloud)

You’d use NOT_GIVEN mainly when you’re programmatically assembling kwargs and want to ensure a key is not sent:

from llama_stack_client import NOT_GIVEN

kwargs = {"metadata": NOT_GIVEN}   # omit metadata entirely
conv = client.conversations.create(**kwargs)

Whereas:

conv = client.conversations.create(metadata=None)

would explicitly send metadata: null (only useful if the API treats “null” differently from “missing”).

Remove explicit None parameters from conversations.items.list() call or use NOT_GIVEN sentinel.

In llama-stack-client 0.4.2, passing None sends an explicit JSON null value, which differs semantically from omitting the parameter. For after, include, limit, and order in the conversations API, omit these parameters entirely:

conversation_items_response = await client.conversations.items.list(
    conversation_id=llama_stack_conv_id,
)

Alternatively, if programmatically assembling parameters, import and use the NOT_GIVEN sentinel:

from llama_stack_client import NOT_GIVEN

conversation_items_response = await client.conversations.items.list(
    conversation_id=llama_stack_conv_id,
    after=NOT_GIVEN,
    include=NOT_GIVEN,
    limit=NOT_GIVEN,
    order=NOT_GIVEN,
)

This ensures the backend treats these as omitted parameters rather than explicitly set to null, preserving the intended default behavior.

🤖 Prompt for AI Agents
In `@src/app/endpoints/conversations_v3.py` around lines 334 - 337, The
conversations.items.list call is passing explicit None for parameters (after,
include, limit, order) which sends JSON nulls; update the call in the code that
invokes client.conversations.items.list (search for conversation_items_response
or llama_stack_conv_id usage) to either remove those keyword args entirely so
they are omitted, or import and pass the NOT_GIVEN sentinel from
llama_stack_client and set after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN,
order=NOT_GIVEN to preserve intended defaults.

)
items = (
conversation_items_response.data
14 changes: 13 additions & 1 deletion src/app/endpoints/health.py
@@ -6,10 +6,10 @@
"""

import logging
from enum import Enum
from typing import Annotated, Any

from fastapi import APIRouter, Depends, Response, status
from llama_stack.providers.datatypes import HealthStatus
from llama_stack_client import APIConnectionError

from authentication import get_auth_dependency
@@ -30,6 +30,18 @@
router = APIRouter(tags=["health"])


# HealthStatus enum was removed from llama_stack in newer versions
# Defining locally for compatibility
class HealthStatus(str, Enum):
"""Health status enum for provider health checks."""

OK = "ok"
ERROR = "Error"
NOT_IMPLEMENTED = "not_implemented"
HEALTHY = "healthy"
UNKNOWN = "unknown"


get_readiness_responses: dict[int | str, dict[str, Any]] = {
200: ReadinessResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
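Because the replacement HealthStatus is a str-based Enum, its members compare equal to the raw status strings that providers report, so existing comparisons keep working. A minimal usage sketch (not from the PR; the import path is assumed from the src/ layout):

from app.endpoints.health import HealthStatus  # assumed import path


def provider_is_ready(health: dict[str, str]) -> bool:
    """Treat an 'ok' or 'healthy' provider status as ready, anything else as not."""
    status = health.get("status", HealthStatus.UNKNOWN)
    # str-Enum members compare equal to their raw values, e.g. "ok" == HealthStatus.OK.
    return status in (HealthStatus.OK, HealthStatus.HEALTHY)
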
40 changes: 38 additions & 2 deletions src/app/endpoints/models.py
@@ -26,6 +26,41 @@
router = APIRouter(tags=["models"])


def parse_llama_stack_model(model: Any) -> dict[str, Any]:
"""
Parse llama-stack model.

Converting the new llama-stack model format (0.4.x) with custom_metadata.

Args:
model: Model object from llama-stack (has id, custom_metadata, object fields)

Returns:
dict: Model in legacy format with identifier, provider_id, model_type, etc.
"""
custom_metadata = getattr(model, "custom_metadata", {}) or {}

model_type = str(custom_metadata.get("model_type", "unknown"))

metadata = {
k: v
for k, v in custom_metadata.items()
if k not in ("provider_id", "provider_resource_id", "model_type")
}

legacy_model = {
"identifier": getattr(model, "id", ""),
"metadata": metadata,
"api_model_type": model_type,
"provider_id": str(custom_metadata.get("provider_id", "")),
"type": getattr(model, "object", "model"),
"provider_resource_id": str(custom_metadata.get("provider_resource_id", "")),
"model_type": model_type,
}

return legacy_model


models_responses: dict[int | str, dict[str, Any]] = {
200: ModelsResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
@@ -72,8 +107,9 @@ async def models_endpoint_handler(
client = AsyncLlamaStackClientHolder().get_client()
# retrieve models
models = await client.models.list()
m = [dict(m) for m in models]
return ModelsResponse(models=m)
# Parse models to legacy format
parsed_models = [parse_llama_stack_model(model) for model in models]
return ModelsResponse(models=parsed_models)

# Connection to Llama Stack server failed
except APIConnectionError as e:
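For illustration, here is roughly what parse_llama_stack_model produces; the model object below is a hand-built stand-in for the 0.4.x objects returned by client.models.list(), and the context_length metadata key is only an example:

from types import SimpleNamespace

from app.endpoints.models import parse_llama_stack_model  # assumed import path

fake_model = SimpleNamespace(
    id="gpt-4o-mini",
    object="model",
    custom_metadata={
        "provider_id": "openai",
        "provider_resource_id": "gpt-4o-mini",
        "model_type": "llm",
        "context_length": 128000,  # illustrative extra metadata
    },
)

legacy = parse_llama_stack_model(fake_model)
# legacy["identifier"] == "gpt-4o-mini"
# legacy["provider_id"] == "openai"
# legacy["model_type"] == legacy["api_model_type"] == "llm"
# legacy["metadata"] == {"context_length": 128000}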