Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.ci
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ ATTESTATION_HOST = "attestation"
ATTESTATION_PORT = 8080

# nilAuth Trusted URLs
NILAUTH_TRUSTED_ROOT_ISSUERS = "http://nilauth-credit-server:3000" # "http://nilauth:30921"
NILAUTH_TRUSTED_ROOT_ISSUERS = "http://nilauth-credit-server:3000"
CREDIT_API_TOKEN = "n i l l i o n"

# Admin token for pricing management API
ADMIN_TOKEN = "SecretAdminToken"

# Postgres Docker Compose Config
POSTGRES_HOST = "postgres"
POSTGRES_USER = "user"
Expand Down
9 changes: 8 additions & 1 deletion nilai-api/src/nilai_api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from fastapi import Depends, FastAPI
from nilai_api.auth import get_auth_info
from nilai_api.rate_limiting import setup_redis_conn
from nilai_api.routers import private, public
from nilai_api.routers import private, public, pricing
from nilai_api.pricing_service import PricingService, set_pricing_service
from nilai_api import config
from contextlib import asynccontextmanager
from fastapi.middleware.cors import CORSMiddleware
Expand All @@ -16,6 +17,11 @@
async def lifespan(app: FastAPI):
    """Lifespan handler: connect Redis and register the pricing service.

    Opens the Redis connection for ``config.CONFIG.redis.url``, builds a
    ``PricingService`` on the returned client, awaits its
    ``initialize_from_config()`` and publishes it through
    ``set_pricing_service`` before handing control back to the application.

    Args:
        app: The FastAPI application this lifespan belongs to (not used here).

    Yields:
        A dict with the Redis client under ``"redis"`` and the rate-limit
        command under ``"redis_rate_limit_command"``.
    """
    redis_client, limiter_command = await setup_redis_conn(config.CONFIG.redis.url)

    # The service is registered globally only after its initialisation has
    # completed, so get_pricing_service() never hands out a half-built one.
    service = PricingService(redis_client)
    await service.initialize_from_config()
    set_pricing_service(service)

    lifespan_state = {
        "redis": redis_client,
        "redis_rate_limit_command": limiter_command,
    }
    yield lifespan_state


Expand Down Expand Up @@ -88,6 +94,7 @@ async def lifespan(app: FastAPI):

app.include_router(public.router)
app.include_router(private.router, dependencies=[Depends(get_auth_info)])
app.include_router(pricing.router)

app.add_middleware(
CORSMiddleware,
Expand Down
9 changes: 8 additions & 1 deletion nilai-api/src/nilai_api/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .nildb import NilDBConfig
from .web_search import WebSearchSettings
from .rate_limiting import RateLimitingConfig
from .pricing import LLMPricingConfig, LLMPriceConfig
from .utils import create_config_model, CONFIG_DATA


Expand Down Expand Up @@ -37,6 +38,9 @@ class NilAIConfig(BaseModel):
nildb: NilDBConfig = create_config_model(
NilDBConfig, "nildb", CONFIG_DATA, "NILDB_"
)
llm_pricing: LLMPricingConfig = create_config_model(
LLMPricingConfig, "llm_pricing", CONFIG_DATA
)

def prettify(self):
"""Print the config in a pretty format removing passwords and other sensitive information"""
Expand Down Expand Up @@ -66,7 +70,10 @@ def prettify(self):
# Build the configuration object once, at import time.
CONFIG = NilAIConfig()

# Public names of this package. Every entry must be followed by a comma:
# adjacent string literals are concatenated by Python, which would silently
# turn two entries into one bogus name.
__all__ = [
    # Main config object
    "CONFIG",
    # Pricing config for external use
    "LLMPriceConfig",
    "LLMPricingConfig",
]

# Log the effective configuration on import; prettify() is documented as
# removing passwords and other sensitive information.
logging.info(CONFIG.prettify())
Expand Down
3 changes: 3 additions & 0 deletions nilai-api/src/nilai_api/config/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ class AuthConfig(BaseModel):
auth_token: Optional[str] = Field(
default=None, description="Auth token for e2e tests and development"
)
admin_token: Optional[str] = Field(
default=None, description="Admin token for pricing updates"
)

@property
def credit_service_url(self) -> str:
Expand Down
31 changes: 0 additions & 31 deletions nilai-api/src/nilai_api/config/config-a779.yaml

This file was deleted.

35 changes: 0 additions & 35 deletions nilai-api/src/nilai_api/config/config-e176.yaml

This file was deleted.

35 changes: 0 additions & 35 deletions nilai-api/src/nilai_api/config/config-f910.yaml

This file was deleted.

29 changes: 29 additions & 0 deletions nilai-api/src/nilai_api/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ auth:
strategy: "api_key"
nilauth_trusted_root_issuers:
- http://nilauth-credit-server:3000
admin_token: null # Set via ADMIN_TOKEN env var for pricing management

# Documentation Configuration
docs:
Expand Down Expand Up @@ -46,3 +47,31 @@ rate_limiting:
openai/gpt-oss-20b: 50
google/gemma-3-27b-it: 50
default: 50

# LLM Pricing Configuration
llm_pricing:
default:
prompt_tokens_price: 0.15
completion_tokens_price: 0.45
web_search_cost: 0.05
models:
meta-llama/Llama-3.2-1B-Instruct:
prompt_tokens_price: 0.03
completion_tokens_price: 0.09
web_search_cost: 0.05
meta-llama/Llama-3.1-8B-Instruct:
prompt_tokens_price: 0.03
completion_tokens_price: 0.09
web_search_cost: 0.05
openai/gpt-oss-20b:
prompt_tokens_price: 0.15
completion_tokens_price: 0.45
web_search_cost: 0.05
google/gemma-3-27b-it:
prompt_tokens_price: 0.15
completion_tokens_price: 0.45
web_search_cost: 0.05
Qwen/Qwen3-Coder-30B-A3B-Instruct:
prompt_tokens_price: 0.15
completion_tokens_price: 0.45
web_search_cost: 0.05
21 changes: 21 additions & 0 deletions nilai-api/src/nilai_api/config/pricing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Dict
from pydantic import BaseModel, Field


class LLMPriceConfig(BaseModel):
    """Price sheet for a single LLM model.

    Every field has a default, so a partially specified (or empty) entry
    still validates.
    """

    # Price per one million prompt tokens.
    prompt_tokens_price: float = Field(2.0, description="Cost per 1M prompt tokens")
    # Price per one million completion tokens.
    completion_tokens_price: float = Field(
        2.0,
        description="Cost per 1M completion tokens",
    )
    # Flat price charged per web search.
    web_search_cost: float = Field(0.05, description="Cost per web search")


class LLMPricingConfig(BaseModel):
    """Root of the ``llm_pricing`` configuration section.

    Holds one ``default`` price sheet plus a mapping of per-model price
    sheets. Both fields fall back to an empty/default value when omitted.
    """

    # Price sheet built from LLMPriceConfig's own defaults when not configured.
    # NOTE(review): presumably applied to models missing from `models`; the
    # lookup logic lives outside this module, so confirm there.
    default: LLMPriceConfig = Field(default_factory=LLMPriceConfig)

    # Per-model price sheets keyed by model name; empty when not configured.
    models: Dict[str, LLMPriceConfig] = Field(default_factory=dict)
33 changes: 21 additions & 12 deletions nilai-api/src/nilai_api/credit.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
)

from nilai_api.config import CONFIG
from nilai_api.pricing_service import get_pricing_service

from nuc.envelope import NucTokenEnvelope

Expand Down Expand Up @@ -51,6 +52,22 @@ def default() -> "LLMCost":
prompt_tokens_price=2.0, completion_tokens_price=2.0, web_search_cost=0.05
)

@staticmethod
async def from_redis(model_name: str) -> "LLMCost":
    """Build the ``LLMCost`` for ``model_name`` from the pricing service.

    Args:
        model_name: Name of the model whose pricing is requested.

    Returns:
        An ``LLMCost`` carrying the three prices reported by the pricing
        service, or ``LLMCost.default()`` when a ``RuntimeError`` is raised
        while obtaining the service or fetching the price.
    """
    try:
        price = await get_pricing_service().get_price(model_name)
    except RuntimeError:
        # Pricing service not initialized, use default
        logger.warning("Pricing service not initialized, using default pricing")
        return LLMCost.default()

    # Copy the price sheet field by field into the metering cost model.
    return LLMCost(
        prompt_tokens_price=price.prompt_tokens_price,
        completion_tokens_price=price.completion_tokens_price,
        web_search_cost=price.web_search_cost,
    )

def total_cost(
self, prompt_tokens: int, completion_tokens: int, web_searches: int
) -> float:
Expand Down Expand Up @@ -87,14 +104,6 @@ class LLMResponse(BaseModel):

LLMCostDict: TypeAlias = dict[str, LLMCost]


# Static, in-code price table keyed by model name.
# Only one model has an explicit entry; the "default" key holds the value of
# LLMCost.default().
MyCostDictionary: LLMCostDict = {
"meta-llama/Llama-3.2-1B-Instruct": LLMCost(
prompt_tokens_price=3.0, completion_tokens_price=3.0, web_search_cost=0.05
),
"default": LLMCost.default(),
}

# Configure the singleton credit client
CreditClientSingleton.configure(
base_url=CONFIG.auth.credit_service_url,
Expand Down Expand Up @@ -138,10 +147,10 @@ async def extractor(request: Request) -> str:
return extractor


def llm_cost_calculator(llm_cost_dict: LLMCostDict):
def llm_cost_calculator():
async def calculator(request: Request, response_data: dict) -> float:
model_name = getattr(request, "model", "default")
llm_cost = llm_cost_dict.get(model_name, LLMCost.default())
llm_cost = await LLMCost.from_redis(model_name)
total_cost = 0.0
usage: Optional[LLMUsage] = response_data.get("usage", None)
if usage is None:
Expand All @@ -157,8 +166,8 @@ async def calculator(request: Request, response_data: dict) -> float:

# Base metering dependency for LLM requests.
# Each keyword must be passed exactly once: a repeated keyword argument is a
# SyntaxError, so only one estimated_cost / cost_calculator pair is given.
# NOTE(review): estimated_cost is presumably the amount assumed before the
# calculator produces the real cost; confirm against
# create_metering_dependency.
_base_llm_meter = create_metering_dependency(
    credential_extractor=credential_extractor(),
    estimated_cost=0.5,
    cost_calculator=llm_cost_calculator(),
    # Public identifiers are enabled only for the "nuc" auth strategy.
    public_identifiers=CONFIG.auth.auth_strategy == "nuc",
)

Expand Down
Loading
Loading