3 changes: 3 additions & 0 deletions src/app/endpoints/query.py
@@ -387,9 +387,12 @@ async def query_endpoint_handler_base( # pylint: disable=R0914

 consume_tokens(
     configuration.quota_limiters,
+    configuration.token_usage_history,
     user_id,
     input_tokens=token_usage.input_tokens,
     output_tokens=token_usage.output_tokens,
+    model_id=model_id,
+    provider_id=provider_id,
 )

 store_conversation_into_cache(
3 changes: 3 additions & 0 deletions src/app/endpoints/streaming_query_v2.py
@@ -275,9 +275,12 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
 )
 consume_tokens(
     configuration.quota_limiters,
+    configuration.token_usage_history,
     context.user_id,
     input_tokens=token_usage.input_tokens,
     output_tokens=token_usage.output_tokens,
+    model_id=context.model_id,
+    provider_id=context.provider_id,
 )
 referenced_documents = parse_referenced_documents_from_responses_api(
     cast(OpenAIResponseObject, latest_response_object)
19 changes: 19 additions & 0 deletions src/utils/quota.py
@@ -1,33 +1,52 @@
"""Quota handling helper functions."""

from typing import Optional

import psycopg2
from fastapi import HTTPException

from log import get_logger
from models.responses import InternalServerErrorResponse, QuotaExceededResponse
from quota.quota_exceed_error import QuotaExceedError
from quota.quota_limiter import QuotaLimiter
from quota.token_usage_history import TokenUsageHistory

logger = get_logger(__name__)


# pylint: disable=R0913,R0917
def consume_tokens(
quota_limiters: list[QuotaLimiter],
token_usage_history: Optional[TokenUsageHistory],
user_id: str,
input_tokens: int,
output_tokens: int,
model_id: str,
provider_id: str,
) -> None:
"""Consume tokens from cluster and/or user quotas.

Parameters:
quota_limiters: List of quota limiter instances to consume tokens from.
token_usage_history: Optional instance of TokenUsageHistory class that records used tokens
user_id: Identifier of the user consuming tokens.
input_tokens: Number of input tokens to consume.
output_tokens: Number of output tokens to consume.
model_id: Model identification
provider_id: Provider identification

Returns:
None
"""
# record token usage history
if token_usage_history is not None:
token_usage_history.consume_tokens(
user_id=user_id,
provider=provider_id,
model=model_id,
input_tokens=input_tokens,
output_tokens=output_tokens,
)
# consume tokens all configured quota limiters
for quota_limiter in quota_limiters:
quota_limiter.consume_tokens(
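The TokenUsageHistory class imported above from quota.token_usage_history is not shown in this diff; the only contract the updated helper relies on is a consume_tokens(user_id=..., provider=..., model=..., input_tokens=..., output_tokens=...) method. A minimal in-memory sketch of that interface (purely illustrative; the real class, and whatever persistence it does, is outside this diff) could look like:

from collections import defaultdict


class InMemoryTokenUsageHistory:
    """Illustrative stand-in that accumulates token counts per (user, provider, model)."""

    def __init__(self) -> None:
        # (user_id, provider, model) -> [input_tokens, output_tokens]
        self._usage: dict[tuple[str, str, str], list[int]] = defaultdict(lambda: [0, 0])

    def consume_tokens(
        self,
        user_id: str,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
    ) -> None:
        """Record one model call's token usage for later reporting."""
        entry = self._usage[(user_id, provider, model)]
        entry[0] += input_tokens
        entry[1] += output_tokens

Passing None for token_usage_history keeps the helper backward compatible: recording is skipped and only the quota limiters are consulted.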
4 changes: 3 additions & 1 deletion tests/integration/endpoints/test_query_v2_integration.py
@@ -1150,7 +1150,9 @@ async def test_query_v2_endpoint_quota_integration(
 mock_consume.assert_called_once()
 consume_args = mock_consume.call_args
 user_id, _, _, _ = test_auth
-assert consume_args.args[1] == user_id
+assert consume_args.args[2] == user_id
+assert consume_args.kwargs["model_id"] == "test-model"
+assert consume_args.kwargs["provider_id"] == "test-provider"
 assert consume_args.kwargs["input_tokens"] == 100
 assert consume_args.kwargs["output_tokens"] == 50
Comment on lines 1150 to 1157

⚠️ Potential issue | 🟠 Major

Add assertion to verify token_usage_history parameter is passed to consume_tokens.

The PR objective is to "store info about consumed tokens into token usage history," and the AI summary confirms that token_usage_history is now passed to the quota consumer. However, this test doesn't verify that the token_usage_history parameter is actually being passed to consume_tokens.

The test verifies model_id, provider_id, input_tokens, and output_tokens, but the core feature—passing token usage history—is not validated.

🔎 Suggested assertion to add
 mock_consume.assert_called_once()
 consume_args = mock_consume.call_args
 user_id, _, _, _ = test_auth
 assert consume_args.args[2] == user_id
+assert consume_args.kwargs.get("token_usage_history") is not None
 assert consume_args.kwargs["model_id"] == "test-model"
 assert consume_args.kwargs["provider_id"] == "test-provider"
 assert consume_args.kwargs["input_tokens"] == 100
 assert consume_args.kwargs["output_tokens"] == 50

Alternatively, since the call sites in this PR pass token_usage_history as the second positional argument (after quota_limiters), the kwargs lookup above will not find it; verify it at args[1] instead:

 mock_consume.assert_called_once()
 consume_args = mock_consume.call_args
 user_id, _, _, _ = test_auth
+# Verify token_usage_history is passed as the second positional argument
+assert consume_args.args[1] is not None
 assert consume_args.args[2] == user_id
 assert consume_args.kwargs["model_id"] == "test-model"
 assert consume_args.kwargs["provider_id"] == "test-provider"
 assert consume_args.kwargs["input_tokens"] == 100
 assert consume_args.kwargs["output_tokens"] == 50
🤖 Prompt for AI Agents
In @tests/integration/endpoints/test_query_v2_integration.py around lines 1150-1157, add an assertion that verifies the token_usage_history argument is passed to the quota consumer: after capturing consume_args from mock_consume.call_args, assert that consume_args.kwargs["token_usage_history"] equals the expected token_usage_history (or, if token_usage_history is passed positionally, assert that the correct args[index] matches it). Compare against the same token usage history object used in the test setup, and use consume_args captured from mock_consume to locate where to add the assertion.
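Given the call order shown in query.py and streaming_query_v2.py above (quota_limiters, then token_usage_history, then user_id, with the remaining values passed as keyword arguments), one concrete form of that assertion could be the following; the identity check against configuration.token_usage_history assumes the test can reference the same patched configuration object the endpoint uses, which is not shown in this excerpt:

mock_consume.assert_called_once()
consume_args = mock_consume.call_args

# token_usage_history is the second positional argument at the call site
assert consume_args.args[1] is configuration.token_usage_history
assert consume_args.args[2] == user_id

An identity check is stricter than checking is not None, since it also catches the case where a different history object is wired in.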

