1+ import time
12from app .core .logging import get_logger
3+ from app .core .metrics import APP_NAME , APP_PROMPT_BUILD_SECONDS
24from app .generation .models import GenerateAnswer , GenerationRequest , GenerationResponse
35from app .generation .prompt_builder import build_prompt
46
5- from app .core .interfaces import BaseRetriever
7+ from app .core .interfaces import BaseGenerator , BaseRetriever
68
79
# Module-level logger named after this module, via the app's logging helper.
logger = get_logger(__name__)
1113async def generate_answer (
12- req : GenerationRequest , retriever : BaseRetriever
14+ req : GenerationRequest ,
15+ retriever : BaseRetriever ,
16+ generator : BaseGenerator ,
1317) -> GenerationResponse :
1418 """
1519 generate handles the text generation process by retrieving relevant documents
@@ -36,20 +40,22 @@ async def generate_answer(
3640 """
3741 logger .debug (f"Retrieved { len (retrieved_chunks )} chunks" )
3842
39- _ = build_prompt (req .query , retrieved_chunks )
43+ # prompt build metrics
44+ t0 = time .monotonic ()
45+ prompt = build_prompt (req .query , retrieved_chunks )
46+ APP_PROMPT_BUILD_SECONDS .labels (app_name = APP_NAME ).observe (time .monotonic () - t0 )
4047
41- synthesized = (
42- " " .join (chunk ["doc_id" ] for chunk in retrieved_chunks ) or "No context found."
43- )
# Generation latency — NOTE(review): this observes into APP_PROMPT_BUILD_SECONDS,
# the same histogram used above for prompt building, so generation time is
# double-counted into the prompt-build metric. A dedicated generation-latency
# histogram (e.g. APP_GENERATION_SECONDS) should be imported and used here.
49+ t1 = time .monotonic ()
50+ answer_text = await generator .generate (prompt = prompt )
51+ APP_PROMPT_BUILD_SECONDS .labels (app_name = APP_NAME ).observe (time .monotonic () - t1 )
4452
45- logger .info (
46- f"Generated answer for query='{ req .query } ' using { len (retrieved_chunks )} "
47- )
53+ logger .info (f"Generated answer for query='{ req .query } '" )
4854
4955 return GenerationResponse (
5056 query = req .query ,
5157 answer = GenerateAnswer (
52- text = f"Mock answer: { synthesized } " ,
53- used_contexts = retrieved_chunks ,
58+ text = answer_text ,
59+ used_context = retrieved_chunks ,
5460 ),
5561 )
0 commit comments