Skip to content

Commit 4041a50

Browse files
committed
feat(evaluation): add evaluation summary endpoint from trace aggregates
1 parent d52d354 commit 4041a50

File tree

5 files changed

+62
-44
lines changed

5 files changed

+62
-44
lines changed

app/api/router.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from app.retrieval.api import router as retrival_router
88
from app.ingestion.api import router as ingestion_router
99
from app.generation.api import router as generation_router
10+
from app.evaludation.api import router as evaluation_router
1011

1112

1213
router = APIRouter(prefix="/api/v1", tags=["api"])
@@ -22,3 +23,4 @@ async def ping() -> dict[str, Any]:
2223
router.include_router(retrival_router)
2324
router.include_router(ingestion_router)
2425
router.include_router(generation_router)
26+
router.include_router(evaluation_router)

app/evaludation/aggregate.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import json
2+
from pathlib import Path
3+
4+
5+
# Location of the JSONL trace file read by aggregate_traces(): three directory
# levels above this module, then "var/traces/rage_trace.jsonl".
# NOTE(review): the file name must match whatever the trace writer uses — confirm.
TRACE_FILE = Path(__file__).parent.parent.parent / "var/traces" / "rage_trace.jsonl"
6+
7+
8+
def aggregate_traces() -> dict:
    """
    Aggregate evaluation metrics from the JSONL trace file.

    Reads ``TRACE_FILE`` line by line, parses each line as a JSON object and
    averages the ``recall_k`` and ``faithfulness`` values that are present
    (a missing key or an explicit ``null`` is not included in the average).

    Blank lines are ignored. Lines that are not valid JSON objects (for
    example a partially written last line) are skipped with a warning so a
    single bad record cannot break the whole summary. Skipped lines are not
    counted as runs.

    :return: A dict with the keys ``runs`` (number of valid records),
        ``avg_recall_k`` and ``avg_faithfulness`` (rounded to 4 decimals, or
        ``None`` when no record carried that metric). When the trace file
        does not exist the result is ``runs == 0`` with both averages ``None``.
    :rtype: dict
    """
    import logging  # local import: the module only imports json and Path

    logger = logging.getLogger(__name__)

    metrics = ("recall_k", "faithfulness")
    sums = {name: 0.0 for name in metrics}
    counts = {name: 0 for name in metrics}
    total = 0

    try:
        # EAFP: open directly instead of exists()-then-open, which can race
        # with the file being created or removed in between.
        with TRACE_FILE.open("r", encoding="utf-8") as f:
            for line_no, raw in enumerate(f, start=1):
                line = raw.strip()
                if not line:
                    # Blank or trailing empty line: not a record.
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    logger.warning(
                        "Skipping malformed trace line %d in %s", line_no, TRACE_FILE
                    )
                    continue
                if not isinstance(record, dict):
                    logger.warning(
                        "Skipping non-object trace line %d in %s", line_no, TRACE_FILE
                    )
                    continue

                total += 1
                for name in metrics:
                    value = record.get(name)
                    if value is not None:
                        sums[name] += value
                        counts[name] += 1
    except FileNotFoundError:
        # No traces recorded yet: fall through to the empty summary.
        pass

    def _average(name):
        # Mean of one metric, or None when no record carried it.
        n = counts[name]
        return round(sums[name] / n, 4) if n > 0 else None

    return {
        "runs": total,
        "avg_recall_k": _average("recall_k"),
        "avg_faithfulness": _average("faithfulness"),
    }

app/evaludation/api.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from fastapi import APIRouter
2+
3+
from app.evaludation.aggregate import aggregate_traces
4+
5+
6+
# Router for the evaluation endpoints; routes declared on it are served under
# the "/eval" prefix and tagged "evaluation".
router = APIRouter(prefix="/eval", tags=["evaluation"])
7+
8+
9+
@router.get("/summary")
async def evaluation_summary() -> dict:
    """
    Return the aggregated evaluation summary computed from the trace file.

    ``aggregate_traces`` does blocking file I/O, so it is run in the event
    loop's default executor rather than directly inside this coroutine;
    calling it inline would stall the event loop while the file is read.

    :return: The dictionary produced by ``aggregate_traces``.
    :rtype: dict
    """
    import asyncio  # local import: this module does not import asyncio at top level

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, aggregate_traces)

app/generation/service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from app.generation.prompt_builder import build_prompt
66

77
from app.core.interfaces import BaseGenerator, BaseRetriever
8-
from app.generation.trace_writer import write_trace
8+
from app.evaludation.trace_writer import write_trace
99

1010

1111
logger = get_logger(__name__)

app/generation/trace_writer.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)