
Commit 0328704

efahl authored and aparcar committed
stats: rework summary statistics collection and display
The front page graph of "Builds per Day" is too minimal for any meaningful use. Let's rework it to show per-day numbers for:
- total build requests
- total failures
- count of requests satisfied by already-built images

Many internal enhancements were made and some bugs were fixed.
- Centralize the mechanism for logging summary build events.
- The old cache-hits value was completely incorrect, being logged for every download and hence was grossly over reporting.
- Rework the aggregation bucket boundaries to land on midnight, so that the "per day" values really are per day.
- Add lines to graph as mentioned above.
- Add a test for the 'builds-per-day' API call.
- Extend the summary stats test to verify all data.

Since the old summary data format is no longer used, a conversion script is provided with this commit. See misc/stats_modernize.py, which is intended to be run once before this upgraded code is deployed (although it is safe to run multiple times, its only destructive act is to delete the invalid 'cache-hits').

Link: #1513
Signed-off-by: Eric Fahlgren <ericfahlgren@gmail.com>
1 parent a716d41 commit 0328704

File tree: 8 files changed, +199 -59 lines changed

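As the commit message notes, the front-page graph is now backed by three per-day series (requests, cache-hits, failures). A minimal sketch of fetching the reworked data from a running ASU instance follows; the host and the "/api/v1/stats" prefix are assumptions, since the router mounting is not part of this commit, and only the trailing "/builds-per-day" route is defined below in asu/routers/stats.py.

    import httpx

    # Assumed base URL and route prefix; only "/builds-per-day" itself is
    # defined in this commit (see asu/routers/stats.py below).
    url = "https://sysupgrade.openwrt.org/api/v1/stats/builds-per-day"
    payload = httpx.get(url).json()

    print(payload["labels"][-3:])           # e.g. ['2025-05-30Z', '2025-05-31Z', '2025-06-01Z']
    for dataset in payload["datasets"]:     # Requests, Cache-Hits, Failures
        print(dataset["label"], dataset["data"][-3:])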

asu/build.py

Lines changed: 14 additions & 1 deletion
@@ -15,6 +15,7 @@
 from asu.package_changes import apply_package_changes
 from asu.util import (
     add_timestamp,
+    add_build_event,
     check_manifest,
     diff_packages,
     fingerprint_pubkey_usign,
@@ -32,7 +33,7 @@
 log = logging.getLogger("rq.worker")


-def build(build_request: BuildRequest, job=None):
+def _build(build_request: BuildRequest, job=None):
     """Build image request and setup ImageBuilders automatically

     The `request` dict contains properties of the requested image.
@@ -429,3 +430,15 @@ def build(build_request: BuildRequest, job=None):
         job.save_meta()

     return json_content
+
+
+def build(build_request: BuildRequest, job=None):
+    try:
+        result = _build(build_request, job)
+    except Exception:
+        # Log all build errors, including internal server errors.
+        add_build_event("failures")
+        raise
+    else:
+        add_build_event("successes")
+        return result

asu/routers/api.py

Lines changed: 5 additions & 5 deletions
@@ -10,6 +10,7 @@
 from asu.config import settings
 from asu.util import (
     add_timestamp,
+    add_build_event,
     client_get,
     get_branch,
     get_queue,
@@ -194,9 +195,6 @@ def api_v1_build_get(request: Request, request_hash: str, response: Response) ->
             "detail": "could not find provided request hash",
         }

-    if job.is_finished and request.method != "HEAD":
-        add_timestamp("stats:cache-hits", {"stats": "cache-hits"})
-
     content, status, headers = return_job_v1(job)
     response.headers.update(headers)
     response.status_code = status
@@ -214,6 +212,8 @@ def api_v1_build_post(
     # Sanitize the profile in case the client did not (bug in older LuCI app).
     build_request.profile = build_request.profile.replace(",", "_")

+    add_build_event("requests")
+
     request_hash: str = get_request_hash(build_request)
     job: Job = get_queue().fetch_job(request_hash)
     status: int = 200
@@ -235,7 +235,7 @@
     )

     if job is None:
-        add_timestamp("stats:cache-misses", {"stats": "cache-misses"})
+        add_build_event("cache-misses")

         content, status = validate_request(request.app, build_request)
         if content:
@@ -261,7 +261,7 @@
         )
     else:
         if job.is_finished:
-            add_timestamp("stats:cache-hits", {"stats": "cache-hits"})
+            add_build_event("cache-hits")

         content, status, headers = return_job_v1(job)
         response.headers.update(headers)

asu/routers/stats.py

Lines changed: 41 additions & 28 deletions
@@ -1,47 +1,60 @@
-from datetime import datetime
+from datetime import datetime as dt, timedelta, UTC

 from fastapi import APIRouter

-from asu.util import get_redis_client
+from asu.util import get_redis_ts

 router = APIRouter()


-def get_redis_ts():
-    return get_redis_client().ts()
+DAY_MS = 24 * 60 * 60 * 1000
+N_DAYS = 30


 @router.get("/builds-per-day")
-def get_builds_per_day():
-    ts = get_redis_ts()
-    now = int(datetime.utcnow().timestamp() * 1000)
-    start = now - 30 * 24 * 60 * 60 * 1000  # last 30 days
+def get_builds_per_day() -> dict:
+    """
+    References:
+    https://redis.readthedocs.io/en/latest/redismodules.html#redis.commands.timeseries.commands.TimeSeriesCommands.range
+    https://www.chartjs.org/docs/latest/charts/line.html
+    """
+
+    # "stop" is next midnight to define buckets on exact day boundaries.
+    stop = dt.now(UTC).replace(hour=0, minute=0, second=0, microsecond=0)
+    stop += timedelta(days=1)
+    stop = int(stop.timestamp() * 1000)
+    start = stop - N_DAYS * DAY_MS
+
+    stamps = list(range(start, stop, DAY_MS))
+    labels = [str(dt.fromtimestamp(stamp // 1000, UTC))[:10] + "Z" for stamp in stamps]

-    # aggregate all time series labeled with stats=builds
-    results = ts.mrange(
+    ts = get_redis_ts()
+    rc = ts.client
+    range_options = dict(
         from_time=start,
-        to_time=now,
-        filters=["stats=builds"],
-        with_labels=False,
+        to_time=stop,
+        align=start,  # Ensures alignment of X values with "stamps".
         aggregation_type="sum",
-        bucket_size_msec=86400000,  # 1 day (24 hours)
+        bucket_size_msec=DAY_MS,
     )

-    # create a map from timestamp to build count
-    daily_counts = {}
-
-    for entry in results:
-        data = list(entry.values())[0][1]
-        for ts, value in data:
-            daily_counts[ts] = daily_counts.get(ts, 0) + int(value)
-
-    # sort by timestamp
-    sorted_data = sorted(daily_counts.items())
-
-    labels = [datetime.utcfromtimestamp(ts / 1000).isoformat() for ts, _ in sorted_data]
-    values = [count for _, count in sorted_data]
+    def get_dataset(event: str, color: str) -> dict:
+        """Fills "data" array completely, supplying 0 for missing values."""
+        key = f"stats:build:{event}"
+        result = ts.range(key, **range_options) if rc.exists(key) else []
+        data_map = dict(result)
+        return {
+            "label": event.title(),
+            "data": [data_map.get(stamp, 0) for stamp in stamps],
+            "color": color,
+        }

     return {
         "labels": labels,
-        "datasets": [{"label": "Builds per day", "data": values}],
+        "datasets": [
+            # See add_build_event for valid "event" values.
+            get_dataset("requests", "green"),
+            get_dataset("cache-hits", "orange"),
+            get_dataset("failures", "red"),
+        ],
     }
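For reference, a rough sketch of the payload this reworked endpoint produces, which the Chart.js code in asu/templates/overview.html (next diff) maps directly onto line datasets. The values are illustrative only; a real response carries N_DAYS (30) buckets, one per entry in "labels".

    # Illustrative GET /builds-per-day response (counts are made up):
    example_payload = {
        "labels": ["2025-05-30Z", "2025-05-31Z", "2025-06-01Z"],
        "datasets": [
            {"label": "Requests",   "data": [120, 95, 143], "color": "green"},
            {"label": "Cache-Hits", "data": [80, 61, 102],  "color": "orange"},
            {"label": "Failures",   "data": [3, 0, 1],      "color": "red"},
        ],
    }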

asu/templates/overview.html

Lines changed: 8 additions & 10 deletions
@@ -116,24 +116,22 @@ <h2>Builds per Day (last 30 days)</h2>
             labels: data.labels,
             datasets: data.datasets.map(ds => ({
                 data: ds.data,
+                label: ds.label,
+                backgroundColor: ds.color,
+                borderColor: ds.color,
+                borderJoinStyle: "round",
                 fill: false
             }))
         },
         options: {
             responsive: true,
             scales: {
-                x: {
-                    display: false
-                },
-                y: {
-                    display: true
-                }
+                x: { display: false },
+                y: { display: true }
             },
             plugins: {
-                legend: {
-                    display: false
-                },
-                title: { display: false }
+                legend: { display: true },
+                title: { display: false }
             }
         }
     });

asu/util.py

Lines changed: 28 additions & 1 deletion
@@ -32,6 +32,10 @@ def get_redis_client(unicode: bool = True) -> redis.client.Redis:
     return redis.from_url(settings.redis_url, decode_responses=unicode)


+def get_redis_ts():
+    return get_redis_client().ts()
+
+
 def client_get(url: str, ttl: int = 3600) -> Response:
     return hishel.CacheClient(
         storage=hishel.RedisStorage(client=get_redis_client(False), ttl=ttl),
@@ -43,7 +47,7 @@ def add_timestamp(key: str, labels: dict[str, str] = {}, value: int = 1) -> None
     if not settings.server_stats:
         return
     log.debug(f"Adding timestamp to {key}: {labels}")
-    get_redis_client().ts().add(
+    get_redis_ts().add(
         key,
         value=value,
         timestamp="*",
@@ -52,6 +56,29 @@ def add_timestamp(key: str, labels: dict[str, str] = {}, value: int = 1) -> None
     )


+def add_build_event(event: str) -> None:
+    """
+    Logs summary statistics for build events:
+
+    - requests - total number of calls to /build API, logged for all build
+      requests, irrespective of validity, success or failure
+    - cache-hits - count of build requests satisfied by already-existing builds
+    - cache-misses - count of build requests sent to builder
+    - successes - count of builder runs with successful completion
+    - failures - count of builder runs that failed
+
+    Note that for validation, you can check that:
+    - cache-misses = successes + failures
+    - requests = cache-hits + cache-misses
+
+    The summary stats key prefix is 'stats:build:*'.
+    """
+    assert event in {"requests", "cache-hits", "cache-misses", "successes", "failures"}
+
+    key: str = f"stats:build:{event}"
+    add_timestamp(key, {"stats": "summary"})
+
+
 def get_queue() -> Queue:
     """Return the current queue

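To make the relationship between add_build_event and the time series read by the stats router concrete, here is a small sketch, assuming a Redis instance with the RedisTimeSeries module and settings.server_stats enabled; total() is a hypothetical helper written for this example, not part of asu.util.

    from asu.util import add_build_event, get_redis_ts

    # Each call becomes one TS.ADD sample on a "stats:build:<event>" key
    # (add_timestamp is a no-op unless settings.server_stats is enabled).
    add_build_event("requests")
    add_build_event("cache-misses")

    ts = get_redis_ts()
    rc = ts.client

    def total(event: str) -> int:
        # Hypothetical helper: sum every sample of one summary series.
        key = f"stats:build:{event}"
        if not rc.exists(key):
            return 0
        return int(sum(value for _, value in ts.range(key, "-", "+")))

    # The docstring's consistency checks, evaluated against live data:
    print(total("requests"), "==", total("cache-hits") + total("cache-misses"))
    print(total("cache-misses"), "==", total("successes") + total("failures"))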

misc/stats_modernize.py

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+from asu.util import get_redis_ts
+
+stat_types = (
+    "cache-hits",
+    "cache-misses",
+    "failures",
+    "requests",
+    "successes",
+)
+
+ts = get_redis_ts()
+rc = ts.client
+
+converted = rc.exists("stats:build:requests")
+force = False
+if converted and not force:
+    print("Already converted =====================")
+else:
+    print("Converting ============================")
+
+    if rc.exists("stats:cache-misses"):
+        # Note: "rename" overwrites any existing destination...
+        rc.rename("stats:cache-misses", "stats:build:cache-misses")
+    if rc.exists("stats:cache-hits"):
+        # Old stats are completely incorrect, so just delete them.
+        rc.delete("stats:cache-hits")
+
+    for stat_type in stat_types:
+        key = f"stats:build:{stat_type}"
+        func = ts.alter if rc.exists(key) else ts.create
+        func(key, labels={"stats": "summary"}, duplicate_policy="sum")
+
+    # Attempt to repopulate total requests and success values as
+    # accurately as possible using existing stats:builds:* data.
+    ts.delete("stats:build:requests", "-", "+")  # Empty them out.
+    ts.delete("stats:build:successes", "-", "+")
+    all_builds = ts.mrange("-", "+", filters=["stats=builds"])
+    for build in all_builds:
+        _, data = build.popitem()
+        series = data[1]
+        for stamp, value in series:
+            ts.add("stats:build:requests", timestamp=stamp, value=value)
+            ts.add("stats:build:successes", timestamp=stamp, value=value)
+
+for stat_type in stat_types:
+    key = f"stats:build:{stat_type}"
+    print(f"{key:<25} - {ts.info(key).total_samples} samples")

tests/test_api.py

Lines changed: 8 additions & 0 deletions
@@ -157,6 +157,14 @@ def save_meta(self):
     try:
         build(build_request, fake_job())
     except Exception as exc:
+        chain = exc
+        while hasattr(chain, "__context__") and chain.__context__:
+            # We want the original exception, not anything that FakeRedis
+            # generated during processing of it.
+            chain = chain.__context__
+            if isinstance(chain, RuntimeError):
+                exc = chain
+                break
         assert str(exc).startswith(
             "Image not found: ghcr.io/openwrt/imagebuilder:lantiq-xrx200-v24.10.1"
         )
