From 71254861a2e953eff2d29ef96943f19219087aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Wed, 7 Jan 2026 12:32:19 +0000 Subject: [PATCH 1/6] feat: add attestation buildup analysis notebook Add notebook 10-attestation-buildup analyzing how validator attestations accumulate over time. For each slot, attestations can be included in blocks up to 32 slots later. This analysis shows: - Attestation inclusion CDF heatmaps by slot and epoch - Correlation with blob count and block size - Block propagation timing effects on attestation inclusion - Time series trends of network health Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- notebooks/10-attestation-buildup.ipynb | 692 +++++++++++++++++++++++++ pipeline.yaml | 20 + queries/attestation_buildup.py | 97 ++++ site/src/components/Icon.astro | 3 +- 4 files changed, 811 insertions(+), 1 deletion(-) create mode 100644 notebooks/10-attestation-buildup.ipynb create mode 100644 queries/attestation_buildup.py diff --git a/notebooks/10-attestation-buildup.ipynb b/notebooks/10-attestation-buildup.ipynb new file mode 100644 index 0000000..a4aa821 --- /dev/null +++ b/notebooks/10-attestation-buildup.ipynb @@ -0,0 +1,692 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "Attestation buildup analysis showing how validator attestations accumulate over time. For each slot, attestations can be included in blocks up to 32 slots later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "from loaders import load_parquet, display_sql\n", + "\n", + "target_date = None # Set via papermill, or auto-detect from manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "display_sql(\"attestation_buildup\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_parquet(\"attestation_buildup\", target_date)\n", + "\n", + "# Create a dense grid: fill missing delays with previous cumulative value\n", + "slots = df[\"slot\"].unique()\n", + "delays = range(1, 33)\n", + "\n", + "# Pivot to get cumulative_pct for each (slot, delay)\n", + "df_pivot = df.pivot(index=\"slot\", columns=\"inclusion_delay\", values=\"cumulative_pct\")\n", + "\n", + "# Forward fill missing delays (if no attestations at delay N, use value from delay N-1)\n", + "df_pivot = df_pivot.reindex(columns=range(1, 33)).ffill(axis=1).fillna(0)\n", + "\n", + "# Get slot metadata (including block size)\n", + "slot_meta = df.drop_duplicates(\"slot\").set_index(\"slot\")[\n", + " [\"epoch\", \"time\", \"total_validators\", \"blob_count\", \"block_size_bytes\", \"block_first_seen_ms\"]\n", + "]\n", + "\n", + "# Convert block size to KB for readability\n", + "slot_meta[\"block_size_kb\"] = slot_meta[\"block_size_bytes\"] / 1024\n", + "\n", + "print(f\"Loaded {len(slots):,} slots\")\n", + "print(f\"Block size range: {slot_meta['block_size_kb'].min():.1f} - {slot_meta['block_size_kb'].max():.1f} KB\")\n", + "df_pivot.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": 
{}, + "source": [ + "## Attestation buildup heatmap\n", + "\n", + "Each row is a slot, each column is the inclusion delay (1-32). Color intensity shows cumulative percentage of attestations included by that delay. Bright = fast inclusion, dark = slow." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Sample slots for readable heatmap (every Nth slot)\n", + "sample_step = max(1, len(df_pivot) // 200)\n", + "df_sample = df_pivot.iloc[::sample_step]\n", + "\n", + "# Get time labels for y-axis\n", + "y_labels = [slot_meta.loc[s, \"time\"].strftime(\"%H:%M\") if s in slot_meta.index else str(s) for s in df_sample.index]\n", + "\n", + "fig = go.Figure(\n", + " data=go.Heatmap(\n", + " z=df_sample.values,\n", + " x=[str(d) for d in df_sample.columns],\n", + " y=y_labels,\n", + " colorscale=\"Viridis\",\n", + " zmin=0,\n", + " zmax=100,\n", + " colorbar=dict(title=\"Cumulative %\", ticksuffix=\"%\"),\n", + " hovertemplate=\"Time: %{y}
Delay: %{x} slots
Included: %{z:.1f}%\",\n", + " )\n", + ")\n", + "\n", + "fig.update_layout(\n", + " title=\"Attestation inclusion by delay\",\n", + " xaxis_title=\"Inclusion delay (slots)\",\n", + " yaxis_title=\"Time\",\n", + " yaxis=dict(autorange=\"reversed\"),\n", + " height=800,\n", + " margin=dict(l=80, r=30, t=50, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## CDF distribution at key delays\n", + "\n", + "Histogram showing the distribution of cumulative attestation percentage at delays 1, 2, 4, and 8 slots. Most slots should cluster near 100% for delay 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "key_delays = [1, 2, 4, 8]\n", + "\n", + "fig = make_subplots(\n", + " rows=2, cols=2,\n", + " subplot_titles=[f\"Delay = {d} slot{'s' if d > 1 else ''}\" for d in key_delays],\n", + " horizontal_spacing=0.1,\n", + " vertical_spacing=0.12,\n", + ")\n", + "\n", + "for i, delay in enumerate(key_delays):\n", + " row, col = divmod(i, 2)\n", + " values = df_pivot[delay].dropna()\n", + " \n", + " fig.add_trace(\n", + " go.Histogram(\n", + " x=values,\n", + " nbinsx=50,\n", + " name=f\"Delay {delay}\",\n", + " marker_color=px.colors.sequential.Viridis[i * 2 + 2],\n", + " hovertemplate=\"%{x:.1f}%: %{y} slots\",\n", + " ),\n", + " row=row + 1, col=col + 1,\n", + " )\n", + " \n", + " # Add median line\n", + " median = values.median()\n", + " fig.add_vline(\n", + " x=median, line_dash=\"dash\", line_color=\"red\",\n", + " annotation_text=f\"median: {median:.1f}%\",\n", + " annotation_position=\"top right\",\n", + " row=row + 1, col=col + 1,\n", + " )\n", + "\n", + "fig.update_xaxes(title_text=\"Cumulative %\", range=[0, 105])\n", + "fig.update_yaxes(title_text=\"Slot count\")\n", + "fig.update_layout(\n", + " title=\"Distribution of attestation inclusion rates\",\n", + " height=600,\n", + " showlegend=False,\n", + " 
margin=dict(l=60, r=30, t=80, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## Average CDF curve\n", + "\n", + "Mean attestation buildup curve across all slots with percentile bands (5th-95th). Shows typical attestation propagation dynamics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "delays = list(range(1, 33))\n", + "mean_curve = df_pivot.mean()\n", + "p5 = df_pivot.quantile(0.05)\n", + "p25 = df_pivot.quantile(0.25)\n", + "p75 = df_pivot.quantile(0.75)\n", + "p95 = df_pivot.quantile(0.95)\n", + "\n", + "fig = go.Figure()\n", + "\n", + "# 5-95 percentile band\n", + "fig.add_trace(go.Scatter(\n", + " x=delays + delays[::-1],\n", + " y=list(p95) + list(p5)[::-1],\n", + " fill=\"toself\",\n", + " fillcolor=\"rgba(99, 110, 250, 0.15)\",\n", + " line=dict(color=\"rgba(255,255,255,0)\"),\n", + " name=\"5th-95th percentile\",\n", + " hoverinfo=\"skip\",\n", + "))\n", + "\n", + "# 25-75 percentile band\n", + "fig.add_trace(go.Scatter(\n", + " x=delays + delays[::-1],\n", + " y=list(p75) + list(p25)[::-1],\n", + " fill=\"toself\",\n", + " fillcolor=\"rgba(99, 110, 250, 0.3)\",\n", + " line=dict(color=\"rgba(255,255,255,0)\"),\n", + " name=\"25th-75th percentile\",\n", + " hoverinfo=\"skip\",\n", + "))\n", + "\n", + "# Mean curve\n", + "fig.add_trace(go.Scatter(\n", + " x=delays,\n", + " y=mean_curve,\n", + " mode=\"lines+markers\",\n", + " name=\"Mean\",\n", + " line=dict(color=\"#636EFA\", width=3),\n", + " marker=dict(size=6),\n", + " hovertemplate=\"Delay %{x}: %{y:.2f}%\",\n", + "))\n", + "\n", + "fig.update_layout(\n", + " title=\"Attestation buildup CDF (mean with percentile bands)\",\n", + " xaxis_title=\"Inclusion delay (slots)\",\n", + " yaxis_title=\"Cumulative attestations (%)\",\n", + " xaxis=dict(tickmode=\"linear\", dtick=2),\n", + " yaxis=dict(range=[0, 105]),\n", + " height=500,\n", + " 
legend=dict(yanchor=\"bottom\", y=0.02, xanchor=\"right\", x=0.98),\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "## Blob count correlation\n", + "\n", + "Box plot showing attestation inclusion at delay=1 grouped by blob count (0-6). Tests whether slots with more blobs experience slower attestation propagation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Merge delay-1 inclusion rate with blob count\n", + "df_corr = pd.DataFrame({\n", + " \"slot\": df_pivot.index,\n", + " \"pct_at_delay_1\": df_pivot[1].values,\n", + "})\n", + "df_corr = df_corr.merge(slot_meta[[\"blob_count\"]], left_on=\"slot\", right_index=True)\n", + "\n", + "fig = go.Figure()\n", + "\n", + "unique_blobs = sorted(df_corr[\"blob_count\"].unique())\n", + "n_colors = len(unique_blobs)\n", + "colors = px.colors.sample_colorscale(\"Viridis\", [i / max(1, n_colors - 1) for i in range(n_colors)])\n", + "\n", + "for i, blob_count in enumerate(unique_blobs):\n", + " subset = df_corr[df_corr[\"blob_count\"] == blob_count]\n", + " fig.add_trace(go.Box(\n", + " y=subset[\"pct_at_delay_1\"],\n", + " name=str(blob_count),\n", + " boxpoints=\"outliers\",\n", + " marker_color=colors[i],\n", + " hovertemplate=\"%{y:.1f}%\",\n", + " ))\n", + "\n", + "fig.update_layout(\n", + " title=\"Attestation inclusion at delay=1 by blob count\",\n", + " xaxis_title=\"Blob count\",\n", + " yaxis_title=\"Cumulative % at delay 1\",\n", + " yaxis=dict(range=[0, 105]),\n", + " showlegend=False,\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Block size correlation\n", + "\n", + "Scatter plot showing relationship between compressed block size (KB) and attestation inclusion at delay=1. 
Larger blocks may take longer to propagate, affecting attestation timing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Merge delay-1 inclusion rate with block size\n", + "df_block = pd.DataFrame({\n", + " \"slot\": df_pivot.index,\n", + " \"pct_at_delay_1\": df_pivot[1].values,\n", + "})\n", + "df_block = df_block.merge(\n", + " slot_meta[[\"blob_count\", \"block_size_kb\", \"block_first_seen_ms\"]], \n", + " left_on=\"slot\", right_index=True\n", + ")\n", + "\n", + "# Filter out slots with missing block size data\n", + "df_block = df_block[df_block[\"block_size_kb\"] > 0]\n", + "\n", + "# Scatter plot: block size vs attestation inclusion\n", + "fig = px.scatter(\n", + " df_block,\n", + " x=\"block_size_kb\",\n", + " y=\"pct_at_delay_1\",\n", + " color=\"blob_count\",\n", + " color_continuous_scale=\"Viridis\",\n", + " opacity=0.5,\n", + " labels={\n", + " \"block_size_kb\": \"Block size (KB)\",\n", + " \"pct_at_delay_1\": \"% included at delay 1\",\n", + " \"blob_count\": \"Blobs\",\n", + " },\n", + ")\n", + "fig.update_traces(marker=dict(size=4))\n", + "\n", + "# Add correlation coefficient\n", + "corr = df_block[\"block_size_kb\"].corr(df_block[\"pct_at_delay_1\"])\n", + "\n", + "fig.update_layout(\n", + " title=f\"Block size vs attestation inclusion (r = {corr:.3f})\",\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()\n", + "\n", + "print(f\"Correlation (block size vs inclusion at delay 1): {corr:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## Block size bins\n", + "\n", + "Box plot showing attestation inclusion grouped by block size bins. Reveals whether larger blocks systematically experience slower attestation propagation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# Create block size bins\n", + "df_block[\"size_bin\"] = pd.cut(\n", + " df_block[\"block_size_kb\"],\n", + " bins=[0, 40, 50, 60, 70, 80, 100, 150],\n", + " labels=[\"<40\", \"40-50\", \"50-60\", \"60-70\", \"70-80\", \"80-100\", \">100\"]\n", + ")\n", + "\n", + "fig = go.Figure()\n", + "\n", + "colors = px.colors.sequential.Plasma\n", + "for i, bin_label in enumerate([\"<40\", \"40-50\", \"50-60\", \"60-70\", \"70-80\", \"80-100\", \">100\"]):\n", + " subset = df_block[df_block[\"size_bin\"] == bin_label]\n", + " if len(subset) > 0:\n", + " fig.add_trace(go.Box(\n", + " y=subset[\"pct_at_delay_1\"],\n", + " name=f\"{bin_label} KB\",\n", + " boxpoints=\"outliers\",\n", + " marker_color=colors[i],\n", + " hovertemplate=\"%{y:.1f}%\",\n", + " ))\n", + "\n", + "fig.update_layout(\n", + " title=\"Attestation inclusion at delay=1 by block size\",\n", + " xaxis_title=\"Block size (KB)\",\n", + " yaxis_title=\"Cumulative % at delay 1\",\n", + " yaxis=dict(range=[0, 105]),\n", + " showlegend=False,\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()\n", + "\n", + "# Print stats per bin\n", + "print(\"\\nStats by block size bin:\")\n", + "print(df_block.groupby(\"size_bin\")[\"pct_at_delay_1\"].agg([\"count\", \"mean\", \"median\"]).round(2))" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Block propagation time correlation\n", + "\n", + "Scatter plot showing relationship between block first-seen time (ms after slot start) and attestation inclusion. Blocks that propagate later leave less time for attestations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter to reasonable propagation times (< 12 seconds = 1 slot)\n", + "df_prop = df_block[df_block[\"block_first_seen_ms\"] < 12000].copy()\n", + "\n", + "fig = px.scatter(\n", + " df_prop,\n", + " x=\"block_first_seen_ms\",\n", + " y=\"pct_at_delay_1\",\n", + " color=\"block_size_kb\",\n", + " color_continuous_scale=\"Plasma\",\n", + " opacity=0.5,\n", + " labels={\n", + " \"block_first_seen_ms\": \"Block first seen (ms after slot start)\",\n", + " \"pct_at_delay_1\": \"% included at delay 1\",\n", + " \"block_size_kb\": \"Size (KB)\",\n", + " },\n", + ")\n", + "fig.update_traces(marker=dict(size=4))\n", + "\n", + "# Add correlation coefficient\n", + "corr = df_prop[\"block_first_seen_ms\"].corr(df_prop[\"pct_at_delay_1\"])\n", + "\n", + "fig.update_layout(\n", + " title=f\"Block propagation time vs attestation inclusion (r = {corr:.3f})\",\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()\n", + "\n", + "print(f\"Correlation (propagation time vs inclusion): {corr:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## Epoch-level aggregation\n", + "\n", + "Heatmap showing mean attestation inclusion rate per epoch at each delay. Reveals temporal trends in network health." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "# Add epoch to pivot data\n", + "df_with_epoch = df_pivot.copy()\n", + "df_with_epoch[\"epoch\"] = df_with_epoch.index.map(lambda s: slot_meta.loc[s, \"epoch\"] if s in slot_meta.index else None)\n", + "df_with_epoch = df_with_epoch.dropna(subset=[\"epoch\"])\n", + "\n", + "# Aggregate by epoch\n", + "epoch_agg = df_with_epoch.groupby(\"epoch\")[list(range(1, 33))].mean()\n", + "\n", + "# Get epoch times for labels\n", + "epoch_times = df.drop_duplicates(\"epoch\").set_index(\"epoch\")[\"time\"].to_dict()\n", + "y_labels = [epoch_times.get(e, pd.Timestamp(\"1970-01-01\")).strftime(\"%H:%M\") for e in epoch_agg.index]\n", + "\n", + "fig = go.Figure(\n", + " data=go.Heatmap(\n", + " z=epoch_agg.values,\n", + " x=[str(d) for d in epoch_agg.columns],\n", + " y=y_labels,\n", + " colorscale=\"Viridis\",\n", + " zmin=0,\n", + " zmax=100,\n", + " colorbar=dict(title=\"Mean %\", ticksuffix=\"%\"),\n", + " hovertemplate=\"Epoch time: %{y}
Delay: %{x} slots
Mean: %{z:.1f}%\",\n", + " )\n", + ")\n", + "\n", + "fig.update_layout(\n", + " title=\"Mean attestation inclusion by epoch\",\n", + " xaxis_title=\"Inclusion delay (slots)\",\n", + " yaxis_title=\"Epoch time\",\n", + " yaxis=dict(autorange=\"reversed\"),\n", + " height=600,\n", + " margin=dict(l=80, r=30, t=50, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "## Slow slots analysis\n", + "\n", + "Scatter plot highlighting slots with unusually slow attestation inclusion (<90% at delay 1). Size indicates total validators, color indicates blob count." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "# Identify slow slots\n", + "threshold = 90\n", + "df_analysis = df_corr.merge(\n", + " slot_meta[[\"time\", \"total_validators\", \"block_size_kb\", \"block_first_seen_ms\"]], \n", + " left_on=\"slot\", right_index=True\n", + ")\n", + "slow_slots = df_analysis[df_analysis[\"pct_at_delay_1\"] < threshold].copy()\n", + "\n", + "print(f\"Slots with <{threshold}% inclusion at delay 1: {len(slow_slots):,} ({100*len(slow_slots)/len(df_analysis):.1f}%)\")\n", + "\n", + "if len(slow_slots) > 0:\n", + " fig = px.scatter(\n", + " slow_slots,\n", + " x=\"time\",\n", + " y=\"pct_at_delay_1\",\n", + " size=\"block_size_kb\",\n", + " color=\"blob_count\",\n", + " color_continuous_scale=\"Viridis\",\n", + " hover_data={\"slot\": True, \"total_validators\": True, \"blob_count\": True, \"block_size_kb\": \":.1f\"},\n", + " labels={\n", + " \"time\": \"Time\",\n", + " \"pct_at_delay_1\": \"% at delay 1\",\n", + " \"blob_count\": \"Blobs\",\n", + " \"block_size_kb\": \"Block KB\",\n", + " },\n", + " )\n", + " fig.update_traces(\n", + " hovertemplate=\"Slot: %{customdata[0]}
Time: %{x}
Included: %{y:.1f}%
Block size: %{customdata[3]:.1f} KB
Blobs: %{customdata[2]}\",\n", + " )\n", + " fig.update_layout(\n", + " title=f\"Slow attestation slots (<{threshold}% at delay 1) - size = block size\",\n", + " yaxis=dict(range=[0, threshold + 5]),\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + " )\n", + " fig.show()\n", + "else:\n", + " print(\"No slow slots found.\")" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "## Time series: delay-1 inclusion rate\n", + "\n", + "Rolling average of attestation inclusion at delay=1 over time. Shows network performance trends throughout the day." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "df_ts = df_analysis.sort_values(\"time\").copy()\n", + "df_ts[\"rolling_mean\"] = df_ts[\"pct_at_delay_1\"].rolling(window=32, min_periods=1).mean()\n", + "\n", + "fig = go.Figure()\n", + "\n", + "# Individual points (subsampled)\n", + "sample_step = max(1, len(df_ts) // 500)\n", + "df_sample = df_ts.iloc[::sample_step]\n", + "\n", + "fig.add_trace(go.Scatter(\n", + " x=df_sample[\"time\"],\n", + " y=df_sample[\"pct_at_delay_1\"],\n", + " mode=\"markers\",\n", + " marker=dict(size=3, color=\"#636EFA\", opacity=0.3),\n", + " name=\"Per-slot\",\n", + " hovertemplate=\"%{x}
%{y:.1f}%\",\n", + "))\n", + "\n", + "# Rolling average\n", + "fig.add_trace(go.Scatter(\n", + " x=df_ts[\"time\"],\n", + " y=df_ts[\"rolling_mean\"],\n", + " mode=\"lines\",\n", + " line=dict(color=\"#EF553B\", width=2),\n", + " name=\"32-slot rolling avg\",\n", + " hovertemplate=\"%{x}
%{y:.1f}%\",\n", + "))\n", + "\n", + "fig.update_layout(\n", + " title=\"Attestation inclusion at delay=1 over time\",\n", + " xaxis_title=\"Time\",\n", + " yaxis_title=\"Cumulative % at delay 1\",\n", + " yaxis=dict(range=[0, 105]),\n", + " legend=dict(yanchor=\"bottom\", y=0.02, xanchor=\"right\", x=0.98),\n", + " height=500,\n", + " margin=dict(l=60, r=30, t=50, b=60),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, + "source": [ + "## Summary statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "# CDF summary by delay\n", + "summary = pd.DataFrame({\n", + " \"Delay\": [1, 2, 4, 8, 16, 32],\n", + " \"Mean %\": [df_pivot[d].mean() for d in [1, 2, 4, 8, 16, 32]],\n", + " \"Median %\": [df_pivot[d].median() for d in [1, 2, 4, 8, 16, 32]],\n", + " \"5th pct\": [df_pivot[d].quantile(0.05) for d in [1, 2, 4, 8, 16, 32]],\n", + " \"95th pct\": [df_pivot[d].quantile(0.95) for d in [1, 2, 4, 8, 16, 32]],\n", + "})\n", + "print(\"Attestation inclusion by delay:\")\n", + "display(summary.round(2))\n", + "\n", + "# Correlation summary\n", + "print(\"\\nCorrelation with delay-1 inclusion rate:\")\n", + "corr_data = {\n", + " \"Factor\": [\"Blob count\", \"Block size (KB)\", \"Block first seen (ms)\"],\n", + " \"Correlation\": [\n", + " df_block[\"blob_count\"].corr(df_block[\"pct_at_delay_1\"]),\n", + " df_block[\"block_size_kb\"].corr(df_block[\"pct_at_delay_1\"]),\n", + " df_block[df_block[\"block_first_seen_ms\"] < 12000][\"block_first_seen_ms\"].corr(\n", + " df_block[df_block[\"block_first_seen_ms\"] < 12000][\"pct_at_delay_1\"]\n", + " ),\n", + " ],\n", + "}\n", + "corr_df = pd.DataFrame(corr_data)\n", + "display(corr_df.round(4))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pipeline.yaml b/pipeline.yaml index 128ee29..700b740 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -119,6 +119,12 @@ queries: description: Block propagation by geographic region from Contributoor nodes output_file: block_propagation_by_region_contributoor.parquet + attestation_buildup: + module: queries.attestation_buildup + function: fetch_attestation_buildup + description: Attestation inclusion CDF per slot with blob correlation + output_file: attestation_buildup.parquet + # ============================================ # Notebook Registry # ============================================ @@ -258,6 +264,20 @@ notebooks: required: true order: 9 + - id: attestation-buildup + title: Attestation buildup + description: Attestation inclusion timing CDF showing propagation dynamics + icon: TrendingUp + source: notebooks/10-attestation-buildup.ipynb + schedule: daily + queries: + - attestation_buildup + parameters: + - name: target_date + type: date + required: true + order: 10 + # Schedule options: hourly, daily, weekly, manual # - hourly: Runs every hour, accumulating data throughout the day # - daily: Runs once per day at 1am UTC diff --git a/queries/attestation_buildup.py b/queries/attestation_buildup.py new file mode 100644 index 0000000..0e34979 --- /dev/null +++ b/queries/attestation_buildup.py @@ -0,0 +1,97 @@ +""" +Fetch functions for attestation buildup CDF analysis. + +Tracks how attestations accumulate over slots after the attested slot. +Attestations for slot A can be included in blocks up to slot A+32. 
+""" + +from pathlib import Path + + +def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str: + """Generate SQL date filter for a specific date.""" + return f"{column} >= '{target_date}' AND {column} < '{target_date}'::date + INTERVAL 1 DAY" + + +def fetch_attestation_buildup( + client, + target_date: str, + network: str = "mainnet", +) -> tuple: + """Fetch attestation buildup CDF per slot. + + For each slot, shows cumulative attestation inclusion at each delay (1-32 slots). + Includes blob count and block size (compressed) for correlation analysis. + + Returns (df, query). + """ + date_filter = _get_date_filter(target_date) + + query = f""" +WITH attestation_counts AS ( + SELECT + slot, + epoch, + slot_start_date_time, + block_slot - slot AS inclusion_delay, + sum(length(validators)) AS validators_at_delay + FROM default.canonical_beacon_elaborated_attestation + WHERE meta_network_name = '{network}' + AND {date_filter} + AND block_slot - slot BETWEEN 1 AND 32 + GROUP BY slot, epoch, slot_start_date_time, block_slot - slot +), + +running_totals AS ( + SELECT + slot, + epoch, + slot_start_date_time, + inclusion_delay, + validators_at_delay, + sum(validators_at_delay) OVER (PARTITION BY slot ORDER BY inclusion_delay) AS cumulative_validators, + sum(validators_at_delay) OVER (PARTITION BY slot) AS total_validators + FROM attestation_counts +), + +blobs AS ( + SELECT + slot, + count(DISTINCT blob_index) AS blob_count + FROM default.canonical_beacon_blob_sidecar + WHERE meta_network_name = '{network}' + AND {date_filter} + GROUP BY slot +), + +block_sizes AS ( + SELECT + slot, + min(message_size) AS block_size_bytes, + min(propagation_slot_start_diff) AS block_first_seen_ms + FROM default.libp2p_gossipsub_beacon_block + WHERE meta_network_name = '{network}' + AND {date_filter} + GROUP BY slot +) + +SELECT + r.slot AS slot, + r.epoch AS epoch, + r.slot_start_date_time AS time, + r.inclusion_delay AS inclusion_delay, + r.validators_at_delay AS 
validators_at_delay, + r.cumulative_validators AS cumulative_validators, + r.total_validators AS total_validators, + round(r.cumulative_validators * 100.0 / r.total_validators, 4) AS cumulative_pct, + coalesce(b.blob_count, 0) AS blob_count, + coalesce(bs.block_size_bytes, 0) AS block_size_bytes, + coalesce(bs.block_first_seen_ms, 0) AS block_first_seen_ms +FROM running_totals r +LEFT JOIN blobs b ON r.slot = b.slot +LEFT JOIN block_sizes bs ON r.slot = bs.slot +ORDER BY r.slot, r.inclusion_delay +""" + + df = client.query_df(query) + return df, query diff --git a/site/src/components/Icon.astro b/site/src/components/Icon.astro index 73e8e9d..dff01b3 100644 --- a/site/src/components/Icon.astro +++ b/site/src/components/Icon.astro @@ -1,5 +1,5 @@ --- -import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Clock, Download, Eye, FileText, Gauge, Gavel, Grid3x3, Layers, Link, List, PanelLeft, Timer, XCircle } from 'lucide-react'; +import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Clock, Download, Eye, FileText, Gauge, Gavel, Grid3x3, Layers, Link, List, PanelLeft, Timer, TrendingUp, XCircle } from 'lucide-react'; interface Props { name: string; @@ -31,6 +31,7 @@ const icons: Record = { List, PanelLeft, Timer, + TrendingUp, XCircle, }; From 1a6dad8e22921a2daf3ef5bbb9aaaf4bcfcc8801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 29 Jan 2026 14:25:57 +0000 Subject: [PATCH 2/6] fix(queries): deduplicate validators across overlapping attestation aggregates The canonical_beacon_elaborated_attestation table stores multiple aggregate attestations per committee at different positions in the same block. These aggregates have massively overlapping validator sets (up to 98% overlap), causing sum(length(validators)) to overcount by 6-7x. Fix by using arrayUniq(arrayFlatten(groupArray(validators))) to count unique validators across all aggregates for each (slot, delay) combination. 
Also fixes: - Add nullif() to prevent division by zero on empty slots - Remove unused Path import - Remove unused numpy import from notebook - Fix markdown error: "Size indicates total validators" -> "block size" - Remove hardcoded blob count range "(0-6)" from markdown - Add box plot legend annotations per CLAUDE.md requirements --- notebooks/10-attestation-buildup.ipynb | 51 ++++++++++++++------------ queries/attestation_buildup.py | 7 ++-- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/notebooks/10-attestation-buildup.ipynb b/notebooks/10-attestation-buildup.ipynb index a4aa821..3d1a71d 100644 --- a/notebooks/10-attestation-buildup.ipynb +++ b/notebooks/10-attestation-buildup.ipynb @@ -19,7 +19,6 @@ }, "outputs": [], "source": [ - "import numpy as np\n", "import pandas as pd\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", @@ -260,7 +259,7 @@ "source": [ "## Blob count correlation\n", "\n", - "Box plot showing attestation inclusion at delay=1 grouped by blob count (0-6). Tests whether slots with more blobs experience slower attestation propagation." + "Box plot showing attestation inclusion at delay=1 grouped by blob count. Tests whether slots with more blobs experience slower attestation propagation." ] }, { @@ -300,7 +299,12 @@ " yaxis=dict(range=[0, 105]),\n", " showlegend=False,\n", " height=500,\n", - " margin=dict(l=60, r=30, t=50, b=60),\n", + " margin=dict(l=60, r=30, t=50, b=80),\n", + ")\n", + "fig.add_annotation(\n", + " text=\"Box: 25th-75th percentile. Line: median. 
Whiskers: min/max excluding outliers.\",\n", + " xref=\"paper\", yref=\"paper\", x=0.5, y=-0.15,\n", + " showarrow=False, font=dict(size=10, color=\"gray\"),\n", ")\n", "fig.show()" ] @@ -322,13 +326,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Merge delay-1 inclusion rate with block size\n", - "df_block = pd.DataFrame({\n", - " \"slot\": df_pivot.index,\n", - " \"pct_at_delay_1\": df_pivot[1].values,\n", - "})\n", - "df_block = df_block.merge(\n", - " slot_meta[[\"blob_count\", \"block_size_kb\", \"block_first_seen_ms\"]], \n", + "# Extend df_corr with block size data for correlation analysis\n", + "df_block = df_corr.merge(\n", + " slot_meta[[\"block_size_kb\", \"block_first_seen_ms\"]], \n", " left_on=\"slot\", right_index=True\n", ")\n", "\n", @@ -351,7 +351,6 @@ ")\n", "fig.update_traces(marker=dict(size=4))\n", "\n", - "# Add correlation coefficient\n", "corr = df_block[\"block_size_kb\"].corr(df_block[\"pct_at_delay_1\"])\n", "\n", "fig.update_layout(\n", @@ -409,7 +408,12 @@ " yaxis=dict(range=[0, 105]),\n", " showlegend=False,\n", " height=500,\n", - " margin=dict(l=60, r=30, t=50, b=60),\n", + " margin=dict(l=60, r=30, t=50, b=80),\n", + ")\n", + "fig.add_annotation(\n", + " text=\"Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.\",\n", + " xref=\"paper\", yref=\"paper\", x=0.5, y=-0.15,\n", + " showarrow=False, font=dict(size=10, color=\"gray\"),\n", ")\n", "fig.show()\n", "\n", @@ -526,7 +530,7 @@ "source": [ "## Slow slots analysis\n", "\n", - "Scatter plot highlighting slots with unusually slow attestation inclusion (<90% at delay 1). Size indicates total validators, color indicates blob count." + "Scatter plot highlighting slots with unusually slow attestation inclusion (<90% at delay 1). Size indicates block size, color indicates blob count." 
] }, { @@ -649,29 +653,28 @@ "outputs": [], "source": [ "# CDF summary by delay\n", + "key_delays = [1, 2, 4, 8, 16, 32]\n", "summary = pd.DataFrame({\n", - " \"Delay\": [1, 2, 4, 8, 16, 32],\n", - " \"Mean %\": [df_pivot[d].mean() for d in [1, 2, 4, 8, 16, 32]],\n", - " \"Median %\": [df_pivot[d].median() for d in [1, 2, 4, 8, 16, 32]],\n", - " \"5th pct\": [df_pivot[d].quantile(0.05) for d in [1, 2, 4, 8, 16, 32]],\n", - " \"95th pct\": [df_pivot[d].quantile(0.95) for d in [1, 2, 4, 8, 16, 32]],\n", + " \"Delay\": key_delays,\n", + " \"Mean %\": [df_pivot[d].mean() for d in key_delays],\n", + " \"Median %\": [df_pivot[d].median() for d in key_delays],\n", + " \"5th pct\": [df_pivot[d].quantile(0.05) for d in key_delays],\n", + " \"95th pct\": [df_pivot[d].quantile(0.95) for d in key_delays],\n", "})\n", "print(\"Attestation inclusion by delay:\")\n", "display(summary.round(2))\n", "\n", "# Correlation summary\n", "print(\"\\nCorrelation with delay-1 inclusion rate:\")\n", - "corr_data = {\n", + "df_filtered = df_block[df_block[\"block_first_seen_ms\"] < 12000]\n", + "corr_df = pd.DataFrame({\n", " \"Factor\": [\"Blob count\", \"Block size (KB)\", \"Block first seen (ms)\"],\n", " \"Correlation\": [\n", " df_block[\"blob_count\"].corr(df_block[\"pct_at_delay_1\"]),\n", " df_block[\"block_size_kb\"].corr(df_block[\"pct_at_delay_1\"]),\n", - " df_block[df_block[\"block_first_seen_ms\"] < 12000][\"block_first_seen_ms\"].corr(\n", - " df_block[df_block[\"block_first_seen_ms\"] < 12000][\"pct_at_delay_1\"]\n", - " ),\n", + " df_filtered[\"block_first_seen_ms\"].corr(df_filtered[\"pct_at_delay_1\"]),\n", " ],\n", - "}\n", - "corr_df = pd.DataFrame(corr_data)\n", + "})\n", "display(corr_df.round(4))" ] } diff --git a/queries/attestation_buildup.py b/queries/attestation_buildup.py index 0e34979..876370e 100644 --- a/queries/attestation_buildup.py +++ b/queries/attestation_buildup.py @@ -5,8 +5,6 @@ Attestations for slot A can be included in blocks up to slot A+32. 
""" -from pathlib import Path - def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str: """Generate SQL date filter for a specific date.""" @@ -34,7 +32,8 @@ def fetch_attestation_buildup( epoch, slot_start_date_time, block_slot - slot AS inclusion_delay, - sum(length(validators)) AS validators_at_delay + -- Deduplicate validators across overlapping aggregates in the same block + arrayUniq(arrayFlatten(groupArray(validators))) AS validators_at_delay FROM default.canonical_beacon_elaborated_attestation WHERE meta_network_name = '{network}' AND {date_filter} @@ -83,7 +82,7 @@ def fetch_attestation_buildup( r.validators_at_delay AS validators_at_delay, r.cumulative_validators AS cumulative_validators, r.total_validators AS total_validators, - round(r.cumulative_validators * 100.0 / r.total_validators, 4) AS cumulative_pct, + round(r.cumulative_validators * 100.0 / nullif(r.total_validators, 0), 4) AS cumulative_pct, coalesce(b.blob_count, 0) AS blob_count, coalesce(bs.block_size_bytes, 0) AS block_size_bytes, coalesce(bs.block_first_seen_ms, 0) AS block_first_seen_ms From 2f73cacbb3fea4b31590faf7e47b492972d8d370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 29 Jan 2026 14:29:23 +0000 Subject: [PATCH 3/6] feat(queries): extend attestation inclusion window per EIP-7045 EIP-7045 (Deneb) extended attestation validity from 32 slots to the end of epoch N+1, giving a variable window of 32-64 slots depending on the attestation's position within its epoch. Update query to capture delays 1-64 instead of 1-32, and update notebook visualizations (heatmaps, CDF curves, summary stats) accordingly. 
--- notebooks/10-attestation-buildup.ipynb | 18 +++++++++--------- queries/attestation_buildup.py | 10 ++++++---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/notebooks/10-attestation-buildup.ipynb b/notebooks/10-attestation-buildup.ipynb index 3d1a71d..e312bb9 100644 --- a/notebooks/10-attestation-buildup.ipynb +++ b/notebooks/10-attestation-buildup.ipynb @@ -5,7 +5,7 @@ "id": "0", "metadata": {}, "source": [ - "Attestation buildup analysis showing how validator attestations accumulate over time. For each slot, attestations can be included in blocks up to 32 slots later." + "Attestation buildup analysis showing how validator attestations accumulate over time. Per EIP-7045 (Deneb), attestations can be included through the end of the next epoch, giving a variable window of 32-64 slots depending on position within the epoch." ] }, { @@ -50,13 +50,13 @@ "\n", "# Create a dense grid: fill missing delays with previous cumulative value\n", "slots = df[\"slot\"].unique()\n", - "delays = range(1, 33)\n", + "max_delay = 64 # EIP-7045 extended inclusion window\n", "\n", "# Pivot to get cumulative_pct for each (slot, delay)\n", "df_pivot = df.pivot(index=\"slot\", columns=\"inclusion_delay\", values=\"cumulative_pct\")\n", "\n", "# Forward fill missing delays (if no attestations at delay N, use value from delay N-1)\n", - "df_pivot = df_pivot.reindex(columns=range(1, 33)).ffill(axis=1).fillna(0)\n", + "df_pivot = df_pivot.reindex(columns=range(1, max_delay + 1)).ffill(axis=1).fillna(0)\n", "\n", "# Get slot metadata (including block size)\n", "slot_meta = df.drop_duplicates(\"slot\").set_index(\"slot\")[\n", @@ -78,7 +78,7 @@ "source": [ "## Attestation buildup heatmap\n", "\n", - "Each row is a slot, each column is the inclusion delay (1-32). Color intensity shows cumulative percentage of attestations included by that delay. Bright = fast inclusion, dark = slow." + "Each row is a slot, each column is the inclusion delay (1-64 per EIP-7045). 
Color intensity shows cumulative percentage of attestations included by that delay. Bright = fast inclusion, dark = slow." ] }, { @@ -197,7 +197,7 @@ "metadata": {}, "outputs": [], "source": [ - "delays = list(range(1, 33))\n", + "delays = list(range(1, 65))\n", "mean_curve = df_pivot.mean()\n", "p5 = df_pivot.quantile(0.05)\n", "p25 = df_pivot.quantile(0.25)\n", @@ -235,7 +235,7 @@ " mode=\"lines+markers\",\n", " name=\"Mean\",\n", " line=dict(color=\"#636EFA\", width=3),\n", - " marker=dict(size=6),\n", + " marker=dict(size=4),\n", " hovertemplate=\"Delay %{x}: %{y:.2f}%\",\n", "))\n", "\n", @@ -243,7 +243,7 @@ " title=\"Attestation buildup CDF (mean with percentile bands)\",\n", " xaxis_title=\"Inclusion delay (slots)\",\n", " yaxis_title=\"Cumulative attestations (%)\",\n", - " xaxis=dict(tickmode=\"linear\", dtick=2),\n", + " xaxis=dict(tickmode=\"linear\", dtick=4),\n", " yaxis=dict(range=[0, 105]),\n", " height=500,\n", " legend=dict(yanchor=\"bottom\", y=0.02, xanchor=\"right\", x=0.98),\n", @@ -493,7 +493,7 @@ "df_with_epoch = df_with_epoch.dropna(subset=[\"epoch\"])\n", "\n", "# Aggregate by epoch\n", - "epoch_agg = df_with_epoch.groupby(\"epoch\")[list(range(1, 33))].mean()\n", + "epoch_agg = df_with_epoch.groupby(\"epoch\")[list(range(1, 65))].mean()\n", "\n", "# Get epoch times for labels\n", "epoch_times = df.drop_duplicates(\"epoch\").set_index(\"epoch\")[\"time\"].to_dict()\n", @@ -653,7 +653,7 @@ "outputs": [], "source": [ "# CDF summary by delay\n", - "key_delays = [1, 2, 4, 8, 16, 32]\n", + "key_delays = [1, 2, 4, 8, 16, 32, 64]\n", "summary = pd.DataFrame({\n", " \"Delay\": key_delays,\n", " \"Mean %\": [df_pivot[d].mean() for d in key_delays],\n", diff --git a/queries/attestation_buildup.py b/queries/attestation_buildup.py index 876370e..b1ace62 100644 --- a/queries/attestation_buildup.py +++ b/queries/attestation_buildup.py @@ -2,7 +2,8 @@ Fetch functions for attestation buildup CDF analysis. 
Tracks how attestations accumulate over slots after the attested slot. -Attestations for slot A can be included in blocks up to slot A+32. +Per EIP-7045 (Deneb), attestations can be included through the end of the +next epoch, giving a variable window of 32-64 slots depending on position. """ @@ -18,8 +19,9 @@ def fetch_attestation_buildup( ) -> tuple: """Fetch attestation buildup CDF per slot. - For each slot, shows cumulative attestation inclusion at each delay (1-32 slots). - Includes blob count and block size (compressed) for correlation analysis. + For each slot, shows cumulative attestation inclusion at each delay (1-64 slots). + Per EIP-7045, attestations can be included through the end of epoch N+1. + Includes blob count and block size for correlation analysis. Returns (df, query). """ @@ -37,7 +39,7 @@ def fetch_attestation_buildup( FROM default.canonical_beacon_elaborated_attestation WHERE meta_network_name = '{network}' AND {date_filter} - AND block_slot - slot BETWEEN 1 AND 32 + AND block_slot - slot BETWEEN 1 AND 64 GROUP BY slot, epoch, slot_start_date_time, block_slot - slot ), From 05fcc8f4009741db090777fa19e65cbfc4bc3e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 29 Jan 2026 14:30:32 +0000 Subject: [PATCH 4/6] fix(queries): add GLOBAL to distributed table JOINs --- queries/attestation_buildup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/queries/attestation_buildup.py b/queries/attestation_buildup.py index b1ace62..e6fd1b5 100644 --- a/queries/attestation_buildup.py +++ b/queries/attestation_buildup.py @@ -89,8 +89,8 @@ def fetch_attestation_buildup( coalesce(bs.block_size_bytes, 0) AS block_size_bytes, coalesce(bs.block_first_seen_ms, 0) AS block_first_seen_ms FROM running_totals r -LEFT JOIN blobs b ON r.slot = b.slot -LEFT JOIN block_sizes bs ON r.slot = bs.slot +GLOBAL LEFT JOIN blobs b ON r.slot = b.slot +GLOBAL LEFT JOIN block_sizes bs ON r.slot = bs.slot ORDER BY r.slot, 
r.inclusion_delay """ From a52f306769adf0547b510316d31abebdf7b4ee75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 29 Jan 2026 15:07:23 +0000 Subject: [PATCH 5/6] feat(notebooks): add slot numbers to figure tooltips --- notebooks/10-attestation-buildup.ipynb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/notebooks/10-attestation-buildup.ipynb b/notebooks/10-attestation-buildup.ipynb index e312bb9..ba6abf9 100644 --- a/notebooks/10-attestation-buildup.ipynb +++ b/notebooks/10-attestation-buildup.ipynb @@ -94,17 +94,19 @@ "\n", "# Get time labels for y-axis\n", "y_labels = [slot_meta.loc[s, \"time\"].strftime(\"%H:%M\") if s in slot_meta.index else str(s) for s in df_sample.index]\n", + "slot_labels = list(df_sample.index)\n", "\n", "fig = go.Figure(\n", " data=go.Heatmap(\n", " z=df_sample.values,\n", " x=[str(d) for d in df_sample.columns],\n", " y=y_labels,\n", + " customdata=[[s] * len(df_sample.columns) for s in slot_labels],\n", " colorscale=\"Viridis\",\n", " zmin=0,\n", " zmax=100,\n", " colorbar=dict(title=\"Cumulative %\", ticksuffix=\"%\"),\n", - " hovertemplate=\"Time: %{y}<br>Delay: %{x} slots<br>Included: %{z:.1f}%\",\n", + " hovertemplate=\"Slot: %{customdata}<br>Time: %{y}<br>Delay: %{x} slots<br>Included: %{z:.1f}%\",\n", " )\n", ")\n", "\n", @@ -343,6 +345,7 @@ " color=\"blob_count\",\n", " color_continuous_scale=\"Viridis\",\n", " opacity=0.5,\n", + " hover_data={\"slot\": True, \"block_size_kb\": \":.1f\", \"pct_at_delay_1\": \":.1f\", \"blob_count\": True},\n", " labels={\n", " \"block_size_kb\": \"Block size (KB)\",\n", " \"pct_at_delay_1\": \"% included at delay 1\",\n", @@ -449,6 +452,7 @@ " color=\"block_size_kb\",\n", " color_continuous_scale=\"Plasma\",\n", " opacity=0.5,\n", + " hover_data={\"slot\": True, \"block_first_seen_ms\": True, \"pct_at_delay_1\": \":.1f\", \"block_size_kb\": \":.1f\"},\n", " labels={\n", " \"block_first_seen_ms\": \"Block first seen (ms after slot start)\",\n", " \"pct_at_delay_1\": \"% included at delay 1\",\n", @@ -609,10 +613,11 @@ "fig.add_trace(go.Scatter(\n", " x=df_sample[\"time\"],\n", " y=df_sample[\"pct_at_delay_1\"],\n", + " customdata=df_sample[\"slot\"],\n", " mode=\"markers\",\n", " marker=dict(size=3, color=\"#636EFA\", opacity=0.3),\n", " name=\"Per-slot\",\n", - " hovertemplate=\"%{x}<br>%{y:.1f}%\",\n", + " hovertemplate=\"Slot: %{customdata}<br>Time: %{x}<br>Included: %{y:.1f}%\",\n", "))\n", "\n", "# Rolling average\n", From 8277abc4b2f0e1811612b21792cc80bbfe94a721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 30 Jan 2026 11:09:06 +0000 Subject: [PATCH 6/6] fix(queries): deduplicate validators by first inclusion block The canonical_beacon_elaborated_attestation table stores attestations in every block where they were included, not just the first. This caused the same validator to be counted at multiple inclusion delays, inflating totals by ~2x. Fix: Use ARRAY JOIN to explode validators, then min(block_slot) per validator to find first inclusion. Each validator is now counted exactly once at their earliest inclusion delay. Verified: Slot 13564099 now shows 99.76% at delay 1 (was 49.94%). --- queries/attestation_buildup.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/queries/attestation_buildup.py b/queries/attestation_buildup.py index e6fd1b5..a581203 100644 --- a/queries/attestation_buildup.py +++ b/queries/attestation_buildup.py @@ -28,19 +28,32 @@ def fetch_attestation_buildup( date_filter = _get_date_filter(target_date) query = f""" -WITH attestation_counts AS ( +WITH first_inclusions AS ( + -- Find the first block where each validator's attestation was included SELECT slot, epoch, slot_start_date_time, - block_slot - slot AS inclusion_delay, - -- Deduplicate validators across overlapping aggregates in the same block - arrayUniq(arrayFlatten(groupArray(validators))) AS validators_at_delay + validator, + min(block_slot) AS first_block_slot FROM default.canonical_beacon_elaborated_attestation + ARRAY JOIN validators AS validator WHERE meta_network_name = '{network}' AND {date_filter} AND block_slot - slot BETWEEN 1 AND 64 - GROUP BY slot, epoch, slot_start_date_time, block_slot - slot + GROUP BY slot, epoch, slot_start_date_time, validator +), + +attestation_counts AS ( + -- Count validators at each inclusion delay (based on first inclusion
only) + SELECT + slot, + epoch, + slot_start_date_time, + first_block_slot - slot AS inclusion_delay, + count() AS validators_at_delay + FROM first_inclusions + GROUP BY slot, epoch, slot_start_date_time, inclusion_delay ), running_totals AS (