From 94be7f31f44aa25342c5f7afcbb367d47a0774db Mon Sep 17 00:00:00 2001 From: raulk Date: Wed, 4 Feb 2026 14:45:33 +0000 Subject: [PATCH 1/2] feat(notebooks): add transport protocol analysis notebook Analyzes QUIC vs TCP transport protocol distribution across the Ethereum mainnet P2P network using libp2p connection data from Xatu sentries. Features: - Overall transport protocol distribution (pie chart) - Daily connection trends by protocol (stacked bar) - Unique peer breakdown by protocol capability - Client implementation QUIC adoption rates - Connection frequency patterns by protocol - Summary statistics table Uses 7-day rolling window instead of daily snapshots since transport protocol analysis benefits from aggregate view. --- notebooks/10-transport-protocols.ipynb | 566 +++++++++++++++++++++++++ 1 file changed, 566 insertions(+) create mode 100644 notebooks/10-transport-protocols.ipynb diff --git a/notebooks/10-transport-protocols.ipynb b/notebooks/10-transport-protocols.ipynb new file mode 100644 index 0000000..b920275 --- /dev/null +++ b/notebooks/10-transport-protocols.ipynb @@ -0,0 +1,566 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "Analysis of transport protocol usage (QUIC vs TCP) in Ethereum mainnet libp2p connections.\n", + "\n", + "This notebook examines the distribution of QUIC and TCP transport protocols across the network, with breakdown by consensus client implementation. Data is sourced from the EthPandaOps Xatu sentries observing libp2p connection events." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "import clickhouse_connect\n", + "import pandas as pd\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "\n", + "lookback_days = 7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "client = clickhouse_connect.get_client(\n", + " host=os.environ[\"CLICKHOUSE_HOST\"],\n", + " port=int(os.environ.get(\"CLICKHOUSE_PORT\", 8443)),\n", + " username=os.environ[\"CLICKHOUSE_USER\"],\n", + " password=os.environ[\"CLICKHOUSE_PASSWORD\"],\n", + " secure=True,\n", + " autogenerate_session_id=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## Overall transport protocol distribution\n", + "\n", + "Breakdown of all libp2p connections by transport protocol. UDP indicates QUIC transport, TCP indicates traditional TCP connections." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "df_overall = client.query_df(f\"\"\"\n", + "SELECT \n", + " remote_transport_protocol as transport,\n", + " count(*) as connections,\n", + " round(count(*) * 100.0 / sum(count(*)) OVER (), 2) as percentage\n", + "FROM default.libp2p_connected\n", + "WHERE meta_network_name = 'mainnet'\n", + " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", + "GROUP BY remote_transport_protocol\n", + "ORDER BY connections DESC\n", + "\"\"\")\n", + "\n", + "df_overall[\"label\"] = df_overall.apply(\n", + " lambda r: f\"{'QUIC' if r['transport'] == 'udp' else 'TCP'} ({r['percentage']:.1f}%)\", \n", + " axis=1\n", + ")\n", + "\n", + "fig = px.pie(\n", + " df_overall,\n", + " values=\"connections\",\n", + " names=\"label\",\n", + " color=\"transport\",\n", + " color_discrete_map={\"tcp\": \"#3b82f6\", \"udp\": \"#22c55e\"},\n", + " hole=0.4,\n", + ")\n", + "fig.update_layout(\n", + " title=f\"Transport protocol distribution (last {lookback_days} days)\",\n", + " height=400,\n", + " showlegend=True,\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=-0.15, xanchor=\"center\", x=0.5),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## Daily transport protocol trends\n", + "\n", + "Daily breakdown showing TCP vs QUIC connection counts over time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "df_daily = client.query_df(f\"\"\"\n", + "SELECT \n", + " toDate(event_date_time) as date,\n", + " remote_transport_protocol as transport,\n", + " count(*) as connections,\n", + " uniqExact(remote_peer_id_unique_key) as unique_peers\n", + "FROM default.libp2p_connected\n", + "WHERE meta_network_name = 'mainnet'\n", + " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", + "GROUP BY date, transport\n", + "ORDER BY date, transport\n", + "\"\"\")\n", + "\n", + "df_daily[\"transport_label\"] = df_daily[\"transport\"].map({\"tcp\": \"TCP\", \"udp\": \"QUIC\"})\n", + "\n", + "fig = px.bar(\n", + " df_daily,\n", + " x=\"date\",\n", + " y=\"connections\",\n", + " color=\"transport_label\",\n", + " color_discrete_map={\"TCP\": \"#3b82f6\", \"QUIC\": \"#22c55e\"},\n", + " barmode=\"stack\",\n", + " labels={\"date\": \"Date\", \"connections\": \"Connections\", \"transport_label\": \"Transport\"},\n", + ")\n", + "fig.update_layout(\n", + " title=\"Daily connections by transport protocol\",\n", + " height=400,\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Unique peers by transport protocol\n", + "\n", + "Unique peer analysis showing how many distinct peers connected via each transport protocol. Note that some peers connect via both protocols." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "df_peer_breakdown = client.query_df(f\"\"\"\n", + "WITH peer_protocols AS (\n", + " SELECT \n", + " remote_peer_id_unique_key,\n", + " groupUniqArray(remote_transport_protocol) as protocols\n", + " FROM default.libp2p_connected\n", + " WHERE meta_network_name = 'mainnet'\n", + " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", + " GROUP BY remote_peer_id_unique_key\n", + ")\n", + "SELECT \n", + " countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,\n", + " countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,\n", + " countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,\n", + " count(*) as total\n", + "FROM peer_protocols\n", + "\"\"\")\n", + "\n", + "peer_data = [\n", + " {\"category\": \"TCP only\", \"peers\": int(df_peer_breakdown[\"tcp_only\"].iloc[0])},\n", + " {\"category\": \"QUIC only\", \"peers\": int(df_peer_breakdown[\"quic_only\"].iloc[0])},\n", + " {\"category\": \"Both\", \"peers\": int(df_peer_breakdown[\"both\"].iloc[0])},\n", + "]\n", + "df_peers = pd.DataFrame(peer_data)\n", + "df_peers[\"percentage\"] = (df_peers[\"peers\"] / df_peers[\"peers\"].sum() * 100).round(1)\n", + "\n", + "fig = px.bar(\n", + " df_peers,\n", + " x=\"category\",\n", + " y=\"peers\",\n", + " color=\"category\",\n", + " color_discrete_map={\"TCP only\": \"#3b82f6\", \"QUIC only\": \"#22c55e\", \"Both\": \"#a855f7\"},\n", + " text=df_peers.apply(lambda r: f\"{r['peers']:,} ({r['percentage']}%)\", axis=1),\n", + ")\n", + "fig.update_traces(textposition=\"outside\")\n", + "fig.update_layout(\n", + " title=f\"Unique peers by transport capability (last {lookback_days} days)\",\n", + " height=400,\n", + " showlegend=False,\n", + " xaxis_title=\"\",\n", + " yaxis_title=\"Unique peers\",\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + 
"source": [ + "## Transport protocol by client implementation\n", + "\n", + "QUIC adoption varies significantly by consensus client. This table shows the breakdown of unique peers by their transport protocol capability for each known client implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "df_clients = client.query_df(f\"\"\"\n", + "WITH peer_protocols AS (\n", + " SELECT \n", + " remote_agent_implementation as client,\n", + " remote_peer_id_unique_key,\n", + " groupUniqArray(remote_transport_protocol) as protocols\n", + " FROM default.libp2p_connected\n", + " WHERE meta_network_name = 'mainnet'\n", + " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", + " AND remote_agent_implementation NOT IN ('', 'unknown')\n", + " GROUP BY client, remote_peer_id_unique_key\n", + ")\n", + "SELECT \n", + " client,\n", + " countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,\n", + " countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,\n", + " countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,\n", + " count(*) as total,\n", + " round(countIf(has(protocols, 'udp')) * 100.0 / count(*), 1) as quic_capable_pct\n", + "FROM peer_protocols\n", + "GROUP BY client\n", + "HAVING total > 30\n", + "ORDER BY total DESC\n", + "\"\"\")\n", + "\n", + "df_clients" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "df_clients_long = df_clients.melt(\n", + " id_vars=[\"client\", \"total\"],\n", + " value_vars=[\"tcp_only\", \"quic_only\", \"both\"],\n", + " var_name=\"protocol_type\",\n", + " value_name=\"peers\",\n", + ")\n", + "df_clients_long[\"protocol_label\"] = df_clients_long[\"protocol_type\"].map({\n", + " \"tcp_only\": \"TCP only\",\n", + " \"quic_only\": \"QUIC only\",\n", + " \"both\": \"Both\",\n", + "})\n", + "\n", + "client_order = 
df_clients.sort_values(\"total\", ascending=True)[\"client\"].tolist()\n", + "\n", + "fig = px.bar(\n", + " df_clients_long,\n", + " x=\"peers\",\n", + " y=\"client\",\n", + " color=\"protocol_label\",\n", + " color_discrete_map={\"TCP only\": \"#3b82f6\", \"QUIC only\": \"#22c55e\", \"Both\": \"#a855f7\"},\n", + " orientation=\"h\",\n", + " category_orders={\"client\": client_order},\n", + " labels={\"peers\": \"Unique peers\", \"client\": \"Client\", \"protocol_label\": \"Transport\"},\n", + ")\n", + "fig.update_layout(\n", + " title=f\"Transport protocol support by client (last {lookback_days} days)\",\n", + " height=500,\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1),\n", + " barmode=\"stack\",\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## QUIC capability by client\n", + "\n", + "Percentage of peers supporting QUIC (either exclusively or alongside TCP) for each client implementation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "df_quic_pct = df_clients.sort_values(\"quic_capable_pct\", ascending=True).copy()\n", + "\n", + "fig = px.bar(\n", + " df_quic_pct,\n", + " x=\"quic_capable_pct\",\n", + " y=\"client\",\n", + " orientation=\"h\",\n", + " text=df_quic_pct[\"quic_capable_pct\"].apply(lambda x: f\"{x:.1f}%\"),\n", + " color=\"quic_capable_pct\",\n", + " color_continuous_scale=\"Greens\",\n", + ")\n", + "fig.update_traces(textposition=\"outside\")\n", + "fig.update_layout(\n", + " title=f\"QUIC-capable peers by client (last {lookback_days} days)\",\n", + " height=500,\n", + " showlegend=False,\n", + " xaxis_title=\"% of peers supporting QUIC\",\n", + " yaxis_title=\"\",\n", + " coloraxis_showscale=False,\n", + ")\n", + "fig.update_xaxes(range=[0, 100])\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## Connection patterns by protocol\n", + "\n", + "Comparison of connection frequency: how many connections each unique peer makes on average, broken down by transport protocol." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "df_conn_patterns = client.query_df(f\"\"\"\n", + "SELECT \n", + " remote_transport_protocol as transport,\n", + " count(*) as total_connections,\n", + " uniqExact(remote_peer_id_unique_key) as unique_peers,\n", + " round(count(*) / uniqExact(remote_peer_id_unique_key), 1) as connections_per_peer\n", + "FROM default.libp2p_connected\n", + "WHERE meta_network_name = 'mainnet'\n", + " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", + "GROUP BY transport\n", + "ORDER BY transport\n", + "\"\"\")\n", + "\n", + "df_conn_patterns[\"transport_label\"] = df_conn_patterns[\"transport\"].map({\"tcp\": \"TCP\", \"udp\": \"QUIC\"})\n", + "\n", + "fig = make_subplots(\n", + " rows=1, cols=2,\n", + " subplot_titles=(\"Unique peers\", \"Connections per peer\"),\n", + ")\n", + "\n", + "colors = {\"TCP\": \"#3b82f6\", \"QUIC\": \"#22c55e\"}\n", + "\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=df_conn_patterns[\"transport_label\"],\n", + " y=df_conn_patterns[\"unique_peers\"],\n", + " marker_color=[colors[t] for t in df_conn_patterns[\"transport_label\"]],\n", + " text=df_conn_patterns[\"unique_peers\"].apply(lambda x: f\"{x:,}\"),\n", + " textposition=\"outside\",\n", + " showlegend=False,\n", + " ),\n", + " row=1, col=1,\n", + ")\n", + "\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=df_conn_patterns[\"transport_label\"],\n", + " y=df_conn_patterns[\"connections_per_peer\"],\n", + " marker_color=[colors[t] for t in df_conn_patterns[\"transport_label\"]],\n", + " text=df_conn_patterns[\"connections_per_peer\"].apply(lambda x: f\"{x:.1f}\"),\n", + " textposition=\"outside\",\n", + " showlegend=False,\n", + " ),\n", + " row=1, col=2,\n", + ")\n", + "\n", + "fig.update_layout(\n", + " title=f\"Connection patterns by transport (last {lookback_days} days)\",\n", + " height=400,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": 
"markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Daily unique peers trend\n", + "\n", + "Daily count of unique peers connecting via each transport protocol." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "df_daily_pivot = df_daily.pivot(index=\"date\", columns=\"transport_label\", values=\"unique_peers\").reset_index()\n", + "df_daily_pivot[\"QUIC %\"] = (df_daily_pivot[\"QUIC\"] / (df_daily_pivot[\"TCP\"] + df_daily_pivot[\"QUIC\"]) * 100).round(1)\n", + "\n", + "fig = make_subplots(\n", + " rows=2, cols=1,\n", + " subplot_titles=(\"Unique peers by transport\", \"QUIC share of unique peers\"),\n", + " row_heights=[0.6, 0.4],\n", + " vertical_spacing=0.12,\n", + ")\n", + "\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=df_daily_pivot[\"date\"],\n", + " y=df_daily_pivot[\"TCP\"],\n", + " name=\"TCP\",\n", + " mode=\"lines+markers\",\n", + " line=dict(color=\"#3b82f6\"),\n", + " ),\n", + " row=1, col=1,\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=df_daily_pivot[\"date\"],\n", + " y=df_daily_pivot[\"QUIC\"],\n", + " name=\"QUIC\",\n", + " mode=\"lines+markers\",\n", + " line=dict(color=\"#22c55e\"),\n", + " ),\n", + " row=1, col=1,\n", + ")\n", + "\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=df_daily_pivot[\"date\"],\n", + " y=df_daily_pivot[\"QUIC %\"],\n", + " name=\"QUIC %\",\n", + " mode=\"lines+markers\",\n", + " line=dict(color=\"#22c55e\"),\n", + " fill=\"tozeroy\",\n", + " fillcolor=\"rgba(34, 197, 94, 0.2)\",\n", + " showlegend=False,\n", + " ),\n", + " row=2, col=1,\n", + ")\n", + "\n", + "fig.update_yaxes(title_text=\"Unique peers\", row=1, col=1)\n", + "fig.update_yaxes(title_text=\"QUIC %\", rangemode=\"tozero\", row=2, col=1)  # auto-scale from 0 so a QUIC share above 20% is not clipped\n", + "fig.update_xaxes(title_text=\"Date\", row=2, col=1)\n", + "\n", + "fig.update_layout(\n", + " title=f\"Daily unique peer trends (last {lookback_days} days)\",\n", + " height=600,\n", + " 
legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1),\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "Key findings from the transport protocol analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "tcp_pct = df_overall[df_overall[\"transport\"] == \"tcp\"][\"percentage\"].iloc[0]\n", + "quic_pct = df_overall[df_overall[\"transport\"] == \"udp\"][\"percentage\"].iloc[0]\n", + "\n", + "tcp_only_peers = int(df_peer_breakdown[\"tcp_only\"].iloc[0])\n", + "quic_only_peers = int(df_peer_breakdown[\"quic_only\"].iloc[0])\n", + "both_peers = int(df_peer_breakdown[\"both\"].iloc[0])\n", + "total_peers = int(df_peer_breakdown[\"total\"].iloc[0])\n", + "\n", + "quic_clients = df_clients[df_clients[\"quic_capable_pct\"] > 10][\"client\"].tolist()\n", + "no_quic_clients = df_clients[df_clients[\"quic_capable_pct\"] < 1][\"client\"].tolist()\n", + "\n", + "html = f'''\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Connection distribution
TCP connections{tcp_pct:.1f}%
QUIC connections{quic_pct:.1f}%
Unique peer breakdown
TCP-only peers{tcp_only_peers:,} ({tcp_only_peers/total_peers*100:.1f}%)
QUIC-only peers{quic_only_peers:,} ({quic_only_peers/total_peers*100:.1f}%)
Dual-protocol peers{both_peers:,} ({both_peers/total_peers*100:.1f}%)
Total unique peers{total_peers:,}
Client QUIC support
Significant QUIC support (>10%){\", \".join(quic_clients) or \"None\"}
No QUIC support (<1%){\", \".join(no_quic_clients) or \"None\"}
\n", + "'''\n", + "\n", + "display(HTML(html))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f612404e853b87e5be8ddcc85c1b28e37da92058 Mon Sep 17 00:00:00 2001 From: raulk Date: Wed, 4 Feb 2026 15:28:13 +0000 Subject: [PATCH 2/2] feat(pipeline): integrate transport protocols notebook into pipeline - Add query module queries/transport_protocols.py with 5 query functions for fetching QUIC vs TCP data from libp2p_connected table - Register queries in pipeline.yaml (7-day rolling window) - Register notebook in pipeline.yaml with Network icon - Update notebook to use standard loaders pattern (load_parquet, display_sql) - Add Network icon to Icon.astro component Queries fetch 7-day rolling data as of target_date to provide aggregate view of transport protocol adoption across the network. --- notebooks/10-transport-protocols.ipynb | 225 +++++++++++-------------- pipeline.yaml | 48 ++++++ queries/transport_protocols.py | 168 ++++++++++++++++++ site/src/components/Icon.astro | 3 +- 4 files changed, 316 insertions(+), 128 deletions(-) create mode 100644 queries/transport_protocols.py diff --git a/notebooks/10-transport-protocols.ipynb b/notebooks/10-transport-protocols.ipynb index b920275..145c088 100644 --- a/notebooks/10-transport-protocols.ipynb +++ b/notebooks/10-transport-protocols.ipynb @@ -7,7 +7,7 @@ "source": [ "Analysis of transport protocol usage (QUIC vs TCP) in Ethereum mainnet libp2p connections.\n", "\n", - "This notebook examines the distribution of QUIC and TCP transport protocols across the network, with breakdown by consensus client implementation. 
Data is sourced from the EthPandaOps Xatu sentries observing libp2p connection events." + "This notebook examines the distribution of QUIC and TCP transport protocols across the network, with breakdown by consensus client implementation. Data is sourced from the EthPandaOps Xatu sentries observing libp2p connection events over a 7-day rolling window." ] }, { @@ -21,44 +21,38 @@ }, "outputs": [], "source": [ - "import os\n", - "import clickhouse_connect\n", "import pandas as pd\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "from plotly.subplots import make_subplots\n", - "from dotenv import load_dotenv\n", "\n", - "load_dotenv()\n", + "from loaders import load_parquet, display_sql\n", "\n", - "lookback_days = 7" + "target_date = None # Set via papermill, or auto-detect from manifest" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "2", "metadata": {}, - "outputs": [], "source": [ - "client = clickhouse_connect.get_client(\n", - " host=os.environ[\"CLICKHOUSE_HOST\"],\n", - " port=int(os.environ.get(\"CLICKHOUSE_PORT\", 8443)),\n", - " username=os.environ[\"CLICKHOUSE_USER\"],\n", - " password=os.environ[\"CLICKHOUSE_PASSWORD\"],\n", - " secure=True,\n", - " autogenerate_session_id=False,\n", - ")" + "## Overall transport protocol distribution\n", + "\n", + "Breakdown of all libp2p connections by transport protocol. UDP indicates QUIC transport, TCP indicates traditional TCP connections." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "3", - "metadata": {}, + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], "source": [ - "## Overall transport protocol distribution\n", - "\n", - "Breakdown of all libp2p connections by transport protocol. UDP indicates QUIC transport, TCP indicates traditional TCP connections." 
+ "display_sql(\"transport_overall\", target_date)" ] }, { @@ -68,20 +62,10 @@ "metadata": {}, "outputs": [], "source": [ - "df_overall = client.query_df(f\"\"\"\n", - "SELECT \n", - " remote_transport_protocol as transport,\n", - " count(*) as connections,\n", - " round(count(*) * 100.0 / sum(count(*)) OVER (), 2) as percentage\n", - "FROM default.libp2p_connected\n", - "WHERE meta_network_name = 'mainnet'\n", - " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", - "GROUP BY remote_transport_protocol\n", - "ORDER BY connections DESC\n", - "\"\"\")\n", + "df_overall = load_parquet(\"transport_overall\", target_date)\n", "\n", "df_overall[\"label\"] = df_overall.apply(\n", - " lambda r: f\"{'QUIC' if r['transport'] == 'udp' else 'TCP'} ({r['percentage']:.1f}%)\", \n", + " lambda r: f\"{'QUIC' if r['transport'] == 'udp' else 'TCP'} ({r['percentage']:.1f}%)\",\n", " axis=1\n", ")\n", "\n", @@ -94,7 +78,7 @@ " hole=0.4,\n", ")\n", "fig.update_layout(\n", - " title=f\"Transport protocol distribution (last {lookback_days} days)\",\n", + " title=\"Transport protocol distribution (7-day rolling)\",\n", " height=400,\n", " showlegend=True,\n", " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=-0.15, xanchor=\"center\", x=0.5),\n", @@ -116,21 +100,24 @@ "cell_type": "code", "execution_count": null, "id": "6", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"transport_daily\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", "metadata": {}, "outputs": [], "source": [ - "df_daily = client.query_df(f\"\"\"\n", - "SELECT \n", - " toDate(event_date_time) as date,\n", - " remote_transport_protocol as transport,\n", - " count(*) as connections,\n", - " uniqExact(remote_peer_id_unique_key) as unique_peers\n", - "FROM default.libp2p_connected\n", - "WHERE meta_network_name = 'mainnet'\n", - " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", - "GROUP BY date, 
transport\n", - "ORDER BY date, transport\n", - "\"\"\")\n", + "df_daily = load_parquet(\"transport_daily\", target_date)\n", "\n", "df_daily[\"transport_label\"] = df_daily[\"transport\"].map({\"tcp\": \"TCP\", \"udp\": \"QUIC\"})\n", "\n", @@ -153,7 +140,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "8", "metadata": {}, "source": [ "## Unique peers by transport protocol\n", @@ -164,27 +151,25 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"transport_peers\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", "metadata": {}, "outputs": [], "source": [ - "df_peer_breakdown = client.query_df(f\"\"\"\n", - "WITH peer_protocols AS (\n", - " SELECT \n", - " remote_peer_id_unique_key,\n", - " groupUniqArray(remote_transport_protocol) as protocols\n", - " FROM default.libp2p_connected\n", - " WHERE meta_network_name = 'mainnet'\n", - " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", - " GROUP BY remote_peer_id_unique_key\n", - ")\n", - "SELECT \n", - " countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,\n", - " countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,\n", - " countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,\n", - " count(*) as total\n", - "FROM peer_protocols\n", - "\"\"\")\n", + "df_peer_breakdown = load_parquet(\"transport_peers\", target_date)\n", "\n", "peer_data = [\n", " {\"category\": \"TCP only\", \"peers\": int(df_peer_breakdown[\"tcp_only\"].iloc[0])},\n", @@ -204,7 +189,7 @@ ")\n", "fig.update_traces(textposition=\"outside\")\n", "fig.update_layout(\n", - " title=f\"Unique peers by transport capability (last {lookback_days} days)\",\n", + " title=\"Unique peers by transport capability (7-day rolling)\",\n", " height=400,\n", " showlegend=False,\n", " xaxis_title=\"\",\n", @@ -215,7 +200,7 @@ }, { 
"cell_type": "markdown", - "id": "9", + "id": "11", "metadata": {}, "source": [ "## Transport protocol by client implementation\n", @@ -226,34 +211,25 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "12", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"transport_by_client\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", "metadata": {}, "outputs": [], "source": [ - "df_clients = client.query_df(f\"\"\"\n", - "WITH peer_protocols AS (\n", - " SELECT \n", - " remote_agent_implementation as client,\n", - " remote_peer_id_unique_key,\n", - " groupUniqArray(remote_transport_protocol) as protocols\n", - " FROM default.libp2p_connected\n", - " WHERE meta_network_name = 'mainnet'\n", - " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", - " AND remote_agent_implementation NOT IN ('', 'unknown')\n", - " GROUP BY client, remote_peer_id_unique_key\n", - ")\n", - "SELECT \n", - " client,\n", - " countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,\n", - " countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,\n", - " countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,\n", - " count(*) as total,\n", - " round(countIf(has(protocols, 'udp')) * 100.0 / count(*), 1) as quic_capable_pct\n", - "FROM peer_protocols\n", - "GROUP BY client\n", - "HAVING total > 30\n", - "ORDER BY total DESC\n", - "\"\"\")\n", + "df_clients = load_parquet(\"transport_by_client\", target_date)\n", "\n", "df_clients" ] @@ -261,7 +237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -290,7 +266,7 @@ " labels={\"peers\": \"Unique peers\", \"client\": \"Client\", \"protocol_label\": \"Transport\"},\n", ")\n", "fig.update_layout(\n", - " title=f\"Transport protocol support by client (last {lookback_days} days)\",\n", + " title=\"Transport protocol 
support by client (7-day rolling)\",\n", " height=500,\n", " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1),\n", " barmode=\"stack\",\n", @@ -300,7 +276,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "15", "metadata": {}, "source": [ "## QUIC capability by client\n", @@ -311,7 +287,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -328,7 +304,7 @@ ")\n", "fig.update_traces(textposition=\"outside\")\n", "fig.update_layout(\n", - " title=f\"QUIC-capable peers by client (last {lookback_days} days)\",\n", + " title=\"QUIC-capable peers by client (7-day rolling)\",\n", " height=500,\n", " showlegend=False,\n", " xaxis_title=\"% of peers supporting QUIC\",\n", @@ -341,7 +317,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "17", "metadata": {}, "source": [ "## Connection patterns by protocol\n", @@ -352,22 +328,25 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "18", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"transport_connection_patterns\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", "metadata": {}, "outputs": [], "source": [ - "df_conn_patterns = client.query_df(f\"\"\"\n", - "SELECT \n", - " remote_transport_protocol as transport,\n", - " count(*) as total_connections,\n", - " uniqExact(remote_peer_id_unique_key) as unique_peers,\n", - " round(count(*) / uniqExact(remote_peer_id_unique_key), 1) as connections_per_peer\n", - "FROM default.libp2p_connected\n", - "WHERE meta_network_name = 'mainnet'\n", - " AND event_date_time >= now() - INTERVAL {lookback_days} DAY\n", - "GROUP BY transport\n", - "ORDER BY transport\n", - "\"\"\")\n", + "df_conn_patterns = load_parquet(\"transport_connection_patterns\", target_date)\n", "\n", "df_conn_patterns[\"transport_label\"] = df_conn_patterns[\"transport\"].map({\"tcp\": 
\"TCP\", \"udp\": \"QUIC\"})\n", "\n", @@ -403,7 +382,7 @@ ")\n", "\n", "fig.update_layout(\n", - " title=f\"Connection patterns by transport (last {lookback_days} days)\",\n", + " title=\"Connection patterns by transport (7-day rolling)\",\n", " height=400,\n", ")\n", "fig.show()" @@ -411,7 +390,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "20", "metadata": {}, "source": [ "## Daily unique peers trend\n", @@ -422,7 +401,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -476,7 +455,7 @@ "fig.update_xaxes(title_text=\"Date\", row=2, col=1)\n", "\n", "fig.update_layout(\n", - " title=f\"Daily unique peer trends (last {lookback_days} days)\",\n", + " title=\"Daily unique peer trends\",\n", " height=600,\n", " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1),\n", ")\n", @@ -485,7 +464,7 @@ }, { "cell_type": "markdown", - "id": "18", + "id": "22", "metadata": {}, "source": [ "## Summary\n", @@ -496,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -549,16 +528,8 @@ "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.11" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/pipeline.yaml b/pipeline.yaml index 128ee29..7e883dd 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -119,6 +119,36 @@ queries: description: Block propagation by geographic region from Contributoor nodes output_file: block_propagation_by_region_contributoor.parquet + transport_overall: + module: queries.transport_protocols + function: fetch_transport_overall + description: Overall QUIC vs TCP transport protocol distribution (7-day rolling) + output_file: transport_overall.parquet + + 
"""Fetch functions for transport protocol (QUIC vs TCP) analysis.

Queries the libp2p_connected table from Xatu sentries to analyze
transport protocol distribution across the Ethereum P2P network.

Every public ``fetch_*`` function has the same contract:

* ``client`` is any object exposing ``query_df(sql)`` (the functions call
  nothing else on it).
* ``target_date`` is the last day (inclusive) of the rolling window.
* ``network`` selects rows by ``meta_network_name``.
* ``lookback_days`` (keyword-only, default 7) is the window length in days.
* The return value is ``(df, query)``: whatever ``client.query_df`` returned
  and the exact SQL text that was executed.

In the queried table the transport value ``'udp'`` is reported as QUIC and
``'tcp'`` as TCP (see the ``quic_only`` / ``tcp_only`` column aliases below).
"""

from __future__ import annotations

import re
from datetime import date, datetime

# Window length used when callers do not pass ``lookback_days``.
_DEFAULT_LOOKBACK_DAYS = 7

# Network names are spliced into SQL string literals, so only a conservative
# identifier alphabet is accepted (e.g. "mainnet", "fusaka-devnet-3").
_NETWORK_NAME_RE = re.compile(r"[A-Za-z0-9_.-]+")


def _normalize_date(target_date) -> str:
    """Return ``target_date`` as a canonical ``YYYY-MM-DD`` string.

    Accepts ``date``/``datetime`` objects or an ISO date string. Anything that
    does not parse as a calendar date raises ``ValueError`` so it can never be
    spliced into a SQL literal.
    """
    # datetime is a subclass of date, so it must be checked first.
    if isinstance(target_date, datetime):
        return target_date.date().isoformat()
    if isinstance(target_date, date):
        return target_date.isoformat()
    try:
        return date.fromisoformat(str(target_date)).isoformat()
    except ValueError as err:
        raise ValueError(
            f"target_date must be an ISO date (YYYY-MM-DD), got {target_date!r}"
        ) from err


def _validate_network(network: str) -> str:
    """Return ``network`` unchanged if it is safe to embed in a SQL literal."""
    if not isinstance(network, str) or not _NETWORK_NAME_RE.fullmatch(network):
        raise ValueError(f"invalid network name: {network!r}")
    return network


def _get_lookback_filter(target_date: str, days: int = 7) -> str:
    """Generate SQL filter for N days ending on target_date (inclusive).

    Args:
        target_date: Last day of the window (ISO date string or date object).
        days: Window length in days; must be an integer >= 1.

    Returns:
        A SQL predicate on ``event_date_time`` covering
        ``[target_date - (days - 1), target_date + 1 day)``.

    Raises:
        ValueError: If ``target_date`` is not a valid date or ``days`` is not
            a positive integer.
    """
    # bool is an int subclass; reject it explicitly rather than treat True as 1.
    if isinstance(days, bool) or not isinstance(days, int) or days < 1:
        raise ValueError(f"days must be a positive integer, got {days!r}")
    day = _normalize_date(target_date)
    return (
        f"event_date_time >= '{day}'::date - INTERVAL {days - 1} DAY "
        f"AND event_date_time < '{day}'::date + INTERVAL 1 DAY"
    )


def _scope_predicate(target_date: str, network: str, days: int) -> str:
    """Build the WHERE-clause body shared by every query in this module."""
    return (
        f"meta_network_name = '{_validate_network(network)}'\n"
        f"  AND {_get_lookback_filter(target_date, days=days)}"
    )


def _run(client, query: str) -> tuple:
    """Execute ``query`` and return ``(result, query)``."""
    return client.query_df(query), query


def fetch_transport_overall(
    client,
    target_date: str,
    network: str = "mainnet",
    *,
    lookback_days: int = _DEFAULT_LOOKBACK_DAYS,
) -> tuple:
    """Fetch overall transport protocol distribution.

    Returns rolling stats as of target_date (7 days by default): one row per
    ``remote_transport_protocol`` with the connection count and its share of
    all connections in percent, ordered by connection count descending.

    Raises:
        ValueError: If ``target_date``, ``network`` or ``lookback_days`` is
            invalid (the client is not called in that case).
    """
    scope = _scope_predicate(target_date, network, lookback_days)

    query = f"""
SELECT
    remote_transport_protocol as transport,
    count(*) as connections,
    round(count(*) * 100.0 / sum(count(*)) OVER (), 2) as percentage
FROM default.libp2p_connected
WHERE {scope}
GROUP BY remote_transport_protocol
ORDER BY connections DESC
"""
    return _run(client, query)


def fetch_transport_daily(
    client,
    target_date: str,
    network: str = "mainnet",
    *,
    lookback_days: int = _DEFAULT_LOOKBACK_DAYS,
) -> tuple:
    """Fetch daily transport protocol breakdown.

    Returns daily stats for the window ending on target_date (7 days by
    default): per date and transport, the number of connections and the exact
    number of distinct ``remote_peer_id_unique_key`` values.

    Raises:
        ValueError: If ``target_date``, ``network`` or ``lookback_days`` is
            invalid (the client is not called in that case).
    """
    scope = _scope_predicate(target_date, network, lookback_days)

    query = f"""
SELECT
    toDate(event_date_time) as date,
    remote_transport_protocol as transport,
    count(*) as connections,
    uniqExact(remote_peer_id_unique_key) as unique_peers
FROM default.libp2p_connected
WHERE {scope}
GROUP BY date, transport
ORDER BY date, transport
"""
    return _run(client, query)


def fetch_transport_peers(
    client,
    target_date: str,
    network: str = "mainnet",
    *,
    lookback_days: int = _DEFAULT_LOOKBACK_DAYS,
) -> tuple:
    """Fetch unique peer breakdown by transport protocol capability.

    Categorizes peers as TCP-only, QUIC-only, or supporting both, based on
    the set of transports each peer was seen with inside the window. The
    result is a single row: ``tcp_only``, ``quic_only``, ``both``, ``total``.

    Raises:
        ValueError: If ``target_date``, ``network`` or ``lookback_days`` is
            invalid (the client is not called in that case).
    """
    scope = _scope_predicate(target_date, network, lookback_days)

    query = f"""
WITH peer_protocols AS (
    SELECT
        remote_peer_id_unique_key,
        groupUniqArray(remote_transport_protocol) as protocols
    FROM default.libp2p_connected
    WHERE {scope}
    GROUP BY remote_peer_id_unique_key
)
SELECT
    countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,
    countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,
    countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,
    count(*) as total
FROM peer_protocols
"""
    return _run(client, query)


def fetch_transport_by_client(
    client,
    target_date: str,
    network: str = "mainnet",
    *,
    lookback_days: int = _DEFAULT_LOOKBACK_DAYS,
) -> tuple:
    """Fetch transport protocol breakdown by client implementation.

    Shows QUIC adoption rates per consensus client. Rows with an empty or
    ``'unknown'`` ``remote_agent_implementation`` are excluded, peers are
    grouped per (client, peer), and only clients with more than 30 such peers
    are returned, ordered by peer count descending.

    Raises:
        ValueError: If ``target_date``, ``network`` or ``lookback_days`` is
            invalid (the client is not called in that case).
    """
    scope = _scope_predicate(target_date, network, lookback_days)

    query = f"""
WITH peer_protocols AS (
    SELECT
        remote_agent_implementation as client,
        remote_peer_id_unique_key,
        groupUniqArray(remote_transport_protocol) as protocols
    FROM default.libp2p_connected
    WHERE {scope}
      AND remote_agent_implementation NOT IN ('', 'unknown')
    GROUP BY client, remote_peer_id_unique_key
)
SELECT
    client,
    countIf(has(protocols, 'tcp') AND NOT has(protocols, 'udp')) as tcp_only,
    countIf(has(protocols, 'udp') AND NOT has(protocols, 'tcp')) as quic_only,
    countIf(has(protocols, 'tcp') AND has(protocols, 'udp')) as both,
    count(*) as total,
    round(countIf(has(protocols, 'udp')) * 100.0 / count(*), 1) as quic_capable_pct
FROM peer_protocols
GROUP BY client
HAVING total > 30
ORDER BY total DESC
"""
    return _run(client, query)


def fetch_transport_connection_patterns(
    client,
    target_date: str,
    network: str = "mainnet",
    *,
    lookback_days: int = _DEFAULT_LOOKBACK_DAYS,
) -> tuple:
    """Fetch connection frequency patterns by transport protocol.

    Returns one row per transport with total connections, exact unique peer
    count, and their ratio (connections per peer, rounded to one decimal).

    Raises:
        ValueError: If ``target_date``, ``network`` or ``lookback_days`` is
            invalid (the client is not called in that case).
    """
    scope = _scope_predicate(target_date, network, lookback_days)

    query = f"""
SELECT
    remote_transport_protocol as transport,
    count(*) as total_connections,
    uniqExact(remote_peer_id_unique_key) as unique_peers,
    round(count(*) / uniqExact(remote_peer_id_unique_key), 1) as connections_per_peer
FROM default.libp2p_connected
WHERE {scope}
GROUP BY transport
ORDER BY transport
"""
    return _run(client, query)