From c453047896d37e39dc3043fde6f73b4e1e0be009 Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Tue, 10 Jun 2025 16:20:38 +1000 Subject: [PATCH 1/3] Update excluded dir in pyproject.toml --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cbed6fc..2f0637c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,14 +6,15 @@ build-backend = "setuptools.build_meta" name = "memprof_plotter" version = "0.0.1" dependencies = [ - "matplotlib" + "matplotlib", + "PyGithub" ] requires-python = ">=3.10" description = "Plotting utility for tsp memory profiling data" [tool.setuptools.packages.find] include = ["memprof_plotter"] -exclude = ["plot"] +exclude = ["memprof_plots"] [project.scripts] memprof_plotter = "memprof_plotter.plotter:main" \ No newline at end of file From 8a933da0cfd4d1376d18c065fe77ced0200f2051 Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Wed, 11 Jun 2025 11:18:15 +1000 Subject: [PATCH 2/3] Use pygithub to retrieve artefacts. Add test action --- .github/workflows/test.yml | 29 +++++++++ memprof_plotter/plotter.py | 119 ++++++++++++++++++++++++++++++++----- pyproject.toml | 2 +- 3 files changed, 135 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..d2b4967 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,29 @@ +name: Create memory profiling graphs + + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install plotter + run: | + pip3 install . + - name: Make plots + env: + GH_TOKEN: ${{ github.token }} + run: | + memprof_plotter + - name: Upload plots + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: memprof_plots + path: memprof_plots \ No newline at end of file diff --git a/memprof_plotter/plotter.py b/memprof_plotter/plotter.py index 9cbfbcd..3be0e62 100644 --- a/memprof_plotter/plotter.py +++ b/memprof_plotter/plotter.py @@ -1,16 +1,25 @@ #!/usr/bin/env python3 -import re +import argparse +import github +import github.Workflow +import matplotlib.pyplot as plt import os +import re +import requests import sqlite3 import sys -import matplotlib.pyplot as plt +import tempfile +import zipfile from collections import defaultdict +from io import BytesIO + +gh_token = os.environ.get("GH_TOKEN", "BAD_KEY") -get_all_cmds_query = "SELECT command,category FROM jobs" +get_all_cmds_query: str = "SELECT command,category FROM jobs" -get_mem_query = """ +get_mem_query: str = """ SELECT command, category, @@ -31,21 +40,101 @@ """ +def download_artefact(url: str) -> bytes | None: + """ + PyGithub does not support retrieving artefacts into buffers, so we have to resort + to requests + """ + req = requests.get( + url, + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {gh_token}", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + if req.status_code != 200: + print(f"Failed to download archive {url}") + return + zf = zipfile.ZipFile(BytesIO(req.content)) + if "tsp_db.sqlite3" in zf.namelist(): + return zf.read("tsp_db.sqlite3") + else: + print("Artefact does not contain required TSP database") + return + + + +def get_artefacts(nruns: int, workflow: github.Workflow.Workflow, artefact: str) -> dict[int, bytes]: + irun = 0 + runs = {} + for run in workflow.get_runs(status="success"): + k = run.run_number + for gha in run.get_artifacts(): + if gha.name == artefact: + artefact_data = download_artefact(gha.archive_download_url) + if artefact_data: + runs[k] = artefact_data + irun += 1 + break + if irun == nruns: + break + return runs + + def main(): - outpath = os.environ.get("MEMPROF_PLOT_DIR","memprof_plots") + if gh_token == "BAD_KEY": + raise KeyError("GH_TOKEN must be set in environment") + + parser = argparse.ArgumentParser( + prog="memprof_plotter", description="Plot memprof data from g-adopt github actions artefacts" + ) + parser.add_argument( + "-o", "--outdir", required=False, type=str, default="memprof_plots", help="top directory for output plots" + ) + parser.add_argument( + "-n", "--nruns", required=False, type=int, default=5, help="Number of successful runs to gather" + ) + parser.add_argument( + "-r", "--repo", required=False, type=str, default="g-adopt/g-adopt", help="Repository to gather artefacts from" + ) + parser.add_argument( + "-w", + "--workflow", + required=False, + type=str, + default="test.yml", + help="Name of workflow file containing memprof data", + ) + parser.add_argument( + "-a", "--artefact", required=False, type=str, default="run-log", help="Name of artefact containing memprof data" + ) + + ns = parser.parse_args(sys.argv[1:]) + + ### Connect to github + auth = github.Auth.Token(gh_token) + gh = github.Github(auth=auth) + repo = gh.get_repo(ns.repo) + + runs = get_artefacts(ns.nruns, repo.get_workflow(ns.workflow), ns.artefact) d_times = defaultdict(dict) d_rss = defaultdict(dict) d_cat = {} d_names = {} - for runid in sys.argv[1:]: - try: - conn = sqlite3.connect(f"{runid}/tsp_db.sqlite3") - except sqlite3.OperationalError: - ### unable to open database file - continue + tmpfile = None + + for runid, run in runs.items(): + conn = sqlite3.connect(":memory:") + if hasattr(conn, "deserialize"): + conn.deserialize(run) + else: + tmpfile = tempfile.TemporaryFile() + tmpfile.write(run) + conn = sqlite3.connect(tmpfile.name) cur = conn.cursor() cur.execute(get_all_cmds_query) for cmd, cat in cur.fetchall(): @@ -63,11 +152,13 @@ def main(): d_times[f"{cat}_{cmd}"][runid].append(time) d_rss[f"{cat}_{cmd}"][runid].append(rss) conn.close() + if tmpfile: + tmpfile.close() for k, v in d_rss.items(): - os.makedirs(f"{outpath}/{d_cat[k]}", exist_ok=True) + os.makedirs(f"{ns.outdir}/{d_cat[k]}", exist_ok=True) fig, ax = plt.subplots() - for runid in sys.argv[1:]: + for runid in runs: if runid in v: ax.plot(d_times[k][runid], v[runid], label=f"Run {runid}") ax.set_xlabel("Time (seconds)") @@ -75,7 +166,7 @@ def main(): ax.set_ylim(ymin=0.0) ax.set_title(d_names[k]) ax.legend() - fig.savefig(f"{outpath}/{d_cat[k]}/{re.sub('[ /]', '', k)}.png") + fig.savefig(f"{ns.outdir}/{d_cat[k]}/{re.sub('[ /]', '', k)}.png") plt.close(fig) diff --git a/pyproject.toml b/pyproject.toml index 2f0637c..d18b1ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ dependencies = [ "matplotlib", "PyGithub" ] -requires-python = ">=3.10" +requires-python = ">=3.11" description = "Plotting utility for tsp memory profiling data" [tool.setuptools.packages.find] From 3a2d5006add55207579b57346d5fd066c004cdeb Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Wed, 11 Jun 2025 11:29:51 +1000 Subject: [PATCH 3/3] Use NamedTemporaryFile. Stop mypy complaining. --- .github/workflows/test.yml | 1 - memprof_plotter/plotter.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d2b4967..70b010e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,5 @@ name: Create memory profiling graphs - on: push: branches: diff --git a/memprof_plotter/plotter.py b/memprof_plotter/plotter.py index 3be0e62..873cd3b 100644 --- a/memprof_plotter/plotter.py +++ b/memprof_plotter/plotter.py @@ -55,13 +55,13 @@ def download_artefact(url: str) -> bytes | None: ) if req.status_code != 200: print(f"Failed to download archive {url}") - return + return None zf = zipfile.ZipFile(BytesIO(req.content)) if "tsp_db.sqlite3" in zf.namelist(): return zf.read("tsp_db.sqlite3") else: print("Artefact does not contain required TSP database") - return + return None @@ -132,7 +132,7 @@ def main(): if hasattr(conn, "deserialize"): conn.deserialize(run) else: - tmpfile = tempfile.TemporaryFile() + tmpfile = tempfile.NamedTemporaryFile() tmpfile.write(run) conn = sqlite3.connect(tmpfile.name) cur = conn.cursor()