From e5b519d8c8e28b51327649be1bc25c3e169c963b Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Thu, 12 Jun 2025 10:17:31 +1000 Subject: [PATCH 1/2] Unzip artefacts one at a time to save memory --- memprof_plotter/plotter.py | 77 ++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/memprof_plotter/plotter.py b/memprof_plotter/plotter.py index e8bac4a..6b1c7be 100644 --- a/memprof_plotter/plotter.py +++ b/memprof_plotter/plotter.py @@ -40,7 +40,7 @@ """ -def download_artefact(url: str) -> bytes | None: +def download_artefact(url: str) -> zipfile.ZipFile | None: """ PyGithub does not support retrieving artefacts into buffers, so we have to resort to requests @@ -58,13 +58,15 @@ def download_artefact(url: str) -> bytes | None: return None zf = zipfile.ZipFile(BytesIO(req.content)) if "tsp_db.sqlite3" in zf.namelist(): - return zf.read("tsp_db.sqlite3") + return zf else: print("Artefact does not contain required TSP database") return None -def get_artefacts(nruns: int, workflow: github.Workflow.Workflow, artefact: str, filter: list[str]) -> dict[int, bytes]: +def get_artefacts( + nruns: int, workflow: github.Workflow.Workflow, artefact: str, filter: list[str] +) -> dict[str, zipfile.ZipFile]: irun = 0 runs = {} for run in workflow.get_runs(status="success"): @@ -84,6 +86,27 @@ def get_artefacts(nruns: int, workflow: github.Workflow.Workflow, artefact: str, return runs +class Zip_to_sql_conn: + def __init__(self, zip: zipfile.ZipFile): + self.db = zip.read("tsp_db.sqlite3") + self.conn = sqlite3.connect(":memory:") + self.tmpfile = None + if hasattr(self.conn, "deserialize"): + self.conn.deserialize(self.db) + else: + self.tmpfile = tempfile.NamedTemporaryFile() + self.tmpfile.write(self.db) + self.conn = sqlite3.connect(self.tmpfile.name) + + def __enter__(self) -> sqlite3.Connection: + return self.conn + + def __exit__(self, type, value, traceback): + self.conn.close() + if self.tmpfile: + self.tmpfile.close() + + def main(): if gh_token == "BAD_KEY": raise KeyError("GH_TOKEN must be set in environment") @@ -136,35 +159,25 @@ def main(): d_cat = {} d_names = {} - tmpfile = None - - for runid, run in runs.items(): - conn = sqlite3.connect(":memory:") - if hasattr(conn, "deserialize"): - conn.deserialize(run) - else: - tmpfile = tempfile.NamedTemporaryFile() - tmpfile.write(run) - conn = sqlite3.connect(tmpfile.name) - cur = conn.cursor() - cur.execute(get_all_cmds_query) - for cmd, cat in cur.fetchall(): - d_cat[f"{cat}_{cmd}"] = cat or "other" - d_times[f"{cat}_{cmd}"][runid] = [] - d_rss[f"{cat}_{cmd}"][runid] = [] - d_names[f"{cat}_{cmd}"] = cmd - - try: - cur.execute(get_mem_query) - except sqlite3.OperationalError: - ### No such table memprof - continue - for cmd, cat, time, rss in cur.fetchall(): - d_times[f"{cat}_{cmd}"][runid].append(time) - d_rss[f"{cat}_{cmd}"][runid].append(rss) - conn.close() - if tmpfile: - tmpfile.close() + for runid, zf in runs.items(): + ##conn = zip_to_sql_conn(zf) + with Zip_to_sql_conn(zf) as conn: + cur = conn.cursor() + cur.execute(get_all_cmds_query) + for cmd, cat in cur.fetchall(): + d_cat[f"{cat}_{cmd}"] = cat or "other" + d_times[f"{cat}_{cmd}"][runid] = [] + d_rss[f"{cat}_{cmd}"][runid] = [] + d_names[f"{cat}_{cmd}"] = cmd + + try: + cur.execute(get_mem_query) + except sqlite3.OperationalError: + ### No such table memprof + continue + for cmd, cat, time, rss in cur.fetchall(): + d_times[f"{cat}_{cmd}"][runid].append(time) + d_rss[f"{cat}_{cmd}"][runid].append(rss) for k, v in d_rss.items(): os.makedirs(f"{ns.outdir}/{d_cat[k]}", exist_ok=True) From dc92292eeb0a0f9c036abcf4ae333e0410ef3dfb Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Thu, 12 Jun 2025 10:29:07 +1000 Subject: [PATCH 2/2] Remove comment --- memprof_plotter/plotter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/memprof_plotter/plotter.py b/memprof_plotter/plotter.py index 6b1c7be..d1e7cce 100644 --- a/memprof_plotter/plotter.py +++ b/memprof_plotter/plotter.py @@ -160,7 +160,6 @@ def main(): d_names = {} for runid, zf in runs.items(): - ##conn = zip_to_sql_conn(zf) with Zip_to_sql_conn(zf) as conn: cur = conn.cursor() cur.execute(get_all_cmds_query)