From 79a6beefcdd1b772d25646783da907d5cd55b4c2 Mon Sep 17 00:00:00 2001 From: Charlie Date: Mon, 15 Dec 2025 16:54:24 -0500 Subject: [PATCH 1/6] Add archive stats view and refine run displays --- sample_registry/app.py | 123 ++++++++++++++++----- sample_registry/templates/archive.html | 98 ++++++++++++++++ sample_registry/templates/base.html | 9 +- sample_registry/templates/browse_runs.html | 9 +- sample_registry/templates/show_run.html | 2 +- 5 files changed, 203 insertions(+), 38 deletions(-) create mode 100644 sample_registry/templates/archive.html diff --git a/sample_registry/app.py b/sample_registry/app.py index 723d34e..0559b83 100644 --- a/sample_registry/app.py +++ b/sample_registry/app.py @@ -1,19 +1,22 @@ -import csv -import pickle -import os -from flask import ( - Flask, - make_response, - render_template, - url_for, - request, - redirect, - send_file, - send_from_directory, -) -from flask_sqlalchemy import SQLAlchemy -from io import StringIO -from pathlib import Path +import csv +import pickle +import os +from collections import defaultdict +from datetime import datetime +from flask import ( + Flask, + make_response, + render_template, + url_for, + request, + redirect, + send_file, + send_from_directory, + jsonify, +) +from flask_sqlalchemy import SQLAlchemy +from io import StringIO +from pathlib import Path from sample_registry import SQLALCHEMY_DATABASE_URI from sample_registry.models import ( Base, @@ -33,10 +36,12 @@ # whatever production server you are using instead. It's ok to leave this in when running the dev server. app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) -app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI -print(SQLALCHEMY_DATABASE_URI) -db = SQLAlchemy(model_class=Base) -db.init_app(app) +app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI +print(SQLALCHEMY_DATABASE_URI) +db = SQLAlchemy(model_class=Base) +db.init_app(app) + +ARCHIVE_ROOT = Path("/mnt/isilon/microbiome/") with app.app_context(): db.create_all() @@ -187,7 +192,7 @@ def show_runs(run_acc=None): @app.route("/stats") -def show_stats(): +def show_stats(): num_samples = db.session.query(Sample).count() num_samples_with_sampletype = ( db.session.query(Sample).filter(Sample.sample_type is not None).count() @@ -276,9 +281,9 @@ def show_stats(): .count() ) - return render_template( - "show_stats.html", - num_samples=num_samples, + return render_template( + "show_stats.html", + num_samples=num_samples, num_samples_with_sampletype=num_samples_with_sampletype, num_samples_with_standard_sampletype=num_samples_with_standard_sampletype, standard_sampletype_counts=standard_sampletype_counts, @@ -290,8 +295,74 @@ def show_stats(): standard_hostspecies_counts=standard_hostspecies_counts, nonstandard_hostspecies_counts=nonstandard_hostspecies_counts, num_samples_with_primer=num_samples_with_primer, - num_samples_with_reverse_primer=num_samples_with_reverse_primer, - ) + num_samples_with_reverse_primer=num_samples_with_reverse_primer, + ) + + +def _parsed_month(date_str: str): + for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%m/%d/%y", "%m/%d/%Y"): + try: + return datetime.strptime(date_str, fmt).strftime("%Y-%m") + except ValueError: + continue + return None + + +def _archive_size_for_run(run, warnings): + archive_path = (ARCHIVE_ROOT / run.data_uri).parent + run_label = f"CMR{run.run_accession:06d}" + + if not archive_path.exists(): + warnings.append(f"{run_label}: Archive path {archive_path} does not exist") + return 0 + + if not archive_path.is_dir(): + warnings.append(f"{run_label}: Archive path {archive_path} is not a directory") + return 0 + + total_size = 0 + for entry in archive_path.rglob("*"): + try: + if entry.is_file(): + total_size += entry.stat().st_size + except OSError as exc: + warnings.append(f"{run_label}: Error accessing {entry}: {exc}") + + if total_size == 0: + warnings.append(f"{run_label}: Archive at {archive_path} has size 0 bytes") + + return total_size + + +@app.route("/api/archive_sizes") +def archive_sizes(): + runs = db.session.query(Run).all() + warnings = [] + totals_by_month = defaultdict(int) + + for run in runs: + month_label = _parsed_month(run.run_date) + if not month_label: + warnings.append( + f"CMR{run.run_accession:06d}: Unable to parse run_date '{run.run_date}'" + ) + + archive_size = _archive_size_for_run(run, warnings) + + if month_label: + totals_by_month[month_label] += archive_size + + by_month = [ + {"month": month, "size_bytes": totals_by_month[month]} + for month in sorted(totals_by_month.keys()) + ] + + return jsonify({"by_month": by_month, "warnings": warnings}) + + +@app.route("/archive") +def archive(): + return render_template("archive.html") @app.route("/download/", methods=["GET", "POST"]) diff --git a/sample_registry/templates/archive.html b/sample_registry/templates/archive.html new file mode 100644 index 0000000..20d7b64 --- /dev/null +++ b/sample_registry/templates/archive.html @@ -0,0 +1,98 @@ +{% extends 'base.html' %} + +{% block head %} + +{% endblock %} + +{% block body %} +
+
+
+

Archive usage

+

Net archive size grouped by month. Data are gathered directly from the NFS archive paths for each run.

+
+
+
+
+ +
+
+
+
+

+ +

+
+
    +
    +
    +
    +
    + + +{% endblock %} diff --git a/sample_registry/templates/base.html b/sample_registry/templates/base.html index 30f5c90..641b076 100644 --- a/sample_registry/templates/base.html +++ b/sample_registry/templates/base.html @@ -41,10 +41,11 @@

    - Runs - Metadata - Stats -
    + Runs + Metadata + Stats + Archive +
    diff --git a/sample_registry/templates/browse_runs.html b/sample_registry/templates/browse_runs.html index 0db170f..f322884 100644 --- a/sample_registry/templates/browse_runs.html +++ b/sample_registry/templates/browse_runs.html @@ -19,13 +19,8 @@

    Sequencing runs

    - {% for run, sample_count in sample_counts.items() %} - {% if run.machine_type.startswith('Illumina') %} - {% set platform = 'Illumina' %} - {% else %} - {% set platform = run.machine_type %} - {% endif %} - {% set platform = platform + ' ' + run.machine_kit %} + {% for run, sample_count in sample_counts.items() %} + {% set platform = run.machine_type + ' ' + run.machine_kit %} {{ "CMR{:06d}".format(run.run_accession) }} {{ run.run_date }} diff --git a/sample_registry/templates/show_run.html b/sample_registry/templates/show_run.html index 6142295..9d6c82e 100644 --- a/sample_registry/templates/show_run.html +++ b/sample_registry/templates/show_run.html @@ -19,7 +19,7 @@

  • Date: {{ run.run_date }}
  • Lane: {{ run.lane }}
  • Platform: {{ run.machine_type }} {{ run.machine_kit }}
  • -
  • Data file: {{ run.data_uri.split('/')|last }}
  • +
  • Data file: {{ run.data_uri }}
  • Export metadata for all samples:
    From 832de206e68aa91e095648d86831ed2b1ee82aba Mon Sep 17 00:00:00 2001 From: Ulthran Date: Mon, 26 Jan 2026 11:49:34 -0500 Subject: [PATCH 2/6] Sanitized, RO db connection --- sample_registry/__init__.py | 7 ++ sample_registry/app.py | 197 ++++++++++++++++++------------------ sample_registry/db.py | 1 - sample_registry/register.py | 1 - tests/test_mapping.py | 1 - tests/test_register.py | 1 - 6 files changed, 103 insertions(+), 105 deletions(-) diff --git a/sample_registry/__init__.py b/sample_registry/__init__.py index d95a285..2d631f7 100644 --- a/sample_registry/__init__.py +++ b/sample_registry/__init__.py @@ -8,6 +8,10 @@ __version__ = "1.3.0" +# Define archive root path +ARCHIVE_ROOT = Path( + os.environ.get("SAMPLE_REGISTRY_ARCHIVE_ROOT", "/mnt/isilon/microbiome/") +) # Doesn't include "NA" because that's what we fill in for missing values NULL_VALUES: list[Optional[str]] = [ None, @@ -42,6 +46,9 @@ def sample_registry_version(): # Set SQLALCHEMY_DATABASE_URI to an in-memory SQLite database for testing SQLALCHEMY_DATABASE_URI = "sqlite:///:memory:" +# Put guardrails on db connection +SQLALCHEMY_DATABASE_URI = f"{SQLALCHEMY_DATABASE_URI.split('?')[0]}?mode=ro&uri=true" +sys.stderr.write(f"Connecting to database at {SQLALCHEMY_DATABASE_URI}\n") # Create database engine engine = create_engine(SQLALCHEMY_DATABASE_URI, echo=False) diff --git a/sample_registry/app.py b/sample_registry/app.py index 0559b83..591f189 100644 --- a/sample_registry/app.py +++ b/sample_registry/app.py @@ -1,23 +1,23 @@ -import csv -import pickle -import os -from collections import defaultdict -from datetime import datetime -from flask import ( - Flask, - make_response, - render_template, - url_for, - request, - redirect, - send_file, - send_from_directory, - jsonify, -) -from flask_sqlalchemy import SQLAlchemy -from io import StringIO -from pathlib import Path -from sample_registry import SQLALCHEMY_DATABASE_URI +import csv +import pickle +import os +from collections import defaultdict +from datetime import datetime +from flask import ( + Flask, + make_response, + render_template, + url_for, + request, + redirect, + send_file, + send_from_directory, + jsonify, +) +from flask_sqlalchemy import SQLAlchemy +from io import StringIO +from pathlib import Path +from sample_registry import ARCHIVE_ROOT, SQLALCHEMY_DATABASE_URI from sample_registry.models import ( Base, Annotation, @@ -36,15 +36,10 @@ # whatever production server you are using instead. It's ok to leave this in when running the dev server. app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) -app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI -print(SQLALCHEMY_DATABASE_URI) -db = SQLAlchemy(model_class=Base) -db.init_app(app) - -ARCHIVE_ROOT = Path("/mnt/isilon/microbiome/") - -with app.app_context(): - db.create_all() +app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI +print(SQLALCHEMY_DATABASE_URI) +db = SQLAlchemy(model_class=Base) +db.init_app(app) @app.route("/favicon.ico") @@ -192,7 +187,7 @@ def show_runs(run_acc=None): @app.route("/stats") -def show_stats(): +def show_stats(): num_samples = db.session.query(Sample).count() num_samples_with_sampletype = ( db.session.query(Sample).filter(Sample.sample_type is not None).count() @@ -281,9 +276,9 @@ def show_stats(): .count() ) - return render_template( - "show_stats.html", - num_samples=num_samples, + return render_template( + "show_stats.html", + num_samples=num_samples, num_samples_with_sampletype=num_samples_with_sampletype, num_samples_with_standard_sampletype=num_samples_with_standard_sampletype, standard_sampletype_counts=standard_sampletype_counts, @@ -295,74 +290,74 @@ def show_stats(): standard_hostspecies_counts=standard_hostspecies_counts, nonstandard_hostspecies_counts=nonstandard_hostspecies_counts, num_samples_with_primer=num_samples_with_primer, - num_samples_with_reverse_primer=num_samples_with_reverse_primer, - ) - - -def _parsed_month(date_str: str): - for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%m/%d/%y", "%m/%d/%Y"): - try: - return datetime.strptime(date_str, fmt).strftime("%Y-%m") - except ValueError: - continue - return None - - -def _archive_size_for_run(run, warnings): - archive_path = (ARCHIVE_ROOT / run.data_uri).parent - run_label = f"CMR{run.run_accession:06d}" - - if not archive_path.exists(): - warnings.append(f"{run_label}: Archive path {archive_path} does not exist") - return 0 - - if not archive_path.is_dir(): - warnings.append(f"{run_label}: Archive path {archive_path} is not a directory") - return 0 - - total_size = 0 - for entry in archive_path.rglob("*"): - try: - if entry.is_file(): - total_size += entry.stat().st_size - except OSError as exc: - warnings.append(f"{run_label}: Error accessing {entry}: {exc}") - - if total_size == 0: - warnings.append(f"{run_label}: Archive at {archive_path} has size 0 bytes") - - return total_size - - -@app.route("/api/archive_sizes") -def archive_sizes(): - runs = db.session.query(Run).all() - warnings = [] - totals_by_month = defaultdict(int) - - for run in runs: - month_label = _parsed_month(run.run_date) - if not month_label: - warnings.append( - f"CMR{run.run_accession:06d}: Unable to parse run_date '{run.run_date}'" - ) - - archive_size = _archive_size_for_run(run, warnings) - - if month_label: - totals_by_month[month_label] += archive_size - - by_month = [ - {"month": month, "size_bytes": totals_by_month[month]} - for month in sorted(totals_by_month.keys()) - ] - - return jsonify({"by_month": by_month, "warnings": warnings}) - - -@app.route("/archive") -def archive(): - return render_template("archive.html") + num_samples_with_reverse_primer=num_samples_with_reverse_primer, + ) + + +def _parsed_month(date_str: str): + for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%m/%d/%y", "%m/%d/%Y"): + try: + return datetime.strptime(date_str, fmt).strftime("%Y-%m") + except ValueError: + continue + return None + + +def _archive_size_for_run(run, warnings): + archive_path = (ARCHIVE_ROOT / run.data_uri).parent + run_label = f"CMR{run.run_accession:06d}" + + if not archive_path.exists(): + warnings.append(f"{run_label}: Archive path {archive_path} does not exist") + return 0 + + if not archive_path.is_dir(): + warnings.append(f"{run_label}: Archive path {archive_path} is not a directory") + return 0 + + total_size = 0 + for entry in archive_path.rglob("*"): + try: + if entry.is_file(): + total_size += entry.stat().st_size + except OSError as exc: + warnings.append(f"{run_label}: Error accessing {entry}: {exc}") + + if total_size == 0: + warnings.append(f"{run_label}: Archive at {archive_path} has size 0 bytes") + + return total_size + + +@app.route("/api/archive_sizes") +def archive_sizes(): + runs = db.session.query(Run).all() + warnings = [] + totals_by_month = defaultdict(int) + + for run in runs: + month_label = _parsed_month(run.run_date) + if not month_label: + warnings.append( + f"CMR{run.run_accession:06d}: Unable to parse run_date '{run.run_date}'" + ) + + archive_size = _archive_size_for_run(run, warnings) + + if month_label: + totals_by_month[month_label] += archive_size + + by_month = [ + {"month": month, "size_bytes": totals_by_month[month]} + for month in sorted(totals_by_month.keys()) + ] + + return jsonify({"by_month": by_month, "warnings": warnings}) + + +@app.route("/archive") +def archive(): + return render_template("archive.html") @app.route("/download/", methods=["GET", "POST"]) diff --git a/sample_registry/db.py b/sample_registry/db.py index 9b9a911..6322348 100644 --- a/sample_registry/db.py +++ b/sample_registry/db.py @@ -13,7 +13,6 @@ StandardHostSpecies, ) - STANDARD_TAGS = { "SampleType": "sample_type", "SubjectID": "subject_id", diff --git a/sample_registry/register.py b/sample_registry/register.py index 7f0d8a1..1530470 100644 --- a/sample_registry/register.py +++ b/sample_registry/register.py @@ -9,7 +9,6 @@ from sample_registry.registrar import SampleRegistry from seqBackupLib.illumina import IlluminaFastq - SAMPLES_DESC = """\ Add new samples to the registry, with annotations. """ diff --git a/tests/test_mapping.py b/tests/test_mapping.py index c5fc035..33c3c8f 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -1,7 +1,6 @@ import io from sample_registry.mapping import SampleTable - NORMAL_TSV = """\ SampleID BarcodeSequence HostSpecies SubjectID S1 GCCT Human Hu23 diff --git a/tests/test_register.py b/tests/test_register.py index 7d4d574..12ccfbf 100644 --- a/tests/test_register.py +++ b/tests/test_register.py @@ -25,7 +25,6 @@ register_host_species, ) - samples = [ { "SampleID": "abc123", From d2791bee27efb9ca036b3db790c02131b8ac9a31 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Mon, 26 Jan 2026 12:01:13 -0500 Subject: [PATCH 3/6] Safe db connection --- .gitignore | 1 + sample_registry/__init__.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 6b9c6d8..21f1420 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ website/core.db build/ __pycache__/ *.sqlite3 +*.sqlite env diff --git a/sample_registry/__init__.py b/sample_registry/__init__.py index 2d631f7..b9a30f8 100644 --- a/sample_registry/__init__.py +++ b/sample_registry/__init__.py @@ -38,7 +38,7 @@ def sample_registry_version(): "Missing database connection information in environment, using test SQLite database\n" ) SQLALCHEMY_DATABASE_URI = ( - f"sqlite:///{Path(__file__).parent.parent.resolve()}/sample_registry.sqlite3" + f"sqlite:///{Path(__file__).parent.parent.resolve()}/sample_registry.sqlite" ) @@ -46,11 +46,16 @@ def sample_registry_version(): # Set SQLALCHEMY_DATABASE_URI to an in-memory SQLite database for testing SQLALCHEMY_DATABASE_URI = "sqlite:///:memory:" -# Put guardrails on db connection -SQLALCHEMY_DATABASE_URI = f"{SQLALCHEMY_DATABASE_URI.split('?')[0]}?mode=ro&uri=true" +SQLALCHEMY_DATABASE_URI = f"{SQLALCHEMY_DATABASE_URI.split('?')[0]}?mode=ro" sys.stderr.write(f"Connecting to database at {SQLALCHEMY_DATABASE_URI}\n") -# Create database engine -engine = create_engine(SQLALCHEMY_DATABASE_URI, echo=False) +# The 'check_same_thread': False argument is often necessary for SQLite with SQLAlchemy +# in multi-threaded environments (like web apps). +connection_args = {"check_same_thread": False} +# Construct the read-only engine +engine = create_engine( + SQLALCHEMY_DATABASE_URI, + connect_args=connection_args +) # Create database session Session = sessionmaker(bind=engine) From 75d549438392cd29f87dae26c5b579b969b93bda Mon Sep 17 00:00:00 2001 From: Ulthran Date: Mon, 26 Jan 2026 12:02:10 -0500 Subject: [PATCH 4/6] Reformat --- sample_registry/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sample_registry/__init__.py b/sample_registry/__init__.py index b9a30f8..92cca41 100644 --- a/sample_registry/__init__.py +++ b/sample_registry/__init__.py @@ -52,10 +52,7 @@ def sample_registry_version(): # in multi-threaded environments (like web apps). connection_args = {"check_same_thread": False} # Construct the read-only engine -engine = create_engine( - SQLALCHEMY_DATABASE_URI, - connect_args=connection_args -) +engine = create_engine(SQLALCHEMY_DATABASE_URI, connect_args=connection_args) # Create database session Session = sessionmaker(bind=engine) From b5c7b4fff5b32922a8d837726fa9797dd45f3056 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Mon, 26 Jan 2026 12:29:54 -0500 Subject: [PATCH 5/6] Implement copilot review suggestions --- README.md | 2 +- sample_registry/__init__.py | 8 ++------ sample_registry/app.py | 5 +++++ sample_registry/templates/archive.html | 2 +- sample_registry/templates/show_run.html | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1619144..f1df3de 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ python sample_registry/app.py How you want to deploy this will depend on your needs, facilities, and ability. We have it deployed by a Kubernetes cluster but you could also 1) just run it in development mode from a lab computer or 2) setup Nginx/Apache on a dedicated server or 3) run it serverlessly in the cloud (e.g. with [Zappa](https://github.com/zappa/Zappa) on AWS) or 4) do something else. There are lots of well documented examples of deploying Flask sites out there, look around and find what works best for you. -When running, it will default to using a SQLite3 database located in the root of this repository (automatically created if it doesn't already exist). You can change to use a different backend by setting the `SAMPLE_REGISTRY_DB_URI` environment variable before running the app. For example, another sqlite database could be specified with a URI like this: `export SAMPLE_REGISTRY_DB_URI=sqlite:////path/to/db.sqlite3`. +When running, it will default to using a SQLite3 database located in the root of this repository (automatically created if it doesn't already exist). You can change to use a different backend by setting the `SAMPLE_REGISTRY_DB_URI` environment variable before running the app. For example, another sqlite database could be specified with a URI like this: `export SAMPLE_REGISTRY_DB_URI=sqlite:////path/to/db.sqlite`. ## Using the library diff --git a/sample_registry/__init__.py b/sample_registry/__init__.py index 92cca41..62e061e 100644 --- a/sample_registry/__init__.py +++ b/sample_registry/__init__.py @@ -46,13 +46,9 @@ def sample_registry_version(): # Set SQLALCHEMY_DATABASE_URI to an in-memory SQLite database for testing SQLALCHEMY_DATABASE_URI = "sqlite:///:memory:" -SQLALCHEMY_DATABASE_URI = f"{SQLALCHEMY_DATABASE_URI.split('?')[0]}?mode=ro" + sys.stderr.write(f"Connecting to database at {SQLALCHEMY_DATABASE_URI}\n") -# The 'check_same_thread': False argument is often necessary for SQLite with SQLAlchemy -# in multi-threaded environments (like web apps). -connection_args = {"check_same_thread": False} -# Construct the read-only engine -engine = create_engine(SQLALCHEMY_DATABASE_URI, connect_args=connection_args) +engine = create_engine(SQLALCHEMY_DATABASE_URI) # Create database session Session = sessionmaker(bind=engine) diff --git a/sample_registry/app.py b/sample_registry/app.py index 591f189..0338ec7 100644 --- a/sample_registry/app.py +++ b/sample_registry/app.py @@ -36,8 +36,13 @@ # whatever production server you are using instead. It's ok to leave this in when running the dev server. app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) +# Sanitize and RO db connection +SQLALCHEMY_DATABASE_URI = f"{SQLALCHEMY_DATABASE_URI.split('?')[0]}?mode=ro" app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI print(SQLALCHEMY_DATABASE_URI) +# Ensure SQLite explicitly opens in read-only mode +app.config["SQLALCHEMY_ENGINE_OPTIONS"] = {"connect_args": {"uri": True}} + db = SQLAlchemy(model_class=Base) db.init_app(app) diff --git a/sample_registry/templates/archive.html b/sample_registry/templates/archive.html index 20d7b64..8462e4f 100644 --- a/sample_registry/templates/archive.html +++ b/sample_registry/templates/archive.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% block head %} - + {% endblock %} {% block body %} diff --git a/sample_registry/templates/show_run.html b/sample_registry/templates/show_run.html index 9d6c82e..a092818 100644 --- a/sample_registry/templates/show_run.html +++ b/sample_registry/templates/show_run.html @@ -19,7 +19,7 @@

  • Date: {{ run.run_date }}
  • Lane: {{ run.lane }}
  • Platform: {{ run.machine_type }} {{ run.machine_kit }}
  • -
  • Data file: {{ run.data_uri }}
  • +
  • Data file: /mnt/isilon/microbiome/{{ run.data_uri }}
  • Export metadata for all samples:
    From 946da610927f4705eb26e7b94c3a647e43048267 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Mon, 26 Jan 2026 12:35:37 -0500 Subject: [PATCH 6/6] Reduce potential for overflow --- sample_registry/app.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sample_registry/app.py b/sample_registry/app.py index 0338ec7..966201d 100644 --- a/sample_registry/app.py +++ b/sample_registry/app.py @@ -42,7 +42,6 @@ print(SQLALCHEMY_DATABASE_URI) # Ensure SQLite explicitly opens in read-only mode app.config["SQLALCHEMY_ENGINE_OPTIONS"] = {"connect_args": {"uri": True}} - db = SQLAlchemy(model_class=Base) db.init_app(app) @@ -338,19 +337,23 @@ def _archive_size_for_run(run, warnings): def archive_sizes(): runs = db.session.query(Run).all() warnings = [] + max_warnings = 50 totals_by_month = defaultdict(int) for run in runs: month_label = _parsed_month(run.run_date) if not month_label: - warnings.append( - f"CMR{run.run_accession:06d}: Unable to parse run_date '{run.run_date}'" - ) + if len(warnings) < max_warnings: + warnings.append( + f"CMR{run.run_accession:06d}: Unable to parse run_date '{run.run_date}'" + ) + continue archive_size = _archive_size_for_run(run, warnings) + totals_by_month[month_label] += archive_size - if month_label: - totals_by_month[month_label] += archive_size + if len(warnings) >= max_warnings: + warnings.append("... Additional warnings truncated ...") by_month = [ {"month": month, "size_bytes": totals_by_month[month]}