From a9a9c68e200ee20bce4a4b88b3532476011ba623 Mon Sep 17 00:00:00 2001 From: Charlie Date: Wed, 10 Dec 2025 13:10:26 -0500 Subject: [PATCH 1/3] Handle path inputs for ingest TSVs --- marc_db/ingest.py | 14 +++++++++++++- tests/test_ingest.py | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/marc_db/ingest.py b/marc_db/ingest.py index 7ffcda0..900b791 100644 --- a/marc_db/ingest.py +++ b/marc_db/ingest.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, Iterable, Optional, Tuple, Callable +from typing import Callable, Dict, Iterable, Optional, Tuple, Union import pandas as pd from sqlalchemy.orm import Session @@ -38,6 +38,12 @@ def _ensure_required_columns(df: pd.DataFrame, required: Iterable[str]): raise ValueError(f"Missing required column(s): {', '.join(missing)}") +def _load_dataframe(data: Optional[Union[pd.DataFrame, Path, str]]): + if data is None or isinstance(data, pd.DataFrame): + return data + return pd.read_csv(Path(data), sep="\t") + + def _ingest_isolates(df: pd.DataFrame, session: Session): isolate_cols = [ "SampleID", @@ -202,6 +208,12 @@ def ingest_from_tsvs( """ created_session = False + isolates = _load_dataframe(isolates) + assemblies = _load_dataframe(assemblies) + assembly_qcs = _load_dataframe(assembly_qcs) + taxonomic_assignments = _load_dataframe(taxonomic_assignments) + contaminants = _load_dataframe(contaminants) + antimicrobials = _load_dataframe(antimicrobials) if session is None: session = get_session() created_session = True diff --git a/tests/test_ingest.py b/tests/test_ingest.py index 015badd..4944f71 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -57,3 +57,19 @@ def test_conflicting_duplicate_rows(): session.close() engine.dispose() + + +def test_ingest_accepts_path_strings(): + engine = create_engine("sqlite:///:memory:") + Session = sessionmaker(bind=engine) + session = Session() + Base.metadata.create_all(engine) + + tsv_path = str(data_dir / "test_multi_aliquot.tsv") + ingest_from_tsvs(isolates=tsv_path, yes=True, session=session) + + assert len(get_isolates(session)) == 2 + assert len(get_aliquots(session)) == 5 + + session.close() + engine.dispose() From d68541a16511a9f6675863e950221913b4190b2b Mon Sep 17 00:00:00 2001 From: Charlie Date: Wed, 10 Dec 2025 13:17:13 -0500 Subject: [PATCH 2/3] Update marc_db/ingest.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- marc_db/ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marc_db/ingest.py b/marc_db/ingest.py index 900b791..eedb6f1 100644 --- a/marc_db/ingest.py +++ b/marc_db/ingest.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Dict, Iterable, Optional, Tuple, Union +from typing import Callable, Dict, Iterable, Optional, Union import pandas as pd from sqlalchemy.orm import Session From 2d13463388a383cbd0fa9e8e0f45e869af73402a Mon Sep 17 00:00:00 2001 From: Charlie Date: Wed, 10 Dec 2025 13:17:22 -0500 Subject: [PATCH 3/3] Update marc_db/ingest.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- marc_db/ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marc_db/ingest.py b/marc_db/ingest.py index eedb6f1..4c444cd 100644 --- a/marc_db/ingest.py +++ b/marc_db/ingest.py @@ -38,7 +38,7 @@ def _ensure_required_columns(df: pd.DataFrame, required: Iterable[str]): raise ValueError(f"Missing required column(s): {', '.join(missing)}") -def _load_dataframe(data: Optional[Union[pd.DataFrame, Path, str]]): +def _load_dataframe(data: Optional[Union[pd.DataFrame, Path, str]]) -> Optional[pd.DataFrame]: if data is None or isinstance(data, pd.DataFrame): return data return pd.read_csv(Path(data), sep="\t")