From c156e4c4eb2e25eba5f3c3bcece275422db44c1d Mon Sep 17 00:00:00 2001
From: jtschwar <jtschwar@gmail.com>
Date: Thu, 29 Jan 2026 13:35:21 -0800
Subject: [PATCH 1/4] add URI parsing to readers, add function to download
 portal project for STA

---
 src/copick_utils/cli/download.py |  25 ++++
 src/copick_utils/io/portal.py    | 127 +++++++++++++++++++
 src/copick_utils/io/readers.py   | 210 ++++++++++++++++++++-----------
 src/copick_utils/io/writers.py   |  18 +--
 4 files changed, 299 insertions(+), 81 deletions(-)
 create mode 100644 src/copick_utils/cli/download.py
 create mode 100644 src/copick_utils/io/portal.py

diff --git a/src/copick_utils/cli/download.py b/src/copick_utils/cli/download.py
new file mode 100644
index 0000000..f15ece4
--- /dev/null
+++ b/src/copick_utils/cli/download.py
@@ -0,0 +1,25 @@
+import click
+
+@click.command(
+    context_settings={"show_default": True},
+    short_help="Download tilt series and alignments from the CryoET Data Portal.",
+    no_args_is_help=True,
+)
+
+@click.option(
+    '-d', '--dataset', 
+    required=True, type=str,
+    help='Dataset ID to download from the CryoET Data Portal.',
+)
+@click.option(
+    '-o', '--output',
+    required=True, default='.', type=str,
+    help='Output directory to save the downloaded files.',
+)
+
+def download(dataset: str, output: str):
+    download_project(dataset, output)
+
+def download_project(dataset: str, output: str):
+    import copick_utils.io.portal as portal
+    portal.download_aretomo_files(dataset, output)
\ No newline at end of file
diff --git a/src/copick_utils/io/portal.py b/src/copick_utils/io/portal.py
new file mode 100644
index 0000000..25c0840
--- /dev/null
+++ b/src/copick_utils/io/portal.py
@@ -0,0 +1,127 @@
+"""
+A minimal example using minimal libraries / imports to download relevant AreTomo files from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the base filename.
+"""
+import multiprocessing, requests, argparse, os
+import pandas as pd
+import numpy as np
+import mdocfile, click
+
+import cryoet_data_portal as cdp
+import s3fs
+
+global_client = cdp.Client()
+
+def download_aretomo_files(dataset_id: int, output_dir: str):
+    print(f"Fetching tiltseries for dataset id {dataset_id}...", flush=True)
+    tiltseries_list: list[cdp.TiltSeries] = [tiltseries for run in cdp.Dataset.get_by_id(global_client, dataset_id).runs for tiltseries in run.tiltseries] # a bit slow for some reason, can take some time
+    tiltseries_run_ids_and_ts_ids = [(ts.run.id, ts.id) for ts in tiltseries_list]
+    print(f"Found {len(tiltseries_run_ids_and_ts_ids)} tiltseries for dataset id {dataset_id}. Starting downloads...", flush=True)
+    with multiprocessing.Pool(processes=8) as pool: # adjust number of processes as needed
+        for _ in pool.imap_unordered(_worker_download_aretomo_files_for_tiltseries, [(dataset_id, run_name, output_dir, tiltseries_id) for run_name, tiltseries_id in tiltseries_run_ids_and_ts_ids]):
+            pass
+            
+def _worker_download_aretomo_files_for_tiltseries(args):
+    dataset_id, run_name, output_dir, tiltseries_id = args
+    download_aretomo_files_for_tiltseries(dataset_id, run_name, output_dir, tiltseries_id)
+
+# note: this function assumes that there is only one tiltseries per run
+# note: the tiltseries name is equivlaent to the run name 
+# if tiltseries_id is provided, will be prioritized over dataset_id + run_name
+def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output_dir: str, tiltseries_id: int = None):
+
+    print(f"[{run_name}] Downloading AreTomo files for tiltseries id {tiltseries_id}...", flush=True)
+
+    client = cdp.Client()
+    s3 = s3fs.S3FileSystem(anon=True)
+    if not tiltseries_id:
+        all_tiltseries = cdp.TiltSeries.find(client, query_filters=[cdp.TiltSeries.run.dataset_id == dataset_id, cdp.TiltSeries.run.name == run_name])
+        if len(all_tiltseries) == 0:
+            raise ValueError(f"No tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
+        if len(all_tiltseries) > 1:
+            raise ValueError(f"Multiple tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
+        tiltseries = all_tiltseries[0]
+    else:
+        tiltseries = cdp.TiltSeries.get_by_id(client, tiltseries_id)
+
+    # get the s3 folder path and then glob for *.tlt / *.rawtlt files to download them, renaming the base to match the run id
+    s3_folder_path = tiltseries.s3_mrc_file.rsplit('/', 1)[0] + '/'
+    tlt_files = s3.glob(s3_folder_path + '*.tlt') + s3.glob(s3_folder_path + '*.rawtlt')
+    for tlt_file in tlt_files:
+        base_name = os.path.basename(tlt_file)
+        ext = os.path.splitext(base_name)[1]
+        dest_file = os.path.join(output_dir, f"{tiltseries.run.id}{ext}")
+        s3.get(tlt_file, dest_file)
+        print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
+
+    # do the same for "*CTF*.txt" files and "*ctf*.txt" files
+    ctf_files = s3.glob(s3_folder_path + '*CTF*.txt') + s3.glob(s3_folder_path + '*ctf*.txt')
+    if len(ctf_files) == 0:
+        print(f"WARNING: No CTF files found for tiltseries id {tiltseries.id}")
+    else:
+        ctf_file = ctf_files[0]
+        base_name = os.path.basename(ctf_file)
+        if len(ctf_files) > 1:
+            print(f"WARNING: Multiple CTF files found for tiltseries id {tiltseries.id}, using {base_name}")
+        ext = os.path.splitext(base_name)[1]
+        dest_file = os.path.join(output_dir, f"{tiltseries.run.id}_CTF.txt")       
+        s3.get(ctf_file, dest_file)
+        print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
+
+    # now find the corresponding alignment for this tiltseries and download the "*.aln" file
+    if len(tiltseries.alignments) == 0:
+        print(f"WARNING: No alignments found for tiltseries id {tiltseries.id}")
+    elif len(tiltseries.alignments) > 1:
+        print(f"WARNING: Multiple alignments found for tiltseries id {tiltseries.id}")
+    else:
+        alignment = tiltseries.alignments[0]
+        s3_alignment_folder_path = alignment.s3_alignment_metadata.rsplit('/', 1)[0] + '/'
+        aln_files = s3.glob(s3_alignment_folder_path + '*.aln')
+        if len(aln_files) == 0:
+            raise ValueError(f"No .aln files found for run name {tiltseries.run.name} and alignment id {alignment.id}")
+        aln_file = aln_files[0]
+        base_name = os.path.basename(aln_file)
+        if len(aln_files) > 1:
+            print(f"WARNING: Multiple .aln files found for run name {tiltseries.run.name}, using {base_name}")
+        ext = os.path.splitext(base_name)[1]
+        dest_file = os.path.join(output_dir, f"{tiltseries.run.id}{ext}")
+        s3.get(aln_file, dest_file)
+        print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
+
+    # now get the mdoc file from the Frames/ folder
+    frames = tiltseries.run.frames
+    if len(frames) == 0:
+        raise ValueError(f"No frames found for run name {tiltseries.run.name}")
+    frame = frames[0]
+    s3_frames_folder_path = frame.s3_frame_path.rsplit('/', 1)[0] + '/'
+    mdoc_files = s3.glob(s3_frames_folder_path + '*.mdoc')
+    if len(mdoc_files) == 0:
+        raise ValueError(f"No .mdoc files found for run name {tiltseries.run.name}")
+    mdoc_file = mdoc_files[0]
+    base_name = os.path.basename(mdoc_file)
+    if len(mdoc_files) > 1:
+        print(f"WARNING: Multiple .mdoc files found for run name {tiltseries.run.name}, using {base_name}")
+    ext = os.path.splitext(base_name)[1]
+    dest_file = os.path.join(output_dir, f"{tiltseries.run.id}{ext}")
+    s3.get(mdoc_file, dest_file)
+    print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
+
+    # download tiltseries mrc file
+    tiltseries_file = os.path.join(output_dir, f"{tiltseries.run.id}.mrc")
+    tiltseries_url = tiltseries.https_mrc_file
+    response = requests.get(tiltseries_url, stream=True)
+    response.raise_for_status()
+    with open(tiltseries_file, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+    print(f"[{tiltseries.run.id}] Downloaded tiltseries mrc file as {os.path.basename(tiltseries_file)}.", flush=True)
+
+    # create imod file for order list
+    mdoc = mdocfile.read(os.path.join(output_dir, f"{tiltseries.run.id}.mdoc"))
+    order_list = mdoc['TiltAngle']
+    imodpath = os.path.join(output_dir, f"{tiltseries.run.id}_Imod")
+    os.makedirs(imodpath, exist_ok=True)
+    number = np.arange(len(order_list)) + 1
+    
+    # save in csv with 'ImageNumber', 'TiltAngle' headers
+    df = pd.DataFrame({'ImageNumber': number, 'TiltAngle': order_list})
+    df.to_csv(os.path.join(imodpath, f"{tiltseries.run.id}_order_list.csv"), index=False)
diff --git a/src/copick_utils/io/readers.py b/src/copick_utils/io/readers.py
index 3de85f6..a7209e2 100644
--- a/src/copick_utils/io/readers.py
+++ b/src/copick_utils/io/readers.py
@@ -1,81 +1,131 @@
+from copick.util.uri import resolve_copick_objects
 import numpy as np
 
 
 def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: bool = False):
-    voxel_spacing_obj = run.get_voxel_spacing(voxel_size)
-
-    if voxel_spacing_obj is None:
-        # Query Avaiable Voxel Spacings
-        availableVoxelSpacings = [tomo.voxel_size for tomo in run.voxel_spacings]
-
-        # Report to the user which voxel spacings they can use
-        message = (
-            f"[Warning] No tomogram found for {run.name} with voxel size {voxel_size} and tomogram type {algorithm}"
-            f"Available spacings are: {', '.join(map(str, availableVoxelSpacings))}"
-        )
-        if raise_error:
-            raise ValueError(message)
-        else:
-            print(message)
-            return None
-
-    tomogram = voxel_spacing_obj.get_tomogram(algorithm)
-    if tomogram is None:
-        # Get available algorithms
-        availableAlgorithms = [tomo.tomo_type for tomo in run.get_voxel_spacing(voxel_size).tomograms]
-
-        # Report to the user which algorithms are available
-        message = (
-            f"[Warning] No tomogram found for {run.name} with voxel size {voxel_size} and tomogram type {algorithm}"
-            f"Available algorithms are: {', '.join(availableAlgorithms)}"
-        )
-        if raise_error:
-            raise ValueError(message)
-        else:
-            print(message)
-            return None
-
-    return tomogram.numpy()
-
-
-def segmentation(run, voxel_spacing: float, segmentation_name: str, session_id=None, user_id=None, raise_error=False):
-    seg = run.get_segmentations(
-        name=segmentation_name,
-        session_id=session_id,
-        user_id=user_id,
-        voxel_size=voxel_spacing,
-    )
-
-    # No Segmentations Are Available, Result in Error
-    if len(seg) == 0:
-        # Get all available segmentations with their metadata
-        available_segs = run.get_segmentations(voxel_size=voxel_spacing)
-        seg_info = [(s.name, s.user_id, s.session_id) for s in available_segs]
-
-        # Format the information for display
-        seg_details = [f"(name: {name}, user_id: {uid}, session_id: {sid})" for name, uid, sid in seg_info]
-
-        message = (
-            f"\nNo segmentation found matching:\n"
-            f"  name: {segmentation_name}, user_id: {user_id}, session_id: {session_id}\n"
-            f"Available segmentations in {run.name} are:\n  " + "\n  ".join(seg_details)
-        )
-        if raise_error:
-            raise ValueError(message)
-        else:
-            print(message)
-            return None
-
-    # No Segmentations Are Available, Result in Error
-    if len(seg) > 1:
-        print(
-            f"[Warning] More Than 1 Segmentation is Available for the Query Information. "
-            f"Available Segmentations are: {seg} "
-            f"Defaulting to Loading: {seg[0]}\n",
+    """
+    Reads a tomogram from a Copick run.
+
+    Parameters:
+    -----------
+    run: copick.Run
+    voxel_size: float
+    algorithm: str
+    raise_error: bool
+
+    Returns:
+    --------
+    vol: np.ndarray - The tomogram.
+    """
+
+    # Get the tomogram from the Copick URI
+    try:
+        uri = f'{algorithm}@{voxel_size}'
+        vol = resolve_copick_objects(uri, run.root, 'tomogram', run_name = run.name)
+        return vol[0].numpy()
+    except: # Report which orbject is missing
+
+        # Try to resolve the tomogram using the Copick URI
+        voxel_spacing_obj = run.get_voxel_spacing(voxel_size)
+
+        if voxel_spacing_obj is None:
+            # Query Avaiable Voxel Spacings
+            availableVoxelSpacings = [tomo.voxel_size for tomo in run.voxel_spacings]
+
+            # Report to the user which voxel spacings they can use
+            message = (
+                f"[Warning] No tomogram found for {run.name} with voxel size {voxel_size} and tomogram type {algorithm}"
+                f"Available spacings are: {', '.join(map(str, availableVoxelSpacings))}"
+            )
+            if raise_error:
+                raise ValueError(message)
+            else:
+                print(message)
+                return None
+
+        tomogram = voxel_spacing_obj.get_tomogram(algorithm)
+        if tomogram is None:
+            # Get available algorithms
+            availableAlgorithms = [tomo.tomo_type for tomo in run.get_voxel_spacing(voxel_size).tomograms]
+
+            # Report to the user which algorithms are available
+            message = (
+                f"[Warning] No tomogram found for {run.name} with voxel size {voxel_size} and tomogram type {algorithm}"
+                f"Available algorithms are: {', '.join(availableAlgorithms)}"
+            )
+            if raise_error:
+                raise ValueError(message)
+            else:
+                print(message)
+                return None
+
+
+def segmentation(run, voxel_spacing: float, name: str, user_id=None,  session_id=None, raise_error=False):
+    """
+    Reads a segmentation from a Copick run.
+
+    Parameters:
+    -----------
+    run: copick.Run
+    voxel_spacing: float
+    name: str
+    user_id: str
+    session_id: str
+    raise_error: bool
+
+    Returns:
+    --------
+    seg: np.ndarray - The segmentation.
+    """
+
+    # Fill in the missing values with wildcards
+    if user_id is None: user_id = '*'
+    if session_id is None: session_id = '*'
+
+    # Try to resolve the segmentation using the Copick URI
+    try:
+        uri = f'{name}:{user_id}/{session_id}@{voxel_spacing}'
+        segs = resolve_copick_objects(uri, run.root, 'segmentation', run_name = run.name)
+        return segs[0].numpy()
+    except:
+        # If the query was unavailable, set the user_id and session_id to None
+        user_id, session_id = None, None
+
+        # Query Was Unavailable, Let's List Out All Available Segmentations
+        seg = run.get_segmentations(
+            name=name,
+            session_id=session_id,
+            user_id=user_id,
+            voxel_size=voxel_spacing,
         )
-    seg = seg[0]
 
-    return seg.numpy()
+        # No Segmentations Are Available, Result in Error
+        if len(seg) == 0:
+            # Get all available segmentations with their metadata
+            available_segs = run.get_segmentations(voxel_size=voxel_spacing)
+            seg_info = [(s.name, s.user_id, s.session_id) for s in available_segs]
+
+            # Format the information for display
+            seg_details = [f"(name: {name}, user_id: {uid}, session_id: {sid})" for name, uid, sid in seg_info]
+
+            message = (
+                f"\nNo segmentation found matching:\n"
+                f"  name: {name}, user_id: {user_id}, session_id: {session_id}\n"
+                f"Available segmentations in {run.name} are:\n  " + "\n  ".join(seg_details)
+            )
+            if raise_error:
+                raise ValueError(message)
+            else:
+                print(message)
+                return None
+
+        # No Segmentations Are Available, Result in Error
+        if len(seg) > 1:
+            print(
+                f"[Warning] More Than 1 Segmentation is Available for the Query Information. "
+                f"Available Segmentations are: {seg} "
+                f"Defaulting to Loading: {seg[0]}\n",
+            )
 
 
 def coordinates(
@@ -86,6 +136,22 @@ def coordinates(
     voxel_size: float = 10,  # Voxel size of the tomogram, used for scaling the coordinates
     raise_error: bool = False,
 ):
+    """
+    Reads the coordinates of the picks from a Copick run.
+
+    Parameters:
+    -----------
+    run: copick.Run
+    name: str
+    user_id: str
+    session_id: str
+    voxel_size: float
+    raise_error: bool
+
+    Returns:
+    --------
+    coordinates: np.ndarray - The 3D coordinates of the picks in voxel space.
+    """
     # Retrieve the pick points associated with the specified object and user ID
     picks = run.get_picks(object_name=name, user_id=user_id, session_id=session_id)
 
diff --git a/src/copick_utils/io/writers.py b/src/copick_utils/io/writers.py
index 572525c..6c9877f 100644
--- a/src/copick_utils/io/writers.py
+++ b/src/copick_utils/io/writers.py
@@ -28,17 +28,17 @@ def tomogram(run, input_volume, voxel_size=10, algorithm="wbp"):
         voxel_spacing = run.new_voxel_spacing(voxel_size=voxel_size)
 
     # Check if We Need to Create a New Tomogram for Given Algorithm
-    tomogram = voxel_spacing.get_tomogram(algorithm)
-    if tomogram is None:
-        tomogram = voxel_spacing.new_tomogram(tomo_type=algorithm)
+    tomo = voxel_spacing.get_tomogram(algorithm)
+    if tomo is None:
+        tomo = voxel_spacing.new_tomogram(tomo_type=algorithm)
 
     # Write the tomogram data
-    tomogram.from_numpy(input_volume)
+    tomo.from_numpy(input_volume)
 
 
 def segmentation(
     run,
-    segmentation_volume,
+    seg_vol,
     user_id,
     name="segmentation",
     session_id="0",
@@ -52,7 +52,7 @@ def segmentation(
     -----------
     run : copick.Run
         The current Copick run object.
-    segmentation_volume : np.ndarray
+    seg_vol : np.ndarray
         The segmentation data to be written.
     user_id : str
         The ID of the user creating the segmentation.
@@ -76,7 +76,7 @@ def segmentation(
 
     # If no segmentation exists or no segmentation at the given voxel size, create a new one
     if len(segmentations) == 0 or any(seg.voxel_size != voxel_size for seg in segmentations):
-        segmentation = run.new_segmentation(
+        seg = run.new_segmentation(
             voxel_size=voxel_size,
             name=name,
             session_id=session_id,
@@ -85,7 +85,7 @@ def segmentation(
         )
     else:
         # Overwrite the current segmentation at the specified voxel size if it exists
-        segmentation = next(seg for seg in segmentations if seg.voxel_size == voxel_size)
+        seg = next(seg for seg in segmentations if seg.voxel_size == voxel_size)
 
     # Write the segmentation data
-    segmentation.from_numpy(segmentation_volume, dtype=np.uint8)
+    seg.from_numpy(seg_vol, dtype=np.uint8)

From 7a02e51e7c68de5b84a1acf5be22504188b87333 Mon Sep 17 00:00:00 2001
From: jtschwar <jtschwar@gmail.com>
Date: Fri, 30 Jan 2026 14:22:32 -0800
Subject: [PATCH 2/4] configure command to copick cli

---
 pyproject.toml                            |  4 ++++
 src/copick_utils/cli/download.py          |  7 ++++---
 src/copick_utils/cli/download_commands.py | 11 +++++++++++
 src/copick_utils/io/portal.py             |  4 ++--
 4 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 src/copick_utils/cli/download_commands.py

diff --git a/pyproject.toml b/pyproject.toml
index 812fb86..8ea6305 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "trimesh",
     "manifold3d",
     "mapbox-earcut",
+    "mdocfile",
     "tqdm",
     "scikit-learn",
     "shapely",
@@ -87,6 +88,9 @@ clippicks = "copick_utils.cli.logical_commands:clippicks"
 picksin = "copick_utils.cli.logical_commands:picksin"
 picksout = "copick_utils.cli.logical_commands:picksout"
 
+[project.entry-points."copick.download.commands"]
+project = "copick_utils.cli.download_commands:project"
+
 [tool.hatch.version]
 path = "src/copick_utils/__init__.py"
 
diff --git a/src/copick_utils/cli/download.py b/src/copick_utils/cli/download.py
index f15ece4..3503762 100644
--- a/src/copick_utils/cli/download.py
+++ b/src/copick_utils/cli/download.py
@@ -5,7 +5,6 @@
     short_help="Download tilt series and alignments from the CryoET Data Portal.",
     no_args_is_help=True,
 )
-
 @click.option(
     '-d', '--dataset', 
     required=True, type=str,
@@ -16,8 +15,10 @@
     required=True, default='.', type=str,
     help='Output directory to save the downloaded files.',
 )
-
-def download(dataset: str, output: str):
+def project(dataset: str, output: str):
+    """
+    Download tilt series and alignments from the CryoET Data Portal for sub-tomogram averaging with py2rely.
+    """
     download_project(dataset, output)
 
 def download_project(dataset: str, output: str):
diff --git a/src/copick_utils/cli/download_commands.py b/src/copick_utils/cli/download_commands.py
new file mode 100644
index 0000000..d7c0792
--- /dev/null
+++ b/src/copick_utils/cli/download_commands.py
@@ -0,0 +1,11 @@
+"""CLI commands for downloading data from the CryoET Data Portal.
+
+This module imports all download commands from specialized files for better organization.
+"""
+
+from copick_utils.cli.download import project
+
+# All commands are now available for import by the main CLI
+__all__ = [
+    "project",
+]
\ No newline at end of file
diff --git a/src/copick_utils/io/portal.py b/src/copick_utils/io/portal.py
index 25c0840..64f65e6 100644
--- a/src/copick_utils/io/portal.py
+++ b/src/copick_utils/io/portal.py
@@ -1,10 +1,10 @@
 """
 A minimal example using minimal libraries / imports to download relevant AreTomo files from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the base filename.
 """
-import multiprocessing, requests, argparse, os
+import multiprocessing, requests, os
 import pandas as pd
 import numpy as np
-import mdocfile, click
+import mdocfile
 
 import cryoet_data_portal as cdp
 import s3fs

From 812de6c3749f825b30ac31e9eda981908b93f4da Mon Sep 17 00:00:00 2001
From: jtschwar <jtschwar@gmail.com>
Date: Fri, 30 Jan 2026 14:28:44 -0800
Subject: [PATCH 3/4] linting + acknowledgements

---
 src/copick_utils/cli/download.py          | 22 ++++---
 src/copick_utils/cli/download_commands.py |  2 +-
 src/copick_utils/io/portal.py             | 72 +++++++++++++++--------
 src/copick_utils/io/readers.py            | 29 ++++-----
 4 files changed, 78 insertions(+), 47 deletions(-)

diff --git a/src/copick_utils/cli/download.py b/src/copick_utils/cli/download.py
index 3503762..895e042 100644
--- a/src/copick_utils/cli/download.py
+++ b/src/copick_utils/cli/download.py
@@ -1,19 +1,25 @@
 import click
 
+
 @click.command(
     context_settings={"show_default": True},
     short_help="Download tilt series and alignments from the CryoET Data Portal.",
     no_args_is_help=True,
 )
 @click.option(
-    '-d', '--dataset', 
-    required=True, type=str,
-    help='Dataset ID to download from the CryoET Data Portal.',
+    "-d",
+    "--dataset",
+    required=True,
+    type=str,
+    help="Dataset ID to download from the CryoET Data Portal.",
 )
 @click.option(
-    '-o', '--output',
-    required=True, default='.', type=str,
-    help='Output directory to save the downloaded files.',
+    "-o",
+    "--output",
+    required=True,
+    default=".",
+    type=str,
+    help="Output directory to save the downloaded files.",
 )
 def project(dataset: str, output: str):
     """
@@ -21,6 +27,8 @@ def project(dataset: str, output: str):
     """
     download_project(dataset, output)
 
+
 def download_project(dataset: str, output: str):
     import copick_utils.io.portal as portal
-    portal.download_aretomo_files(dataset, output)
\ No newline at end of file
+
+    portal.download_aretomo_files(dataset, output)
diff --git a/src/copick_utils/cli/download_commands.py b/src/copick_utils/cli/download_commands.py
index d7c0792..a97525f 100644
--- a/src/copick_utils/cli/download_commands.py
+++ b/src/copick_utils/cli/download_commands.py
@@ -8,4 +8,4 @@
 # All commands are now available for import by the main CLI
 __all__ = [
     "project",
-]
\ No newline at end of file
+]
diff --git a/src/copick_utils/io/portal.py b/src/copick_utils/io/portal.py
index 64f65e6..8e96a37 100644
--- a/src/copick_utils/io/portal.py
+++ b/src/copick_utils/io/portal.py
@@ -1,40 +1,62 @@
 """
-A minimal example using minimal libraries / imports to download relevant AreTomo files from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the base filename.
+A minimal example using minimal libraries / imports to download relevant AreTomo files
+from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the
+base filename.
+
+Original implementation by Daniel Ji and Utz Ermel.
 """
-import multiprocessing, requests, os
-import pandas as pd
-import numpy as np
-import mdocfile
+import multiprocessing
+import os
 
 import cryoet_data_portal as cdp
+import mdocfile
+import numpy as np
+import pandas as pd
+import requests
 import s3fs
 
 global_client = cdp.Client()
 
+
 def download_aretomo_files(dataset_id: int, output_dir: str):
     print(f"Fetching tiltseries for dataset id {dataset_id}...", flush=True)
-    tiltseries_list: list[cdp.TiltSeries] = [tiltseries for run in cdp.Dataset.get_by_id(global_client, dataset_id).runs for tiltseries in run.tiltseries] # a bit slow for some reason, can take some time
+    tiltseries_list: list[cdp.TiltSeries] = [
+        tiltseries for run in cdp.Dataset.get_by_id(global_client, dataset_id).runs for tiltseries in run.tiltseries
+    ]  # a bit slow for some reason, can take some time
     tiltseries_run_ids_and_ts_ids = [(ts.run.id, ts.id) for ts in tiltseries_list]
-    print(f"Found {len(tiltseries_run_ids_and_ts_ids)} tiltseries for dataset id {dataset_id}. Starting downloads...", flush=True)
-    with multiprocessing.Pool(processes=8) as pool: # adjust number of processes as needed
-        for _ in pool.imap_unordered(_worker_download_aretomo_files_for_tiltseries, [(dataset_id, run_name, output_dir, tiltseries_id) for run_name, tiltseries_id in tiltseries_run_ids_and_ts_ids]):
+    print(
+        f"Found {len(tiltseries_run_ids_and_ts_ids)} tiltseries for dataset id {dataset_id}. Starting downloads...",
+        flush=True,
+    )
+    with multiprocessing.Pool(processes=8) as pool:  # adjust number of processes as needed
+        for _ in pool.imap_unordered(
+            _worker_download_aretomo_files_for_tiltseries,
+            [
+                (dataset_id, run_name, output_dir, tiltseries_id)
+                for run_name, tiltseries_id in tiltseries_run_ids_and_ts_ids
+            ],
+        ):
             pass
-            
+
+
 def _worker_download_aretomo_files_for_tiltseries(args):
     dataset_id, run_name, output_dir, tiltseries_id = args
     download_aretomo_files_for_tiltseries(dataset_id, run_name, output_dir, tiltseries_id)
 
+
 # note: this function assumes that there is only one tiltseries per run
-# note: the tiltseries name is equivlaent to the run name 
+# note: the tiltseries name is equivalent to the run name
 # if tiltseries_id is provided, will be prioritized over dataset_id + run_name
 def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output_dir: str, tiltseries_id: int = None):
-
     print(f"[{run_name}] Downloading AreTomo files for tiltseries id {tiltseries_id}...", flush=True)
 
     client = cdp.Client()
     s3 = s3fs.S3FileSystem(anon=True)
     if not tiltseries_id:
-        all_tiltseries = cdp.TiltSeries.find(client, query_filters=[cdp.TiltSeries.run.dataset_id == dataset_id, cdp.TiltSeries.run.name == run_name])
+        all_tiltseries = cdp.TiltSeries.find(
+            client,
+            query_filters=[cdp.TiltSeries.run.dataset_id == dataset_id, cdp.TiltSeries.run.name == run_name],
+        )
         if len(all_tiltseries) == 0:
             raise ValueError(f"No tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
         if len(all_tiltseries) > 1:
@@ -44,8 +66,8 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
         tiltseries = cdp.TiltSeries.get_by_id(client, tiltseries_id)
 
     # get the s3 folder path and then glob for *.tlt / *.rawtlt files to download them, renaming the base to match the run id
-    s3_folder_path = tiltseries.s3_mrc_file.rsplit('/', 1)[0] + '/'
-    tlt_files = s3.glob(s3_folder_path + '*.tlt') + s3.glob(s3_folder_path + '*.rawtlt')
+    s3_folder_path = tiltseries.s3_mrc_file.rsplit("/", 1)[0] + "/"
+    tlt_files = s3.glob(s3_folder_path + "*.tlt") + s3.glob(s3_folder_path + "*.rawtlt")
     for tlt_file in tlt_files:
         base_name = os.path.basename(tlt_file)
         ext = os.path.splitext(base_name)[1]
@@ -54,7 +76,7 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
         print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
 
     # do the same for "*CTF*.txt" files and "*ctf*.txt" files
-    ctf_files = s3.glob(s3_folder_path + '*CTF*.txt') + s3.glob(s3_folder_path + '*ctf*.txt')
+    ctf_files = s3.glob(s3_folder_path + "*CTF*.txt") + s3.glob(s3_folder_path + "*ctf*.txt")
     if len(ctf_files) == 0:
         print(f"WARNING: No CTF files found for tiltseries id {tiltseries.id}")
     else:
@@ -63,7 +85,7 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
         if len(ctf_files) > 1:
             print(f"WARNING: Multiple CTF files found for tiltseries id {tiltseries.id}, using {base_name}")
         ext = os.path.splitext(base_name)[1]
-        dest_file = os.path.join(output_dir, f"{tiltseries.run.id}_CTF.txt")       
+        dest_file = os.path.join(output_dir, f"{tiltseries.run.id}_CTF.txt")
         s3.get(ctf_file, dest_file)
         print(f"[{tiltseries.run.id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)
 
@@ -74,8 +96,8 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
         print(f"WARNING: Multiple alignments found for tiltseries id {tiltseries.id}")
     else:
         alignment = tiltseries.alignments[0]
-        s3_alignment_folder_path = alignment.s3_alignment_metadata.rsplit('/', 1)[0] + '/'
-        aln_files = s3.glob(s3_alignment_folder_path + '*.aln')
+        s3_alignment_folder_path = alignment.s3_alignment_metadata.rsplit("/", 1)[0] + "/"
+        aln_files = s3.glob(s3_alignment_folder_path + "*.aln")
         if len(aln_files) == 0:
             raise ValueError(f"No .aln files found for run name {tiltseries.run.name} and alignment id {alignment.id}")
         aln_file = aln_files[0]
@@ -92,8 +114,8 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
     if len(frames) == 0:
         raise ValueError(f"No frames found for run name {tiltseries.run.name}")
     frame = frames[0]
-    s3_frames_folder_path = frame.s3_frame_path.rsplit('/', 1)[0] + '/'
-    mdoc_files = s3.glob(s3_frames_folder_path + '*.mdoc')
+    s3_frames_folder_path = frame.s3_frame_path.rsplit("/", 1)[0] + "/"
+    mdoc_files = s3.glob(s3_frames_folder_path + "*.mdoc")
     if len(mdoc_files) == 0:
         raise ValueError(f"No .mdoc files found for run name {tiltseries.run.name}")
     mdoc_file = mdoc_files[0]
@@ -110,18 +132,18 @@ def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output
     tiltseries_url = tiltseries.https_mrc_file
     response = requests.get(tiltseries_url, stream=True)
     response.raise_for_status()
-    with open(tiltseries_file, 'wb') as f:
+    with open(tiltseries_file, "wb") as f:
         for chunk in response.iter_content(chunk_size=8192):
             f.write(chunk)
     print(f"[{tiltseries.run.id}] Downloaded tiltseries mrc file as {os.path.basename(tiltseries_file)}.", flush=True)
 
     # create imod file for order list
     mdoc = mdocfile.read(os.path.join(output_dir, f"{tiltseries.run.id}.mdoc"))
-    order_list = mdoc['TiltAngle']
+    order_list = mdoc["TiltAngle"]
     imodpath = os.path.join(output_dir, f"{tiltseries.run.id}_Imod")
     os.makedirs(imodpath, exist_ok=True)
     number = np.arange(len(order_list)) + 1
-    
+
     # save in csv with 'ImageNumber', 'TiltAngle' headers
-    df = pd.DataFrame({'ImageNumber': number, 'TiltAngle': order_list})
+    df = pd.DataFrame({"ImageNumber": number, "TiltAngle": order_list})
     df.to_csv(os.path.join(imodpath, f"{tiltseries.run.id}_order_list.csv"), index=False)
diff --git a/src/copick_utils/io/readers.py b/src/copick_utils/io/readers.py
index a7209e2..b24efe7 100644
--- a/src/copick_utils/io/readers.py
+++ b/src/copick_utils/io/readers.py
@@ -1,5 +1,5 @@
-from copick.util.uri import resolve_copick_objects
 import numpy as np
+from copick.util.uri import resolve_copick_objects
 
 
 def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: bool = False):
@@ -20,11 +20,10 @@ def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: b
 
     # Get the tomogram from the Copick URI
     try:
-        uri = f'{algorithm}@{voxel_size}'
-        vol = resolve_copick_objects(uri, run.root, 'tomogram', run_name = run.name)
+        uri = f"{algorithm}@{voxel_size}"
+        vol = resolve_copick_objects(uri, run.root, "tomogram", run_name=run.name)
         return vol[0].numpy()
-    except: # Report which orbject is missing
-
+    except Exception as err:  # Report which object is missing
         # Try to resolve the tomogram using the Copick URI
         voxel_spacing_obj = run.get_voxel_spacing(voxel_size)
 
@@ -38,7 +37,7 @@ def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: b
                 f"Available spacings are: {', '.join(map(str, availableVoxelSpacings))}"
             )
             if raise_error:
-                raise ValueError(message)
+                raise ValueError(message) from err
             else:
                 print(message)
                 return None
@@ -54,13 +53,13 @@ def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: b
                 f"Available algorithms are: {', '.join(availableAlgorithms)}"
             )
             if raise_error:
-                raise ValueError(message)
+                raise ValueError(message) from err
             else:
                 print(message)
                 return None
 
 
-def segmentation(run, voxel_spacing: float, name: str, user_id=None,  session_id=None, raise_error=False):
+def segmentation(run, voxel_spacing: float, name: str, user_id=None, session_id=None, raise_error=False):
     """
     Reads a segmentation from a Copick run.
 
@@ -79,15 +78,17 @@ def segmentation(run, voxel_spacing: float, name: str, user_id=None,  session_id
     """
 
     # Fill in the missing values with wildcards
-    if user_id is None: user_id = '*'
-    if session_id is None: session_id = '*'
+    if user_id is None:
+        user_id = "*"
+    if session_id is None:
+        session_id = "*"
 
     # Try to resolve the segmentation using the Copick URI
     try:
-        uri = f'{name}:{user_id}/{session_id}@{voxel_spacing}'
-        segs = resolve_copick_objects(uri, run.root, 'segmentation', run_name = run.name)
+        uri = f"{name}:{user_id}/{session_id}@{voxel_spacing}"
+        segs = resolve_copick_objects(uri, run.root, "segmentation", run_name=run.name)
         return segs[0].numpy()
-    except:
+    except Exception as err:
         # If the query was unavailable, set the user_id and session_id to None
         user_id, session_id = None, None
 
@@ -114,7 +115,7 @@ def segmentation(run, voxel_spacing: float, name: str, user_id=None,  session_id
                 f"Available segmentations in {run.name} are:\n  " + "\n  ".join(seg_details)
             )
             if raise_error:
-                raise ValueError(message)
+                raise ValueError(message) from err
             else:
                 print(message)
                 return None

From bd0dd640beb903c15005273b6e55a094f45722a2 Mon Sep 17 00:00:00 2001
From: jtschwar <jtschwar@gmail.com>
Date: Fri, 30 Jan 2026 15:06:53 -0800
Subject: [PATCH 4/4] update flag

---
 src/copick_utils/cli/download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/copick_utils/cli/download.py b/src/copick_utils/cli/download.py
index 895e042..473fe2a 100644
--- a/src/copick_utils/cli/download.py
+++ b/src/copick_utils/cli/download.py
@@ -7,7 +7,7 @@
     no_args_is_help=True,
 )
 @click.option(
-    "-d",
+    "-ds",
     "--dataset",
     required=True,
     type=str,