From 0559db5b274994762bbc6f3220d274d36e2fba0b Mon Sep 17 00:00:00 2001 From: Sophia Maedler Date: Wed, 22 Oct 2025 16:00:48 +0000 Subject: [PATCH 1/4] fix UPath S3 access --- src/spatialdata/_core/spatialdata.py | 6 +++++- src/spatialdata/_io/_utils.py | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index bcd993d7..da583ced 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -62,6 +62,7 @@ Point_s = PointsModel() Table_s = TableModel() +import upath class SpatialData: """ @@ -586,8 +587,11 @@ def path(self) -> Path | None: def path(self, value: Path | None) -> None: if value is None or isinstance(value, str | Path): self._path = value + elif isinstance(value ,upath.implementations.cloud.S3Path): + self._path = value + else: - raise TypeError("Path must be `None`, a `str` or a `Path` object.") + raise TypeError("Path must be `None`, a `str` or a `Path` object, but is {}".format(type(value))) if not self.is_self_contained(): logger.info( diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index e1756d6b..54452fe4 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -35,6 +35,7 @@ from spatialdata.transformations.ngff.ngff_transformations import NgffBaseTransformation from spatialdata.transformations.transformations import BaseTransformation, _get_current_output_axes +from upath import UPath # suppress logger debug from ome_zarr with context manager @contextmanager @@ -333,12 +334,12 @@ def _backed_elements_contained_in_path(path: Path, object: SpatialData | Spatial If an object does not have a Dask computational graph, it will return an empty list. It is possible for a single SpatialElement to contain multiple files in their Dask computational graph. """ - if not isinstance(path, Path): + if not isinstance(path, Path | UPath): raise TypeError(f"Expected a Path object, got {type(path)}") return [_is_subfolder(parent=path, child=Path(fp)) for fp in get_dask_backing_files(object)] -def _is_subfolder(parent: Path, child: Path) -> bool: +def _is_subfolder(parent: Path | UPath, child: Path |UPath) -> bool: """ Check if a path is a subfolder of another path. @@ -357,13 +358,19 @@ def _is_subfolder(parent: Path, child: Path) -> bool: child = Path(child) if isinstance(parent, str): parent = Path(parent) - if not isinstance(parent, Path) or not isinstance(child, Path): + + if not isinstance(parent, Path|UPath) or not isinstance(child, Path|UPath): raise TypeError(f"Expected a Path object, got {type(parent)} and {type(child)}") - return child.resolve().is_relative_to(parent.resolve()) + + if isinstance(parent, UPath) and isinstance(child, UPath): + # if both are UPath, use the resolve method to check relative path + return child.relative_to(parent) + elif isinstance(parent, Path) and isinstance(child, Path): + return child.resolve().is_relative_to(parent.resolve()) def _is_element_self_contained( - element: DataArray | DataTree | DaskDataFrame | GeoDataFrame | AnnData, element_path: Path + element: DataArray | DataTree | DaskDataFrame | GeoDataFrame | AnnData, element_path: Path |UPath ) -> bool: if isinstance(element, DaskDataFrame): pass From 96e339e1bba96fb753c06700e1a2adc917c8c85e Mon Sep 17 00:00:00 2001 From: Sophia Maedler Date: Wed, 22 Oct 2025 16:02:43 +0000 Subject: [PATCH 2/4] cleanup code --- src/spatialdata/_core/spatialdata.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index da583ced..dca4294a 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -7,6 +7,7 @@ from collections.abc import Generator, Mapping from itertools import chain from pathlib import Path +from upath import UPath from typing import TYPE_CHECKING, Any, Literal import pandas as pd @@ -62,7 +63,6 @@ Point_s = PointsModel() Table_s = TableModel() -import upath class SpatialData: """ @@ -585,11 +585,8 @@ def path(self) -> Path | None: @path.setter def path(self, value: Path | None) -> None: - if value is None or isinstance(value, str | Path): + if value is None or isinstance(value, str | Path | UPath): self._path = value - elif isinstance(value ,upath.implementations.cloud.S3Path): - self._path = value - else: raise TypeError("Path must be `None`, a `str` or a `Path` object, but is {}".format(type(value))) From 300765c8daf3b6201351191f53973e6bdf5eb12a Mon Sep 17 00:00:00 2001 From: Sophia Maedler Date: Wed, 22 Oct 2025 16:10:41 +0000 Subject: [PATCH 3/4] fix pre-commit issues --- src/spatialdata/_core/spatialdata.py | 4 ++-- src/spatialdata/_io/_utils.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index dca4294a..17d7b120 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -7,7 +7,6 @@ from collections.abc import Generator, Mapping from itertools import chain from pathlib import Path -from upath import UPath from typing import TYPE_CHECKING, Any, Literal import pandas as pd @@ -19,6 +18,7 @@ from geopandas import GeoDataFrame from ome_zarr.io import parse_url from shapely import MultiPolygon, Polygon +from upath import UPath from xarray import DataArray, DataTree from spatialdata._core._elements import Images, Labels, Points, Shapes, Tables @@ -588,7 +588,7 @@ def path(self, value: Path | None) -> None: if value is None or isinstance(value, str | Path | UPath): self._path = value else: - raise TypeError("Path must be `None`, a `str` or a `Path` object, but is {}".format(type(value))) + raise TypeError(f"Path must be `None`, a `str` or a `Path` object, but is {type(value)}") if not self.is_self_contained(): logger.info( diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 54452fe4..8849f1d9 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -35,7 +35,6 @@ from spatialdata.transformations.ngff.ngff_transformations import NgffBaseTransformation from spatialdata.transformations.transformations import BaseTransformation, _get_current_output_axes -from upath import UPath # suppress logger debug from ome_zarr with context manager @contextmanager @@ -339,7 +338,7 @@ def _backed_elements_contained_in_path(path: Path, object: SpatialData | Spatial return [_is_subfolder(parent=path, child=Path(fp)) for fp in get_dask_backing_files(object)] -def _is_subfolder(parent: Path | UPath, child: Path |UPath) -> bool: +def _is_subfolder(parent: Path | UPath, child: Path | UPath) -> bool: """ Check if a path is a subfolder of another path. @@ -359,18 +358,22 @@ def _is_subfolder(parent: Path | UPath, child: Path |UPath) -> bool: if isinstance(parent, str): parent = Path(parent) - if not isinstance(parent, Path|UPath) or not isinstance(child, Path|UPath): + if not isinstance(parent, Path | UPath) or not isinstance(child, Path | UPath): raise TypeError(f"Expected a Path object, got {type(parent)} and {type(child)}") - + if isinstance(parent, UPath) and isinstance(child, UPath): # if both are UPath, use the resolve method to check relative path - return child.relative_to(parent) - elif isinstance(parent, Path) and isinstance(child, Path): - return child.resolve().is_relative_to(parent.resolve()) + try: + child.relative_to(parent) # .resolve is not needed here, as UPath already resolves the path correctly + return True + except ValueError: + return False + + return child.resolve().is_relative_to(parent.resolve()) def _is_element_self_contained( - element: DataArray | DataTree | DaskDataFrame | GeoDataFrame | AnnData, element_path: Path |UPath + element: DataArray | DataTree | DaskDataFrame | GeoDataFrame | AnnData, element_path: Path | UPath ) -> bool: if isinstance(element, DaskDataFrame): pass From 5a5ec11a64f4caba57cd9af8a2d6c7b6b55f822e Mon Sep 17 00:00:00 2001 From: Sophia Maedler Date: Wed, 22 Oct 2025 16:36:40 +0000 Subject: [PATCH 4/4] improve logic --- src/spatialdata/_io/_utils.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 8849f1d9..f633d272 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -361,15 +361,34 @@ def _is_subfolder(parent: Path | UPath, child: Path | UPath) -> bool: if not isinstance(parent, Path | UPath) or not isinstance(child, Path | UPath): raise TypeError(f"Expected a Path object, got {type(parent)} and {type(child)}") + # both UPath if isinstance(parent, UPath) and isinstance(child, UPath): - # if both are UPath, use the resolve method to check relative path try: - child.relative_to(parent) # .resolve is not needed here, as UPath already resolves the path correctly + child.relative_to(parent) return True except ValueError: return False - return child.resolve().is_relative_to(parent.resolve()) + # both pathlib + if isinstance(parent, Path) and isinstance(child, Path): + return child.resolve().is_relative_to(parent.resolve()) + + # mixed: only valid if both are local paths + if isinstance(parent, UPath) and isinstance(child, Path): + if getattr(parent, "protocol", None) in (None, "file"): + return child.resolve().is_relative_to(Path(parent).resolve()) + return False + + if isinstance(parent, Path) and isinstance(child, UPath): + if getattr(child, "protocol", None) in (None, "file"): + try: + UPath(child).relative_to(UPath(parent)) + return True + except ValueError: + return False + return False + + return False def _is_element_self_contained(