Handle list-valued string datasets and the renamed version key when loading .topostats files.

- .pylintrc: comment out the `suggestion-mode` option.
- AFMReader/io.py: decode each element when an object-dtype dataset is read back as a Numpy array.
- AFMReader/topostats.py: read the version from `topostats_file_version` or, failing that,
  `topostats_version`, and compare it with `packaging.version` instead of as a string.
- tests: cover decoding of an array of strings; follow the `> 0.2` comparison.

diff --git a/.pylintrc b/.pylintrc
index 1667c84..350da63 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -62,7 +62,7 @@ py-version=3.9
 
 # When enabled, pylint would attempt to guess common misconfiguration and emit
 # user-friendly hints instead of false-positive error messages.
-suggestion-mode=yes
+# suggestion-mode=yes
 
 # Allow loading of arbitrary C extensions. Extensions are imported into the
 # active Python interpreter and may run arbitrary code.
diff --git a/AFMReader/io.py b/AFMReader/io.py
index 74ada20..9113b62 100644
--- a/AFMReader/io.py
+++ b/AFMReader/io.py
@@ -5,6 +5,7 @@
 from typing import BinaryIO
 
 import h5py
+import numpy as np
 from loguru import logger
 from ruamel.yaml import YAML, YAMLError
 
@@ -255,7 +256,12 @@ def unpack_hdf5(open_hdf5_file: h5py.File, group_path: str = "/") -> dict:
         # Decode byte strings to utf-8. The data type "O" is a byte string.
         elif isinstance(item, h5py.Dataset) and item.dtype == "O":
             # Byte string
-            data[key] = item[()].decode("utf-8")
+            value = item[()]  # read the dataset from file once
+            # Numpy arrays of byte strings can not be decoded directly, decode each element instead
+            if isinstance(value, np.ndarray):
+                data[key] = [_item.decode("utf-8") for _item in value]  # type: ignore
+            else:
+                data[key] = value.decode("utf-8")
         else:
             # Another type of dataset
             data[key] = item[()]
diff --git a/AFMReader/topostats.py b/AFMReader/topostats.py
index 8dc9148..9b3ac44 100644
--- a/AFMReader/topostats.py
+++ b/AFMReader/topostats.py
@@ -5,6 +5,7 @@
 
 import h5py
 
+from packaging.version import parse as parse_version
 from AFMReader.io import unpack_hdf5
 from AFMReader.logging import logger
 
@@ -41,10 +42,12 @@ def load_topostats(file_path: Path | str) -> dict[str, Any]:
     try:
         with h5py.File(file_path, "r") as f:
             data = unpack_hdf5(open_hdf5_file=f, group_path="/")
-            if str(data["topostats_file_version"]) >= "0.2":
+            # Handle different names for variables holding the file version (<=0.3) or the newer topostats version
+            version_key = "topostats_file_version" if "topostats_file_version" in data else "topostats_version"
+            version = data[version_key]
+            if parse_version(str(version)) > parse_version("0.2"):
                 data["img_path"] = Path(data["img_path"])
-            file_version = data["topostats_file_version"]
-            logger.info(f"[{filename}] TopoStats file version : {file_version}")
+            logger.info(f"[{filename}] TopoStats file version : {version}")
 
     except OSError as e:
         if "Unable to open file" in str(e):
diff --git a/tests/test_io.py b/tests/test_io.py
index 885628f..5d009e5 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -201,6 +201,47 @@ def test_unpack_hdf5_nested_dict_group_path(tmp_path: Path) -> None:
         np.testing.assert_equal(result, expected)
 
 
+def test_unpack_hdf5_list_of_bytes(tmp_path: Path) -> None:
+    """Test loading a list of strings, which HDF5 hands back as a Numpy array of bytes."""
+    to_save = {
+        "config": {
+            "grainstats": {
+                "class_names": np.asarray(["DNA", "Protein"], dtype=object),
+                "edge_detection_method": "binary_erosion",
+                "extract_height_profile": True,
+                "run": True,
+            }
+        }
+    }
+    group_path = "/config/grainstats/"
+    expected = {
+        "class_names": ["DNA", "Protein"],
+        "edge_detection_method": "binary_erosion",
+        "extract_height_profile": True,
+        "run": True,
+    }
+    # Manually save the dictionary to HDF5 format
+    with h5py.File(tmp_path / "hdf5_file_list_of_strings", "w") as f:
+        config = f.create_group("config")
+        grainstats = config.create_group("grainstats")
+        # Variable-length strings have the object dtype and are read back as an array of bytes
+        grainstats.create_dataset(
+            "class_names", data=to_save["config"]["grainstats"]["class_names"], dtype=h5py.string_dtype()
+        )
+        grainstats.create_dataset(
+            "edge_detection_method", data=to_save["config"]["grainstats"]["edge_detection_method"]
+        )
+        grainstats.create_dataset(
+            "extract_height_profile", data=to_save["config"]["grainstats"]["extract_height_profile"]
+        )
+        grainstats.create_dataset("run", data=to_save["config"]["grainstats"]["run"])
+
+    # Load it back in and check the list of strings has been decoded
+    with h5py.File(tmp_path / "hdf5_file_list_of_strings", "r") as f:
+        result = unpack_hdf5(open_hdf5_file=f, group_path=group_path)
+        np.testing.assert_equal(result, expected)
+
+
 def test_read_yaml() -> None:
     """Test reading of YAML file."""
     sample_config = read_yaml(RESOURCES / "test.yaml")
diff --git a/tests/test_topostats.py b/tests/test_topostats.py
index 4a1dc60..66a7989 100644
--- a/tests/test_topostats.py
+++ b/tests/test_topostats.py
@@ -99,7 +99,7 @@ def test_load_topostats(
     assert topostats_data["pixel_to_nm_scaling"] == pytest.approx(pixel_to_nm_scaling)
     assert topostats_data["image"].shape == image_shape
     assert topostats_data["image"].sum() == pytest.approx(image_sum)
-    if version >= "0.2":
+    if version > "0.2":
         assert isinstance(topostats_data["img_path"], Path)
 
 