Merged
2 changes: 1 addition & 1 deletion .pylintrc
@@ -62,7 +62,7 @@ py-version=3.9

# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# suggestion-mode=yes

# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
10 changes: 9 additions & 1 deletion AFMReader/io.py
@@ -5,6 +5,7 @@
from typing import BinaryIO

import h5py
import numpy as np
from loguru import logger
from ruamel.yaml import YAML, YAMLError

@@ -255,7 +256,14 @@ def unpack_hdf5(open_hdf5_file: h5py.File, group_path: str = "/") -> dict:
# Decode byte strings to utf-8. The data type "O" is a byte string.
elif isinstance(item, h5py.Dataset) and item.dtype == "O":
# Byte string
data[key] = item[()].decode("utf-8")
try:
data[key] = item[()].decode("utf-8")
# NumPy arrays of strings cannot be decoded directly, so iterate over each item
except AttributeError as e:
if isinstance(item[()], np.ndarray):
data[key] = [_item.decode("utf-8") for _item in item[()]] # type: ignore
else:
raise e
else:
# Another type of dataset
data[key] = item[()]
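Not part of the diff, but a minimal sketch of why the element-wise decode above is needed: when a list of strings is saved, h5py hands back a NumPy array of byte strings, and calling .decode() on the array itself raises AttributeError, so each element has to be decoded individually (the variable names below are illustrative only).

import numpy as np

class_names = np.asarray([b"DNA", b"Protein"], dtype="S7")  # what h5py returns for a saved list of strings
try:
    class_names.decode("utf-8")  # ndarray has no .decode(); this raises AttributeError
except AttributeError:
    decoded = [name.decode("utf-8") for name in class_names]  # decode each byte string instead
print(decoded)  # ['DNA', 'Protein']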
12 changes: 9 additions & 3 deletions AFMReader/topostats.py
@@ -5,6 +5,7 @@

import h5py

from packaging.version import parse as parse_version
from AFMReader.io import unpack_hdf5
from AFMReader.logging import logger

@@ -41,10 +42,15 @@ def load_topostats(file_path: Path | str) -> dict[str, Any]:
try:
with h5py.File(file_path, "r") as f:
data = unpack_hdf5(open_hdf5_file=f, group_path="/")
if str(data["topostats_file_version"]) >= "0.2":
# Handle the different names used for the file version: "topostats_file_version" (<= 0.3) or the newer "topostats_version"
version = (
data["topostats_file_version"]
if "topostats_file_version" in data.keys() # pylint: disable=consider-iterating-dictionary
else data["topostats_version"]
)
if parse_version(str(version)) > parse_version("0.2"):
data["img_path"] = Path(data["img_path"])
file_version = data["topostats_file_version"]
logger.info(f"[{filename}] TopoStats file version : {file_version}")
logger.info(f"[{filename}] TopoStats file version : {version}")

except OSError as e:
if "Unable to open file" in str(e):
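Not part of the diff, but a quick illustration of the difference between comparing version strings lexicographically and comparing them with packaging.version, which the change above adopts: plain string comparison goes character by character and mis-orders multi-digit components, whereas parse_version compares release numbers semantically.

from packaging.version import parse as parse_version

print("0.10" > "0.2")                                # False: "1" sorts before "2" as a character
print(parse_version("0.10") > parse_version("0.2"))  # True: 0.10 is a later release than 0.2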
40 changes: 40 additions & 0 deletions tests/test_io.py
@@ -201,6 +201,46 @@ def test_unpack_hdf5_nested_dict_group_path(tmp_path: Path) -> None:
np.testing.assert_equal(result, expected)


def test_unpack_hdf5_list_of_bytes(tmp_path: Path) -> None:
"""Test loading a list of strings which are encoded to Numpy array on saving."""
to_save = {
"config": {
"grainstats": {
"class_names": np.asarray([b"DNA", b"Protein"], dtype="S7"),
"edge_detection_method": "binary_erosion",
"extract_height_profile": True,
"run": True,
}
}
}
group_path = "/config/grainstats/"
expected = {
"class_names": np.asarray([b"DNA", b"Protein"], dtype="S7"),
"edge_detection_method": "binary_erosion",
"extract_height_profile": True,
"run": True,
}
# Manually save the dictionary to HDF5 format
with h5py.File(tmp_path / "hdf5_file_list_of_strings", "w") as f:
config = f.create_group("config")
grainstats = config.create_group("grainstats")
grainstats.create_dataset("class_names", data=to_save["config"]["grainstats"]["class_names"])
grainstats.create_dataset(
"edge_detection_method", data=to_save["config"]["grainstats"]["edge_detection_method"]
)
grainstats.create_dataset(
"extract_height_profile", data=to_save["config"]["grainstats"]["extract_height_profile"]
)
grainstats.create_dataset("run", data=to_save["config"]["grainstats"]["run"])

# Load it back in and check if the list is the same
with h5py.File(tmp_path / "hdf5_file_list_of_strings", "r") as f:
result = unpack_hdf5(open_hdf5_file=f, group_path=group_path)
np.testing.assert_equal(result, expected)


def test_read_yaml() -> None:
"""Test reading of YAML file."""
sample_config = read_yaml(RESOURCES / "test.yaml")
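Assuming pytest is the project's test runner (the existing tests already use pytest fixtures such as tmp_path), the new case can be run on its own with:

pytest tests/test_io.py::test_unpack_hdf5_list_of_bytes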
2 changes: 1 addition & 1 deletion tests/test_topostats.py
@@ -99,7 +99,7 @@ def test_load_topostats(
assert topostats_data["pixel_to_nm_scaling"] == pytest.approx(pixel_to_nm_scaling)
assert topostats_data["image"].shape == image_shape
assert topostats_data["image"].sum() == pytest.approx(image_sum)
if version >= "0.2":
if version > "0.2":
assert isinstance(topostats_data["img_path"], Path)

