From 099f9727f6f503cfaa766ec38978055aa9d25749 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 3 Jan 2026 20:40:28 -0800 Subject: [PATCH 1/4] switch element metadata to biocframe --- src/compressed_lists/base.py | 54 +++++++++++++++++++------- src/compressed_lists/biocframe_list.py | 5 +-- tests/test_base.py | 12 +++--- tests/test_comp_biocframe.py | 1 - 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/compressed_lists/base.py b/src/compressed_lists/base.py index f9983c5..de25376 100644 --- a/src/compressed_lists/base.py +++ b/src/compressed_lists/base.py @@ -4,6 +4,7 @@ from warnings import warn import biocutils as ut +from biocframe import BiocFrame import numpy as np from .partition import Partitioning @@ -13,6 +14,31 @@ __license__ = "MIT" +def is_pandas(x: Any) -> bool: + """Check if ``x`` is a :py:class:`~pandas.DataFrame`. + + Args: + x: + Any object. + + Returns: + True if ``x`` is a :py:class:`~pandas.DataFrame`. + """ + if hasattr(x, "dtypes"): + return True + + return False + + +def _sanitize_frame(frame, num_rows: int): + frame = frame if frame is not None else BiocFrame({}, number_of_rows=num_rows) + + if is_pandas(frame): + frame = BiocFrame.from_pandas(frame) + + return frame + + def _validate_data_and_partitions(unlist_data, partition): if len(unlist_data) != partition.nobj(): raise ValueError( @@ -33,7 +59,7 @@ def __init__( unlist_data: Any, partitioning: Partitioning, element_type: Any = None, - element_metadata: Optional[dict] = None, + element_metadata: Optional[BiocFrame] = None, metadata: Optional[Union[Dict[str, Any], ut.NamedList]] = None, _validate: bool = True, ): @@ -64,7 +90,7 @@ class for the type of elements. 
self._unlist_data = unlist_data self._partitioning = partitioning self._element_type = element_type - self._element_metadata = element_metadata or {} + self._element_metadata = _sanitize_frame(element_metadata, len(partitioning)) if _validate: _validate_data_and_partitions(self._unlist_data, self._partitioning) @@ -93,6 +119,7 @@ def __deepcopy__(self, memo=None, _nil=[]): element_type=_elem_type_copy, element_metadata=_elem_metadata_copy, metadata=_metadata_copy, + _validate=False, ) def __copy__(self): @@ -107,6 +134,7 @@ def __copy__(self): element_type=self._element_type, element_metadata=self._element_metadata, metadata=self._metadata, + _validate=False, ) def copy(self): @@ -150,8 +178,7 @@ def __repr__(self) -> str: _etype_name = self._element_type.__name__ output += ", element_type=" + _etype_name - if len(self._element_metadata) > 0: - output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata) + output += ", element_metadata=" + self._element_metadata.__repr__() if len(self._metadata) > 0: output += ", metadata=" + ut.print_truncated_dict(self._metadata) @@ -178,7 +205,7 @@ def __str__(self) -> str: output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n" - output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" + output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" return output @@ -303,14 +330,14 @@ def unlist_data(self, unlist_data: Any): ######>> element metadata <<####### ################################### - def get_element_metadata(self) -> dict: + def 
get_element_metadata(self) -> BiocFrame: """ Returns: Dictionary of metadata for each element in this object. """ return self._element_metadata - def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -> CompressedList: + def set_element_metadata(self, element_metadata: BiocFrame, in_place: bool = False) -> CompressedList: """Set new element metadata. Args: @@ -324,19 +351,20 @@ def set_element_metadata(self, element_metadata: dict, in_place: bool = False) - A modified ``CompressedList`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ - if not isinstance(element_metadata, dict): - raise TypeError(f"`element_metadata` must be a dictionary, provided {type(element_metadata)}.") + if not isinstance(element_metadata, BiocFrame): + raise TypeError(f"`element_metadata` must be a BiocFrame, provided {type(element_metadata)}.") + output = self._define_output(in_place) - output._element_metadata = element_metadata + output._element_metadata = _sanitize_frame(element_metadata, len(self._partitioning)) return output @property - def element_metadata(self) -> dict: + def element_metadata(self) -> BiocFrame: """Alias for :py:attr:`~get_element_metadata`.""" return self.get_element_metadata() @element_metadata.setter - def element_metadata(self, element_metadata: dict): + def element_metadata(self, element_metadata: BiocFrame): """Alias for :py:attr:`~set_element_metadata` with ``in_place = True``. As this mutates the original object, a warning is raised. 
@@ -576,7 +604,7 @@ def extract_subset(self, indices: Sequence[int]) -> CompressedList: new_data, new_partitioning, element_type=self._element_type, - element_metadata={k: v for k, v in self._element_metadata.items() if k in indices}, + element_metadata=self._element_metadata[indices,], metadata=self._metadata.copy(), ) diff --git a/src/compressed_lists/biocframe_list.py b/src/compressed_lists/biocframe_list.py index c0a4cce..0a63213 100644 --- a/src/compressed_lists/biocframe_list.py +++ b/src/compressed_lists/biocframe_list.py @@ -133,8 +133,7 @@ def __repr__(self) -> str: else self._element_type ) - if len(self._element_metadata) > 0: - output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata) + output += ", element_metadata=" + self._element_metadata.__repr__() if len(self._metadata) > 0: output += ", metadata=" + ut.print_truncated_dict(self._metadata) @@ -155,7 +154,7 @@ def __str__(self) -> str: output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n" - output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" + output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" return output diff --git a/tests/test_base.py b/tests/test_base.py index e58b17c..164e519 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -4,6 +4,7 @@ import pytest from compressed_lists import CompressedList +from biocframe import BiocFrame __author__ = "Jayaram Kancherla" __copyright__ = "Jayaram Kancherla" @@ -95,14 +96,13 @@ def test_base_metadata(base_list): base_list.metadata = meta assert 
base_list.metadata == ut.NamedList.from_dict({"source": "test"}) - el_meta = {"info": "details"} + el_meta = BiocFrame({"score": [1, 2, 3]}) cl_el_meta = base_list.set_element_metadata(el_meta, in_place=False) - assert base_list.element_metadata == {} - assert cl_el_meta.element_metadata == {"info": "details"} + assert len(base_list.element_metadata) == 3 + assert cl_el_meta.element_metadata.get_column("score") == el_meta.get_column("score") - with pytest.warns(UserWarning, match="Setting property 'element_metadata'"): - base_list.element_metadata = el_meta - assert base_list.element_metadata == {"info": "details"} + with pytest.raises(Exception): + base_list.element_metadata = {"info": "details"} def test_base_copying(base_list): diff --git a/tests/test_comp_biocframe.py b/tests/test_comp_biocframe.py index 6504452..5849dbe 100644 --- a/tests/test_comp_biocframe.py +++ b/tests/test_comp_biocframe.py @@ -46,7 +46,6 @@ def test_bframe_typed_list_column(): def test_split_biocframe(frame_data): frame_data.set_column("groups", [0, 0, 1], in_place=True) - print(frame_data) clist = splitAsCompressedList(frame_data, groups_or_partitions=frame_data.get_column("groups")) assert isinstance(clist, CompressedSplitBiocFrameList) From 8b3ffc64de8c89c8078ef66bebff29562adab03b Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 3 Jan 2026 20:41:18 -0800 Subject: [PATCH 2/4] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17c9ac5..b492c5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,12 @@ # Changelog -## Version 0.4.0 - 0.4.3 +## Version 0.4.0 - 0.4.4 - Classes extend `BiocObject` from biocutils. `metadata` is a named list. - Update actions to run from 3.10-3.14 - Support empty compressed list objects of size `n`. - Implement combine generic for compressed lists. +- Element metadata slot is a `BiocFrame`. 
## Version 0.3.0 From 7a7bc52cb2a5ca94a482b4aa20446e41ca41f103 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 3 Jan 2026 20:43:39 -0800 Subject: [PATCH 3/4] fix imports in tests --- tests/test_comp_bool.py | 2 +- tests/test_comp_float.py | 2 +- tests/test_comp_int.py | 2 +- tests/test_comp_str.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_comp_bool.py b/tests/test_comp_bool.py index 45b1a70..2800885 100644 --- a/tests/test_comp_bool.py +++ b/tests/test_comp_bool.py @@ -1,5 +1,5 @@ import pytest -from biocutils.boolean_list import BooleanList +from biocutils import BooleanList from compressed_lists import CompressedBooleanList diff --git a/tests/test_comp_float.py b/tests/test_comp_float.py index 92ef037..6828d25 100644 --- a/tests/test_comp_float.py +++ b/tests/test_comp_float.py @@ -1,5 +1,5 @@ import pytest -from biocutils.float_list import FloatList +from biocutils import FloatList from compressed_lists import CompressedFloatList diff --git a/tests/test_comp_int.py b/tests/test_comp_int.py index 43692b1..e3f7777 100644 --- a/tests/test_comp_int.py +++ b/tests/test_comp_int.py @@ -1,6 +1,6 @@ import numpy as np import pytest -from biocutils.integer_list import IntegerList +from biocutils import IntegerList from compressed_lists import CompressedIntegerList, Partitioning diff --git a/tests/test_comp_str.py b/tests/test_comp_str.py index b9c9ad0..29790b3 100644 --- a/tests/test_comp_str.py +++ b/tests/test_comp_str.py @@ -1,5 +1,5 @@ import pytest -from biocutils.string_list import StringList +from biocutils import StringList from compressed_lists import CompressedStringList From 540688d445248daa5e76135db992dd6003374891 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 3 Jan 2026 20:44:40 -0800 Subject: [PATCH 4/4] bump packages --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 09da1cc..7088216 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,9 
+49,9 @@ python_requires = >=3.9 # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - biocutils>=0.3.1 + biocutils>=0.3.3 numpy - biocframe>=0.7.1 + biocframe>=0.7.2 [options.packages.find]