From dcf7702a2f08656333e8dee33868136ee39301e3 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sat, 22 Nov 2025 10:21:00 +0000 Subject: [PATCH 1/3] fix flat_map + minor tweaks --- README.md | 7 +++---- doc/apidoc.md | 8 ++++---- pyproject.toml | 2 +- src/itrx/itr.py | 16 +++++++++------- src/test/test_aggregation.py | 7 +++++++ src/test/test_general.py | 7 ++++--- src/test/test_transform_filter.py | 4 ++-- 7 files changed, 30 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 821622d..ab02484 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ -# itrx - A Chainable Iterable Adaptor +# `itrx`: A Chainable Iterable Adaptor -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -![Python Version](https://img.shields.io/python/required-version-toml?tomlFilePath=https://raw.githubusercontent.com/virgesmith/itrx/refs/heads/main/pyproject.toml) ![PyPI - Version](https://img.shields.io/pypi/v/itrx) - +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/itrx) +![PyPI - License](https://img.shields.io/pypi/l/itrx) `itrx` is a Python library that adapts iterators, iterables, and generators, providing a Rust-inspired `Iterator` trait experience with added Pythonic conveniences. It enables developers to build complex data processing pipelines with a fluent, chainable, and lazy API. In most cases, it simply wraps `itertools` and/or builtins in syntactic sugar. diff --git a/doc/apidoc.md b/doc/apidoc.md index 08c9651..3874c52 100644 --- a/doc/apidoc.md +++ b/doc/apidoc.md @@ -1,4 +1,4 @@ -# `Itr` v0.1.7 class documentation +# `Itr` v0.2.0 class documentation A generic iterator adaptor class inspired by Rust's Iterator trait, providing a composable API for functional-style iteration and transformation over Python iterables. ## Public methods @@ -183,13 +183,13 @@ Returns: ### `flat_map` -Flatten an iterable and map the results. Each item must itself be iterable. 
+Map each item to an iterable, then flatten one level. Args: - mapper (Callable[[U], V]): A function mapping each item to an iterable. + mapper (Callable[[T], Iterable[U]]): A function mapping each item to an iterable. Returns: - Itr[V]: An iterator over the mapped and flattened items. + Itr[U]: An iterator over the mapped and flattened items. diff --git a/pyproject.toml b/pyproject.toml index b319e5f..752ebf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "itrx" -version = "0.1.7" +version = "0.2.0" description = "A chainable iterator adapter" readme = "README.md" authors = [ diff --git a/src/itrx/itr.py b/src/itrx/itr.py index 4d0fa1b..befece8 100644 --- a/src/itrx/itr.py +++ b/src/itrx/itr.py @@ -25,7 +25,7 @@ def __init__(self, it: Iterable[T]) -> None: def __iter__(self) -> Iterator[T]: "Implement the iter method of the Iterator protocol" - return self._it + return self def __next__(self) -> T: "Implement the next method of the Iterator protocol" @@ -204,18 +204,18 @@ def find(self, predicate: Predicate[T]) -> T | None: """ return next(filter(predicate, self._it), None) - # TODO fix the type annotations - def flat_map[U, V](self, mapper: Callable[[U], V]) -> "Itr[V]": - """Flatten an iterable and map the results. Each item must itself be iterable. + def flat_map[U](self, mapper: Callable[[T], Iterable[U]]) -> "Itr[U]": + """Map each item to an iterable, then flatten one level. Args: - mapper (Callable[[U], V]): A function mapping each item to an iterable. + mapper (Callable[[T], Iterable[U]]): A function mapping each item to an iterable. Returns: - Itr[V]: An iterator over the mapped and flattened items. + Itr[U]: An iterator over the mapped and flattened items. """ - return self.flatten().map(mapper) + # map then flatten (map -> flatten) is the correct semantics for flat_map + return self.map(mapper).flatten() def flatten[U](self) -> "Itr[U]": """Flatten one level of nesting in the iterator. 
Each item must itself be iterable. @@ -523,6 +523,8 @@ def repeat(self, n: int) -> "Itr[T]": Note: This implementation creates `n` independent iterators using `itertools.tee`, which may be inefficient for large `n` or large input iterators. """ + if n == 1: + return self # this creates n iterators so may be inefficient return Itr(itertools.chain(*itertools.tee(self._it, n))) diff --git a/src/test/test_aggregation.py b/src/test/test_aggregation.py index 9441df6..6ae1f11 100644 --- a/src/test/test_aggregation.py +++ b/src/test/test_aggregation.py @@ -50,6 +50,13 @@ def test_last() -> None: assert it.last() == 3 +def test_last_exhausted() -> None: + it = Itr([1, 2, 3]) + it.consume() + with pytest.raises(ValueError): + it.last() + + def test_reduce() -> None: it = Itr([1, 2, 3, 4]) assert it.copy().reduce(lambda a, b: a + b) == 10 diff --git a/src/test/test_general.py b/src/test/test_general.py index 92d6f8b..b15726a 100644 --- a/src/test/test_general.py +++ b/src/test/test_general.py @@ -13,7 +13,7 @@ def test_itr_iter_protocol() -> None: assert hasattr(iterator, "__next__") # __next__ should yield items in order assert next(it) == 1 - assert next(it) == 2 + assert next(iterator) == 2 assert next(it) == 3 with pytest.raises(StopIteration): next(it) @@ -23,7 +23,7 @@ def test_itr_iter_and_next_independent() -> None: data = [10, 20] it = Itr(data) # __iter__ returns the underlying iterator, so iter(it) is the same as it._it - assert iter(it) is it._it + assert iter(it) is it # __next__ advances the iterator assert it.__next__() == 10 assert it.__next__() == 20 @@ -45,7 +45,8 @@ def gen() -> Iterator[int]: assert result is None assert side == [10, 20, 30] # iterator should now be exhausted - assert itr.collect() == () + with pytest.raises(StopIteration): + next(itr) def test_consume_consumes_remaining_only() -> None: diff --git a/src/test/test_transform_filter.py b/src/test/test_transform_filter.py index 2fbcf1d..16f162d 100644 --- a/src/test/test_transform_filter.py 
+++ b/src/test/test_transform_filter.py @@ -45,8 +45,8 @@ def test_flatten() -> None: def test_flat_map() -> None: - it: Itr[int] = Itr([[1, 2], [3]]).flat_map(lambda x: x * 10) # type: ignore[operator] - assert it.collect() == (10, 20, 30) + it: Itr[int] = Itr([1, 2, 3]).flat_map(lambda n: [n] * n) + assert it.collect() == (1, 2, 2, 3, 3, 3) def test_map() -> None: From 70609dc6f7ca6bb07f3d1b74fe2eff6e42cf9150 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sat, 22 Nov 2025 12:26:34 +0000 Subject: [PATCH 2/3] implement flat_map directly, add tests --- src/itrx/itr.py | 7 +++++-- src/test/test_transform_filter.py | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/itrx/itr.py b/src/itrx/itr.py index befece8..25cf521 100644 --- a/src/itrx/itr.py +++ b/src/itrx/itr.py @@ -214,8 +214,11 @@ def flat_map[U](self, mapper: Callable[[T], Iterable[U]]) -> "Itr[U]": Itr[U]: An iterator over the mapped and flattened items. """ - # map then flatten (map -> flatten) is the correct semantics for flat_map - return self.map(mapper).flatten() + + def gen() -> Iterable[U]: + for elem in self: + yield from mapper(elem) + return Itr(gen()) def flatten[U](self) -> "Itr[U]": """Flatten one level of nesting in the iterator. Each item must itself be iterable. 
diff --git a/src/test/test_transform_filter.py b/src/test/test_transform_filter.py index 16f162d..ba7f5cc 100644 --- a/src/test/test_transform_filter.py +++ b/src/test/test_transform_filter.py @@ -45,9 +45,23 @@ def test_flatten() -> None: def test_flat_map() -> None: - it: Itr[int] = Itr([1, 2, 3]).flat_map(lambda n: [n] * n) + it = Itr([1, 2, 3]).flat_map(lambda n: [n] * n) assert it.collect() == (1, 2, 2, 3, 3, 3) + it = Itr([1, 2, 3]).flat_map(lambda n: range(n)) + assert it.collect() == (0, 0, 1, 0, 1, 2) + + +def test_flat_map_empty() -> None: + it: Itr[int] = Itr([]).flat_map(lambda n: [n] * n) + assert it.collect() == () + + +def test_flat_map_invalid_mapper() -> None: + # mapper must return an iterable + with pytest.raises(TypeError): + Itr([1, 2, 3]).flat_map(lambda n: n * 2).collect() # type: ignore[arg-type, return-value] + def test_map() -> None: it = Itr([1, 2, 3]).map(lambda x: x * 2) From de559b09d1363a2d7604326b15a8f9aeb1ea7470 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sat, 22 Nov 2025 12:43:08 +0000 Subject: [PATCH 3/3] ... 
--- doc/examples.ipynb | 104 ++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/doc/examples.ipynb b/doc/examples.ipynb index 0532352..fb7c5ca 100644 --- a/doc/examples.ipynb +++ b/doc/examples.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "8b2541fb", "metadata": {}, "outputs": [], @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "fc89902b", "metadata": {}, "outputs": [], @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "d9af20ad", "metadata": {}, "outputs": [], @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "83d002b5", "metadata": {}, "outputs": [ @@ -132,7 +132,7 @@ "514229" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "99c944c1", "metadata": {}, "outputs": [ @@ -164,7 +164,7 @@ "(19, 58, 29, 88, 44, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "fd47677f", "metadata": {}, "outputs": [ @@ -250,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "8bcc535f", "metadata": {}, "outputs": [ @@ -286,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "4c33c79c", "metadata": {}, "outputs": [ @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "d9b906a6", "metadata": {}, "outputs": [ @@ -334,7 +334,7 @@ "(5, 16, 8, 4, 2, 1, 6, 3, 10, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 9, + "execution_count": 11, 
"metadata": {}, "output_type": "execute_result" } @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "377cd2c0", "metadata": {}, "outputs": [ @@ -363,7 +363,7 @@ "((5, 6), (16, 3), (8, 10), (4, 5), (2, 16), (1, 8))" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "03e8e659", "metadata": {}, "outputs": [ @@ -393,7 +393,7 @@ "((5, 16, 8, 4, 2, 1), (6, 3, 10, 5, 16, 8))" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -413,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "c3b77ea4", "metadata": {}, "outputs": [ @@ -423,7 +423,7 @@ "(8, 'abcde', 4, 'abcde', 2, 'abcde', 1)" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -442,7 +442,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "ed21cfb9", "metadata": {}, "outputs": [ @@ -452,7 +452,7 @@ "(5, 1, 16, 2, 8, 4, 4, 8, 2, 16, 1, 5)" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -473,7 +473,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "53be0e6a", "metadata": {}, "outputs": [ @@ -483,7 +483,7 @@ "((1, 5), (2, 16), (3, 8), (4, 4), (5, 2), (6, 1))" ] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +507,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "bb9a323a", "metadata": {}, "outputs": [ @@ -517,7 +517,7 @@ "(5, 8, 4)" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 
18, "id": "87e4444d", "metadata": {}, "outputs": [ @@ -566,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "6c9cb5c1", "metadata": {}, "outputs": [ @@ -576,7 +576,7 @@ "(5, 17, 9, 6, 5, 6)" ] }, - "execution_count": 17, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -604,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "9d9bed75", "metadata": {}, "outputs": [ @@ -620,7 +620,7 @@ { "data": { "text/plain": [ - "(1, 1, 4, 4, 9, 9, 9, 25, 25, 25, 25, 25)" + "(2, 1, 3, 10, 5, 16, 8, 4, 2, 1, 4, 2, 1, 5, 16, 8, 4, 2, 1)" ] }, "metadata": {}, @@ -630,7 +630,7 @@ "source": [ "display(Itr(fibonacci()).take(6).map(lambda n: (n,) * n).flatten().collect())\n", "\n", - "display(Itr(fibonacci()).take(6).map(lambda n: (n,) * n).flat_map(lambda i: i * i).collect())" + "display(Itr(range(2,6)).flat_map(lambda i: collatz(i)).collect())" ] }, { @@ -643,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "id": "ad83ad71", "metadata": {}, "outputs": [ @@ -655,7 +655,7 @@ " ())" ] }, - "execution_count": 19, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -677,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "id": "b52225da", "metadata": {}, "outputs": [ @@ -687,7 +687,7 @@ "(10, 5)" ] }, - "execution_count": 20, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -709,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 27, "id": "7fcc7a00", "metadata": {}, "outputs": [ @@ -722,7 +722,7 @@ " (4, 2, 1))" ] }, - "execution_count": 21, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -742,7 +742,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 28, "id": "6f2d5115", "metadata": {}, "outputs": [ @@ -752,7 +752,7 @@ "((10, 5, 16), (5, 16, 8), (16, 8, 4), 
(8, 4, 2), (4, 2, 1))" ] }, - "execution_count": 22, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -771,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, "id": "cce14801", "metadata": {}, "outputs": [ @@ -782,7 +782,7 @@ " 2: (29, 44, 11, 17, 26, 20, 5, 8, 2)}" ] }, - "execution_count": 23, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -807,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 30, "id": "2ba56157", "metadata": {}, "outputs": [ @@ -817,7 +817,7 @@ "(46, 46)" ] }, - "execution_count": 24, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -846,7 +846,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 31, "id": "fab7b5c0", "metadata": {}, "outputs": [ @@ -892,7 +892,7 @@ "8" ] }, - "execution_count": 25, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -923,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 32, "id": "2ac1c105", "metadata": {}, "outputs": [ @@ -942,7 +942,7 @@ "(19, 58, 29)" ] }, - "execution_count": 26, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 33, "id": "3be2d7d0", "metadata": {}, "outputs": [ @@ -972,7 +972,7 @@ "(5, 16, 8, 4, 2, 1, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 27, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -994,7 +994,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 34, "id": "dac3efaa", "metadata": {}, "outputs": [ @@ -1004,7 +1004,7 @@ "(5, 16, 8, 4, 2, 1, 5, 16, 8, 4, 2, 1, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 28, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1023,7 +1023,7 @@ }, { "cell_type": "code", - "execution_count": 29, + 
"execution_count": 35, "id": "1be027d4", "metadata": {}, "outputs": [ @@ -1044,7 +1044,7 @@ " (1, 1))" ] }, - "execution_count": 29, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" }