diff --git a/doc/apidoc.md b/doc/apidoc.md index 3874c52..90d8ce1 100644 --- a/doc/apidoc.md +++ b/doc/apidoc.md @@ -309,6 +309,18 @@ Returns: +### `map_dict` + +Map each item in the iterator using the given dictionary (supports defaultdict). + +Args: + mapper (dict[T, U]): The lookup to apply. + +Returns: + Itr[U]: An iterator of mapped items. + + + ### `map_while` Map each item in the iterator using the given function, while the predicate remains True. @@ -569,6 +581,53 @@ Returns: +### `tee` + + +Create multiple independent Itr wrappers that iterate over the same underlying iterator. NB: consuming any of the +returned iterators will consume the original iterator. + +This method calls itertools.tee on the wrapped iterator and returns a tuple of Itr +objects, each wrapping one of the tee'd iterators. Each returned Itr yields the same +sequence of items and can be consumed independently of the others. + +Parameters +---------- +n : int, optional + Number of independent iterators to create (default: 2). Must be >= 0; `n == 0` returns an empty tuple. + +Returns +------- +tuple[Itr[T], ...] + Tuple of length `n` containing the newly created Itr objects. + +Raises +------ +ValueError + If `n` is negative (raised by itertools.tee). + +Notes +----- +- The implementation uses itertools.tee; the tee'd iterators share internal buffers + that store items produced by the original iterator until all tees have consumed them. + If one or more returned iterators lag behind the others, buffered items will be + retained and memory usage can grow. +- After calling this method, avoid consuming the original wrapped iterator (`self._it`) + directly; use the returned Itr objects to prevent surprising interactions with the + shared buffer. +- Creating the tees is inexpensive, but the memory characteristics depend on how the + resulting iterators are consumed relative to each other. 
+ +Examples +-------- +>>> i = Itr(range(3)) +>>> a, b = i.tee(2) +>>> list(a) +[0, 1, 2] +>>> list(b) +[0, 1, 2] + + ### `unzip` Splits the iterator of pairs into two separate iterators, each containing the elements from one position of diff --git a/doc/examples.ipynb b/doc/examples.ipynb index fb7c5ca..40574fe 100644 --- a/doc/examples.ipynb +++ b/doc/examples.ipynb @@ -16,12 +16,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "8b2541fb", "metadata": {}, "outputs": [], "source": [ "import contextlib\n", + "from collections import defaultdict\n", "from collections.abc import Generator, Iterator\n", "from typing import Self\n", "\n", @@ -30,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "fc89902b", "metadata": {}, "outputs": [], @@ -73,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "d9af20ad", "metadata": {}, "outputs": [], @@ -104,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "83d002b5", "metadata": {}, "outputs": [ @@ -132,7 +133,7 @@ "514229" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -154,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "99c944c1", "metadata": {}, "outputs": [ @@ -164,7 +165,7 @@ "(19, 58, 29, 88, 44, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -190,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "fd47677f", "metadata": {}, "outputs": [ @@ -250,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "8bcc535f", "metadata": {}, "outputs": [ @@ -286,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "4c33c79c", "metadata": {}, 
"outputs": [ @@ -324,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "id": "d9b906a6", "metadata": {}, "outputs": [ @@ -334,7 +335,7 @@ "(5, 16, 8, 4, 2, 1, 6, 3, 10, 5, 16, 8, 4, 2, 1)" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -353,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "id": "377cd2c0", "metadata": {}, "outputs": [ @@ -363,7 +364,7 @@ "((5, 6), (16, 3), (8, 10), (4, 5), (2, 16), (1, 8))" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -383,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "id": "03e8e659", "metadata": {}, "outputs": [ @@ -393,7 +394,7 @@ "((5, 16, 8, 4, 2, 1), (6, 3, 10, 5, 16, 8))" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -413,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "id": "c3b77ea4", "metadata": {}, "outputs": [ @@ -423,7 +424,7 @@ "(8, 'abcde', 4, 'abcde', 2, 'abcde', 1)" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -442,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "id": "ed21cfb9", "metadata": {}, "outputs": [ @@ -452,7 +453,7 @@ "(5, 1, 16, 2, 8, 4, 4, 8, 2, 16, 1, 5)" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -473,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "id": "53be0e6a", "metadata": {}, "outputs": [ @@ -483,7 +484,7 @@ "((1, 5), (2, 16), (3, 8), (4, 4), (5, 2), (6, 1))" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 
15, "id": "bb9a323a", "metadata": {}, "outputs": [ @@ -517,7 +518,7 @@ "(5, 8, 4)" ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "id": "87e4444d", "metadata": {}, "outputs": [ @@ -556,6 +557,88 @@ "Itr(fibonacci()).take(5).map(lambda i: i**0.5).for_each(print)" ] }, + { + "cell_type": "markdown", + "id": "95194cd9", + "metadata": {}, + "source": [ + "`map` can be easily used with methods and even properties, just qualify the method, and use the `fget` attribute for properties:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2199b97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('method 0', 'method 1', 'method 2', 'method 3', 'method 4')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "('property 0', 'property 1', 'property 2', 'property 3', 'property 4')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class X:\n", + " def __init__(self, i: int) -> None:\n", + " self._i = i\n", + "\n", + " @property\n", + " def i(self) -> str:\n", + " return f\"property {self._i}\"\n", + "\n", + " def f(self) -> str:\n", + " return f\"method {self._i}\"\n", + "\n", + "\n", + "itr = Itr(X(i) for i in range(5)) # .map(X.f)\n", + "display(itr.copy().map(X.f).collect())\n", + "display(itr.map(X.i.fget).collect()) # type: ignore[attr-defined]" + ] + }, + { + "cell_type": "markdown", + "id": "95e2f8cd", + "metadata": {}, + "source": [ + "When it's more convenient to represent the mapping as a `dict` (or `defaultdict`) than a function, use the `map_dict` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1e6f6f75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 2, 0, 0, 0, 0)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "lookup = defaultdict(int, {\"a\": 1, \"b\": 2})\n", + "\n", + "Itr(\"abcdef\").map_dict(lookup).collect()" + ] + }, { "cell_type": "markdown", "id": "61943275", @@ -630,7 +713,7 @@ "source": [ "display(Itr(fibonacci()).take(6).map(lambda n: (n,) * n).flatten().collect())\n", "\n", - "display(Itr(range(2,6)).flat_map(lambda i: collatz(i)).collect())" + "display(Itr(range(2, 6)).flat_map(lambda i: collatz(i)).collect())" ] }, { @@ -699,6 +782,44 @@ "next(it1), next(it0)" ] }, + { + "cell_type": "markdown", + "id": "07b9bc9d", + "metadata": {}, + "source": [ + "`tee` can be used to create multiple independent copies of the input iterator. But note that consuming *any* of the outputs consumes the original iterator." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ace74e50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "original = Itr([1, 2, 3])\n", + "\n", + "a, b = original.tee(2)\n", + "\n", + "a.consume()\n", + "\n", + "with contextlib.suppress(StopIteration):\n", + " next(original)\n", + "\n", + "next(b)" + ] + }, { "cell_type": "markdown", "id": "fd54367a", @@ -1056,7 +1177,7 @@ ], "metadata": { "kernelspec": { - "display_name": "itrx", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1070,7 +1191,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.0" + "version": "3.13.9" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 752ebf8..4edd479 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "itrx" -version = "0.2.0" +version = "0.2.1" description = "A chainable iterator adapter" readme = "README.md" authors = [ diff --git a/src/itrx/itr.py b/src/itrx/itr.py index 25cf521..5f6f22c 100644 --- a/src/itrx/itr.py +++ b/src/itrx/itr.py @@ -218,6 
+218,7 @@ def flat_map[U](self, mapper: Callable[[T], Iterable[U]]) -> "Itr[U]": def gen() -> Iterable[U]: for elem in self: yield from mapper(elem) + return Itr(gen()) def flatten[U](self) -> "Itr[U]": @@ -358,6 +359,18 @@ def map[U](self, mapper: Callable[[T], U]) -> "Itr[U]": """ return Itr(map(mapper, self._it)) + def map_dict[U](self, mapper: dict[T, U]) -> "Itr[U]": + """Map each item in the iterator using the given dictionary (supports defaultdict). + + Args: + mapper (dict[T, U]): The lookup to apply. + + Returns: + Itr[U]: An iterator of mapped items. + + """ + return Itr(mapper[m] for m in self._it) + def map_while[U](self, predicate: Predicate[T], mapper: Callable[[T], U]) -> "Itr[U]": """Map each item in the iterator using the given function, while the predicate remains True. @@ -630,6 +643,53 @@ def take_while(self, predicate: Predicate[T]) -> "Itr[T]": """ return Itr(itertools.takewhile(predicate, self._it)) + def tee(self, n: int = 2) -> tuple["Itr[T]", ...]: + """ + Create multiple independent Itr wrappers that iterate over the same underlying iterator. NB: consuming any of the + returned iterators will consume the original iterator. + + This method calls itertools.tee on the wrapped iterator and returns a tuple of Itr + objects, each wrapping one of the tee'd iterators. Each returned Itr yields the same + sequence of items and can be consumed independently of the others. + + Parameters + ---------- + n : int, optional + Number of independent iterators to create (default: 2). Must be >= 0; `n == 0` returns an empty tuple. + + Returns + ------- + tuple[Itr[T], ...] + Tuple of length `n` containing the newly created Itr objects. + + Raises + ------ + ValueError + If `n` is negative (raised by itertools.tee). + + Notes + ----- + - The implementation uses itertools.tee; the tee'd iterators share internal buffers + that store items produced by the original iterator until all tees have consumed them. 
+ If one or more returned iterators lag behind the others, buffered items will be + retained and memory usage can grow. + - After calling this method, avoid consuming the original wrapped iterator (`self._it`) + directly; use the returned Itr objects to prevent surprising interactions with the + shared buffer. + - Creating the tees is inexpensive, but the memory characteristics depend on how the + resulting iterators are consumed relative to each other. + + Examples + -------- + >>> i = Itr(range(3)) + >>> a, b = i.tee(2) + >>> list(a) + [0, 1, 2] + >>> list(b) + [0, 1, 2] + """ + return tuple(Itr(t) for t in itertools.tee(self._it, n)) + def unzip[U, V](self) -> tuple["Itr[U]", "Itr[V]"]: """Splits the iterator of pairs into two separate iterators, each containing the elements from one position of the pairs. diff --git a/src/test/test_combine_split.py b/src/test/test_combine_split.py index 203caeb..29e8683 100644 --- a/src/test/test_combine_split.py +++ b/src/test/test_combine_split.py @@ -368,3 +368,55 @@ def test_zip_with_different_types() -> None: b = ["a", "b", "c"] result = a.zip(b).collect(list) assert result == [(1, "a"), (2, "b"), (3, "c")] + + +def test_tee_default_two_returns_two_independent_itrs() -> None: + i = Itr([0, 1, 2]) + a, b = i.tee() # default n=2 + assert isinstance(a, Itr) + assert isinstance(b, Itr) + # Both should yield the full sequence independently + assert list(a) == [0, 1, 2] + assert list(b) == [0, 1, 2] + # but i is now consumed + with pytest.raises(StopIteration): + next(i) + + +def test_tee_with_n_creates_independent_copies_and_preserves_items() -> None: + i = Itr([1, 2, 3, 4]) + a, b, c = i.tee(3) + # a consumes one item + first = next(a) + assert first == 1 + # b and c should still be able to produce the whole sequence from the start + assert list(b) == [1, 2, 3, 4] + assert list(c) == [1, 2, 3, 4] + # a should continue from where it left off + assert list(a) == [2, 3, 4] + + +def test_tee_returns_correct_number_of_iterators() 
-> None: + i = Itr([10, 20]) + tees = i.tee(1) + assert isinstance(tees, tuple) + assert len(tees) == 1 + assert isinstance(tees[0], Itr) + assert list(tees[0]) == [10, 20] + + +def test_tee_raises_value_error_for_invalid_n() -> None: + i = Itr([1, 2, 3]) + # NOTE: despite the test name, n == 0 does not raise; tee(0) returns an empty tuple + assert i.tee(0) == () + + +def test_tee_iterators_are_distinct_objects() -> None: + i = Itr([7, 8, 9]) + a, b = i.tee(2) + assert a is not b + assert a is not i + assert b is not i + # ensure both still yield the same items + assert list(a) == [7, 8, 9] + assert list(b) == [7, 8, 9] diff --git a/src/test/test_transform_filter.py b/src/test/test_transform_filter.py index ba7f5cc..98c1849 100644 --- a/src/test/test_transform_filter.py +++ b/src/test/test_transform_filter.py @@ -1,3 +1,4 @@ +from collections import defaultdict from operator import mul import pytest @@ -73,6 +74,30 @@ def test_map_while() -> None: assert it.collect() == (0, 1, 4, 9, 16) +def test_map_dict_basic() -> None: + mapper = {1: "a", 2: "b", 3: "c"} + assert Itr([1, 2, 3]).map_dict(mapper).collect() == ("a", "b", "c") + + +def test_map_dict_defaultdict_handles_missing_keys() -> None: + mapper = defaultdict(lambda: "x", {1: "a"}) + assert Itr([1, 2, 99]).map_dict(mapper).collect() == ("a", "x", "x") + # defaultdict should have created entries for missing keys when accessed + assert 2 in mapper and 99 in mapper + + +def test_map_dict_missing_key_raises_keyerror_for_plain_dict() -> None: + mapper = {1: "a"} + with pytest.raises(KeyError): + Itr([1, 2]).map_dict(mapper).collect() + + +def test_map_dict_empty_iterable_returns_empty_tuple() -> None: + mapper = {1: "a"} + empty: list[int] = [] + assert Itr(empty).map_dict(mapper).collect() == () + + def test_skip_while_some_skipped() -> None: it = Itr([1, 2, 3, 4, 5]).skip_while(lambda x: x < 3) assert it.collect() == (3, 4, 5)