From 2aae637e7335c3e8b6562536ce920b4f071bf6f8 Mon Sep 17 00:00:00 2001 From: 27bslash6 <2221076+27bslash6@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:01:24 +1100 Subject: [PATCH 1/3] fix: lazy-load ArrowSerializer to avoid ImportError without pyarrow ArrowSerializer was unconditionally imported at module load time, causing ImportError for users who install cachekit without the [data] extra (pyarrow). Changes: - Remove top-level ArrowSerializer import - Add lazy loading via _get_arrow_serializer() helper - Use __getattr__ for lazy attribute access - Update SERIALIZER_REGISTRY to use None placeholder - Fix benchmark_serializers and get_serializer_info to handle lazy loading gracefully Users can now: - pip install cachekit and use default serializers - pip install cachekit[data] to enable ArrowSerializer Fixes #41 --- src/cachekit/serializers/__init__.py | 63 ++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/src/cachekit/serializers/__init__.py b/src/cachekit/serializers/__init__.py index 28a4a1c..ee55510 100644 --- a/src/cachekit/serializers/__init__.py +++ b/src/cachekit/serializers/__init__.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import logging from threading import Lock -from typing import Any +from typing import TYPE_CHECKING, Any from cachekit._rust_serializer import ByteStorage -from .arrow_serializer import ArrowSerializer from .auto_serializer import AutoSerializer from .base import ( SerializationError, @@ -16,8 +17,25 @@ from .orjson_serializer import OrjsonSerializer from .standard_serializer import StandardSerializer +if TYPE_CHECKING: + from .arrow_serializer import ArrowSerializer + logger = logging.getLogger(__name__) +# Lazy import for optional ArrowSerializer (requires pyarrow from [data] extra) +_ArrowSerializer: type | None = None + + +def _get_arrow_serializer() -> type: + """Lazy-load ArrowSerializer. Raises ImportError if pyarrow not installed.""" + global _ArrowSerializer + if _ArrowSerializer is None: + from .arrow_serializer import ArrowSerializer + + _ArrowSerializer = ArrowSerializer + return _ArrowSerializer + + # Validate ByteStorage works correctly test_storage = ByteStorage("msgpack") test_data = b"test validation data" @@ -36,7 +54,7 @@ "auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization) "default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches "std": StandardSerializer, # Explicit StandardSerializer alias - "arrow": ArrowSerializer, + "arrow": None, # Lazy-loaded: requires pyarrow from [data] extra "orjson": OrjsonSerializer, "encrypted": EncryptionWrapper, # AutoSerializer + AES-256-GCM encryption } @@ -96,8 +114,13 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali f"@cache(serializer=MySerializer())" ) + # Get serializer class (lazy-load arrow if needed) + if name == "arrow": + serializer_class = _get_arrow_serializer() + else: + serializer_class = SERIALIZER_REGISTRY[name] + # Instantiate with integrity checking configuration - serializer_class = SERIALIZER_REGISTRY[name] if name in ("default", "std", "auto", "arrow", "orjson"): # All core serializers use enable_integrity_checking parameter serializer = serializer_class(enable_integrity_checking=enable_integrity_checking) @@ -167,9 +190,9 @@ def get_available_serializers() -> dict[str, Any]: def benchmark_serializers() -> dict[str, Any]: """Get instantiated serializers for benchmarking.""" serializers = {} - for name, cls in get_available_serializers().items(): + for name in SERIALIZER_REGISTRY: try: - serializers[name] = cls() + serializers[name] = get_serializer(name) except Exception as e: logger.warning(f"Failed to instantiate {name} serializer: {e}") return serializers @@ -178,28 +201,42 @@ def benchmark_serializers() -> dict[str, Any]: def get_serializer_info() -> dict[str, dict[str, Any]]: """Get information about available serializers.""" info = {} - for name, cls in get_available_serializers().items(): + for name in SERIALIZER_REGISTRY: try: - instance = cls() + instance = get_serializer(name) info[name] = { - "class": cls.__name__, - "module": cls.__module__, + "class": type(instance).__name__, + "module": type(instance).__module__, "available": True, - "description": cls.__doc__ or "No description available", + "description": type(instance).__doc__ or "No description available", } # Add method info if available if hasattr(instance, "get_info"): info[name].update(instance.get_info()) + except ImportError as e: + info[name] = { + "class": "ArrowSerializer" if name == "arrow" else "Unknown", + "module": "cachekit.serializers.arrow_serializer", + "available": False, + "error": str(e), + } except Exception as e: info[name] = { - "class": cls.__name__, - "module": cls.__module__, + "class": "Unknown", + "module": "unknown", "available": False, "error": str(e), } return info +def __getattr__(name: str) -> Any: + """Lazy attribute access for optional ArrowSerializer.""" + if name == "ArrowSerializer": + return _get_arrow_serializer() + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + # Export the main interface __all__ = [ "ArrowSerializer", From c47dc36e8c66f7e3b381ffefae81bf63fa2c02be Mon Sep 17 00:00:00 2001 From: 27bslash6 <2221076+27bslash6@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:15:42 +1100 Subject: [PATCH 2/3] test: add coverage for lazy ArrowSerializer loading - Add 14 unit tests for lazy loading mechanism - Test _get_arrow_serializer(), __getattr__, get_serializer('arrow') - Test benchmark_serializers() and get_serializer_info() with lazy loading - Add unit conftest to skip Redis isolation for pure unit tests - Include Windows platform.node() fix (cherry-picked from #43) --- src/cachekit/logging.py | 3 +- tests/unit/conftest.py | 20 ++++ tests/unit/test_serializer_lazy_loading.py | 131 +++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 tests/unit/conftest.py create mode 100644 tests/unit/test_serializer_lazy_loading.py diff --git a/src/cachekit/logging.py b/src/cachekit/logging.py index c8d8719..ad700c3 100644 --- a/src/cachekit/logging.py +++ b/src/cachekit/logging.py @@ -7,6 +7,7 @@ import json import logging import os +import platform import random import threading import time @@ -170,7 +171,7 @@ def __init__(self, name: str, mask_sensitive: bool = True): # Pre-computed values for performance self._sampling_threshold = int(SAMPLING_RATE * 100) - self._hostname = os.uname().nodename + self._hostname = platform.node() self._pid = os.getpid() # PII patterns to mask (pre-compiled for speed) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..624826e --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,20 @@ +"""Unit test configuration - no Redis required. + +Unit tests are fast, in-memory tests that don't require Redis. +This conftest overrides the root conftest's autouse fixture to +skip Redis setup for pure unit tests. +""" + +import pytest + + +@pytest.fixture(autouse=True) +def setup_di_for_redis_isolation(request): + """Override root conftest's Redis isolation for pure unit tests. + + Unit tests don't need Redis - they test in-memory functionality. + This fixture overrides the parent conftest's autouse fixture + by having the same name. + """ + # No-op for unit tests - just yield without Redis setup + yield diff --git a/tests/unit/test_serializer_lazy_loading.py b/tests/unit/test_serializer_lazy_loading.py new file mode 100644 index 0000000..25a5a72 --- /dev/null +++ b/tests/unit/test_serializer_lazy_loading.py @@ -0,0 +1,131 @@ +"""Unit tests for lazy loading of optional serializers. + +Tests the lazy import mechanism for ArrowSerializer which requires +the optional [data] extra (pyarrow). +""" + +from __future__ import annotations + +import pytest + +from cachekit.serializers import ( + SERIALIZER_REGISTRY, + _get_arrow_serializer, + benchmark_serializers, + get_available_serializers, + get_serializer, + get_serializer_info, +) +from cachekit.serializers.arrow_serializer import ArrowSerializer +from cachekit.serializers.base import SerializerProtocol + + +class TestLazyArrowSerializerLoading: + """Test lazy loading mechanism for ArrowSerializer.""" + + def test_registry_has_none_for_arrow(self): + """SERIALIZER_REGISTRY stores None for arrow (lazy placeholder).""" + assert "arrow" in SERIALIZER_REGISTRY + assert SERIALIZER_REGISTRY["arrow"] is None + + def test_get_arrow_serializer_returns_class(self): + """_get_arrow_serializer() returns ArrowSerializer class.""" + cls = _get_arrow_serializer() + assert cls is ArrowSerializer + + def test_get_arrow_serializer_caches_result(self): + """_get_arrow_serializer() caches the imported class.""" + cls1 = _get_arrow_serializer() + cls2 = _get_arrow_serializer() + assert cls1 is cls2 + + def test_get_serializer_arrow_returns_instance(self): + """get_serializer('arrow') returns ArrowSerializer instance.""" + serializer = get_serializer("arrow") + assert isinstance(serializer, ArrowSerializer) + assert isinstance(serializer, SerializerProtocol) + + def test_get_serializer_arrow_with_integrity_checking(self): + """get_serializer('arrow', enable_integrity_checking=False) works.""" + serializer = get_serializer("arrow", enable_integrity_checking=False) + assert isinstance(serializer, ArrowSerializer) + assert serializer.enable_integrity_checking is False + + def test_module_getattr_returns_arrow_serializer(self): + """Module __getattr__ returns ArrowSerializer for lazy access.""" + from cachekit import serializers + + # Access via module attribute (triggers __getattr__) + cls = serializers.ArrowSerializer + assert cls is ArrowSerializer + + def test_module_getattr_raises_for_unknown(self): + """Module __getattr__ raises AttributeError for unknown names.""" + from cachekit import serializers + + with pytest.raises(AttributeError, match="has no attribute"): + _ = serializers.NonExistentSerializer + + +class TestBenchmarkSerializersWithLazyLoading: + """Test benchmark_serializers handles lazy loading.""" + + def test_benchmark_serializers_includes_arrow(self): + """benchmark_serializers() successfully instantiates arrow.""" + serializers = benchmark_serializers() + assert "arrow" in serializers + assert isinstance(serializers["arrow"], ArrowSerializer) + + def test_benchmark_serializers_returns_available_serializers(self): + """benchmark_serializers() returns serializers that can be instantiated.""" + serializers = benchmark_serializers() + # Should have core serializers (encrypted needs master key, so excluded) + assert "auto" in serializers + assert "default" in serializers + assert "arrow" in serializers + assert "orjson" in serializers + # encrypted may be missing if no master key configured + + +class TestGetSerializerInfoWithLazyLoading: + """Test get_serializer_info handles lazy loading.""" + + def test_get_serializer_info_includes_arrow(self): + """get_serializer_info() includes arrow with availability info.""" + info = get_serializer_info() + assert "arrow" in info + assert info["arrow"]["available"] is True + assert info["arrow"]["class"] == "ArrowSerializer" + + def test_get_serializer_info_returns_all_serializers(self): + """get_serializer_info() returns info for all registered serializers.""" + info = get_serializer_info() + for name in SERIALIZER_REGISTRY: + assert name in info + assert "available" in info[name] + assert "class" in info[name] + + def test_get_serializer_info_includes_get_info_data(self): + """get_serializer_info() includes data from serializer.get_info() if available.""" + info = get_serializer_info() + # ArrowSerializer has get_info method + arrow_info = info["arrow"] + assert arrow_info["available"] is True + # get_info data should be merged in + assert "module" in arrow_info + + +class TestGetAvailableSerializers: + """Test get_available_serializers returns registry copy.""" + + def test_returns_registry_copy(self): + """get_available_serializers() returns a copy of the registry.""" + available = get_available_serializers() + assert available == SERIALIZER_REGISTRY + # Should be a copy, not the same object + assert available is not SERIALIZER_REGISTRY + + def test_arrow_is_none_in_registry(self): + """Arrow entry is None in the raw registry (lazy placeholder).""" + available = get_available_serializers() + assert available["arrow"] is None From 4a9f8cba1556b64d2b50b43295ad6879444ef6e1 Mon Sep 17 00:00:00 2001 From: 27bslash6 <2221076+27bslash6@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:30:39 +1100 Subject: [PATCH 3/3] chore: fix CI issues (format, CVE, type check) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Format: whitespace in conftest.py - CVE: urllib3 2.6.1 → 2.6.3 (GHSA-g4mx-q9vg-27p4) - Type: add type:ignore for hasattr-guarded get_info() call --- src/cachekit/serializers/__init__.py | 2 +- tests/unit/conftest.py | 2 +- uv.lock | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cachekit/serializers/__init__.py b/src/cachekit/serializers/__init__.py index ee55510..9c30ba9 100644 --- a/src/cachekit/serializers/__init__.py +++ b/src/cachekit/serializers/__init__.py @@ -212,7 +212,7 @@ def get_serializer_info() -> dict[str, dict[str, Any]]: } # Add method info if available if hasattr(instance, "get_info"): - info[name].update(instance.get_info()) + info[name].update(instance.get_info()) # type: ignore[attr-defined] except ImportError as e: info[name] = { "class": "ArrowSerializer" if name == "arrow" else "Unknown", diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 624826e..f224fbc 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -11,7 +11,7 @@ @pytest.fixture(autouse=True) def setup_di_for_redis_isolation(request): """Override root conftest's Redis isolation for pure unit tests. - + Unit tests don't need Redis - they test in-memory functionality. This fixture overrides the parent conftest's autouse fixture by having the same name. diff --git a/uv.lock b/uv.lock index 1c85f77..466c988 100644 --- a/uv.lock +++ b/uv.lock @@ -235,7 +235,7 @@ filecache = [ [[package]] name = "cachekit" -version = "0.2.2" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "blake3" }, @@ -2369,11 +2369,11 @@ wheels = [ [[package]] name = "urllib3" -version = "2.6.1" +version = "2.6.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/1d/0f3a93cca1ac5e8287842ed4eebbd0f7a991315089b1a0b01c7788aa7b63/urllib3-2.6.1.tar.gz", hash = "sha256:5379eb6e1aba4088bae84f8242960017ec8d8e3decf30480b3a1abdaa9671a3f", size = 432678, upload-time = "2025-12-08T15:25:26.773Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/56/190ceb8cb10511b730b564fb1e0293fa468363dbad26145c34928a60cb0c/urllib3-2.6.1-py3-none-any.whl", hash = "sha256:e67d06fe947c36a7ca39f4994b08d73922d40e6cca949907be05efa6fd75110b", size = 131138, upload-time = "2025-12-08T15:25:25.51Z" }, + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] [[package]]