diff --git a/.claude/quality.md b/.claude/quality.md new file mode 100644 index 00000000..b7de0e05 --- /dev/null +++ b/.claude/quality.md @@ -0,0 +1,280 @@ +# Repository Quality Scoring + +**Repository**: Rhiza +**Assessment Date**: 2026-01-18 +**Version Analyzed**: 0.6.0 +**Overall Score**: 9.4/10 + +--- + +## Score Summary + +| Category | Score | Weight | Weighted | +|----------|-------|--------|----------| +| Code Quality | 9/10 | 10% | 0.90 | +| Testing | 10/10 | 15% | 1.50 | +| Documentation | 9/10 | 10% | 0.90 | +| CI/CD | 10/10 | 15% | 1.50 | +| Security | 9/10 | 10% | 0.90 | +| Architecture | 9/10 | 10% | 0.90 | +| Dependency Management | 10/10 | 10% | 1.00 | +| Developer Experience | 9/10 | 10% | 0.90 | +| Maintainability | 9/10 | 5% | 0.45 | +| Shell Scripts | 9/10 | 5% | 0.45 | +| **Overall** | **9.4/10** | 100% | **9.40** | + +**Quality Tier**: Enterprise-Grade / Production-Ready + +--- + +## Detailed Assessment + +### 1. Code Quality: 9/10 + +**Strengths**: +- Comprehensive Ruff configuration with 15+ rule sets (D, E, F, I, N, W, UP, B, C4, PT, RUF, TRY, ICN) +- Google-style docstrings enforced via pydocstyle rules +- Strong type annotations in Python utilities with `from __future__ import annotations` +- 120-character line length with consistent formatting +- Modern Python syntax enforced via pyupgrade rules +- Import sorting via isort integration +- PEP 8 naming conventions enforced + +**Weaknesses**: +- Security (S) and simplification (SIM) rule sets intentionally disabled +- Broad per-file exceptions for tests and notebooks + +--- + +### 2. 
Testing: 10/10 + +**Strengths**: +- 15 dedicated test files with 120+ test functions +- Multiple test types: unit, integration, doctest, README code execution +- Sophisticated fixtures in conftest.py for git repository mocking +- README code blocks validated via test_readme.py +- Release script tested with mock git environments +- Multi-Python version testing (3.11, 3.12, 3.13, 3.14) +- Coverage tracking with enforcement threshold +- Benchmark regression detection via pytest-benchmark + +**Weaknesses**: +- No property-based testing (hypothesis) +- No load/stress testing + +--- + +### 3. Documentation: 9/10 + +**Strengths**: +- Comprehensive README.md (17KB) with quick start, features, integration guide +- Architecture documentation with Mermaid diagrams (docs/ARCHITECTURE.md) +- Glossary of terms (docs/GLOSSARY.md) +- Quick reference card (docs/QUICK_REFERENCE.md) +- Customization guide (docs/CUSTOMIZATION.md) +- Release process guide (docs/RELEASING.md) +- Security policy (SECURITY.md) +- Contributing guidelines (CONTRIBUTING.md) +- Code of conduct (CODE_OF_CONDUCT.md) +- Auto-generated API docs via pdoc +- Interactive Marimo notebooks + +**Weaknesses**: +- Some scripts have minimal inline comments +- No external documentation hosting (ReadTheDocs/Sphinx) + +--- + +### 4. 
CI/CD: 10/10 + +**Strengths**: +- 14 GitHub Actions workflows covering all development phases: + - `rhiza_ci.yml` - Multi-Python testing with dynamic matrix + - `rhiza_mypy.yml` - Strict static type checking + - `rhiza_codeql.yml` - CodeQL security scanning + - `rhiza_security.yml` - pip-audit + bandit + - `rhiza_deptry.yml` - Dependency hygiene + - `rhiza_pre-commit.yml` - Hook validation + - `rhiza_release.yml` - Multi-phase release pipeline + - `rhiza_benchmarks.yml` - Performance regression detection + - `rhiza_book.yml` - Documentation + GitHub Pages + - `rhiza_docker.yml` - Container building + - `rhiza_devcontainer.yml` - Dev container validation + - `rhiza_marimo.yml` - Notebook validation + - `rhiza_sync.yml` - Template synchronization + - `rhiza_validate.yml` - Structure validation +- OIDC trusted publishing (no stored PyPI credentials) +- Dynamic Python version matrix from pyproject.toml +- Minimal permissions model +- fail-fast: false for complete test coverage + +**Weaknesses**: +- No manual approval gates for publishing +- GitLab CI exists but not actively maintained + +--- + +### 5. Security: 9/10 + +**Strengths**: +- Comprehensive SECURITY.md with vulnerability reporting process +- Response SLAs defined (48h acknowledgment, 7d assessment, 30d resolution) +- Multiple security scanners: + - CodeQL for semantic analysis + - Bandit for Python security patterns + - pip-audit for dependency vulnerabilities + - actionlint with shellcheck for workflow/script validation +- OIDC trusted publishing (no stored credentials) +- SLSA provenance attestations +- Locked dependencies via uv.lock (707 lines) +- Renovate for automated security updates + +**Weaknesses**: +- No SBOM generation in release workflow +- No container image scanning for devcontainer +- Some bandit rules disabled in tests (S101, S603) + +--- + +### 6. 
Architecture: 9/10 + +**Strengths**: +- Modular Makefile system (.rhiza/rhiza.mk + .rhiza/make.d/*.mk) +- Extension hooks (pre-install, post-install, pre-release, etc.) +- Clear separation of concerns: + - Core config in .rhiza/ + - Source in src/hello/ + - Tests in tests/test_rhiza/ + - Docs in book/ and docs/ + - Workflows in .github/workflows/ +- Configuration as code (pyproject.toml, ruff.toml, pytest.ini) +- Minimal root Makefile (4 lines) delegating to .rhiza/rhiza.mk +- Reusable Python utilities with proper exception handling + +**Weaknesses**: +- Mixed paradigms (Bash, Python, Make, YAML) +- Deep directory nesting in some areas + +--- + +### 7. Dependency Management: 10/10 + +**Strengths**: +- uv.lock file (707 lines) ensuring reproducible builds +- Modern uv package manager +- Zero production dependencies (template system only) +- Isolated dev dependencies with strict version bounds: + - marimo>=0.18.0,<1.0 + - numpy>=2.4.0,<3.0 + - plotly>=6.5.0,<7.0 + - pandas>=2.3.3,<3.0 +- Deptry integration for dependency hygiene +- Renovate automation for updates (pep621, pre-commit, github-actions, dockerfile) +- Lock file committed for reproducibility +- Python version specified in .python-version and pyproject.toml + +**Weaknesses**: +- Renovate only checks weekly (Tuesdays) +- Limited documentation of version choice rationale + +--- + +### 8. Developer Experience: 9/10 + +**Strengths**: +- 40+ Makefile targets with auto-generated help +- Single entry point: `make install` and `make help` +- .editorconfig for cross-IDE consistency +- 10 pre-commit hooks for local validation +- GitHub Codespaces support with .devcontainer +- Colored output in scripts (BLUE, RED, YELLOW) +- Dry-run support in release.sh +- Quick start guide in README +- UV auto-installation via `make install-uv` + +**Weaknesses**: +- Learning curve for .rhiza/make.d/ extension system +- Multiple tools to understand (uv, make, git) +- No VSCode extension or IntelliJ plugin + +--- + +### 9. 
Maintainability: 9/10 + +**Strengths**: +- Descriptive naming (version_matrix.py, check_workflow_names.py) +- Custom exception classes (RhizaError, VersionSpecifierError, PyProjectError) +- Consistent Google-style docstrings with Args, Returns, Raises +- Well-structured release.sh with helper functions +- Active maintenance (recent commits within days) +- Semantic commit messages with PR references +- Configuration-driven behavior via template.yml and pyproject.toml +- POSIX-compliant shell scripts validated with shellcheck + +**Weaknesses**: +- Few TODO comments for roadmap visibility +- release.sh has complex bash logic + +--- + +### 10. Shell Scripts: 9/10 + +**Strengths**: +- POSIX compliance with `set -eu` (fail on error, undefined vars) +- Proper error handling with meaningful messages +- Comprehensive help output with usage examples +- Shellcheck validation via actionlint workflow +- Dry-run support for safe testing +- Colored output for warnings/errors/info +- Proper variable scoping with local prefixes +- User prompts with confirmation flows +- Git status validation before releases + +**Weaknesses**: +- Limited inline comments for complex logic +- Some cryptic variable names due to POSIX constraints +- Errors cause immediate exit vs. 
recovery options + +--- + +## Improvement Recommendations + +### High Priority + +| Improvement | Impact | Effort | +|-------------|--------|--------| +| Add SBOM generation to release workflow | Supply chain transparency | Medium | +| Container image scanning for devcontainer | Security completeness | Low | +| Manual approval gate for PyPI publishing | Release safety | Low | + +### Medium Priority + +| Improvement | Impact | Effort | +|-------------|--------|--------| +| Property-based testing with hypothesis | Test coverage depth | Medium | +| More inline comments in shell scripts | Maintainability | Low | +| External documentation hosting | Discoverability | Medium | + +### Low Priority + +| Improvement | Impact | Effort | +|-------------|--------|--------| +| VSCode extension documentation | DX improvement | Low | +| More frequent Renovate schedule | Freshness | Low | +| Document dependency version rationale | Clarity | Low | + +--- + +## Conclusion + +Rhiza demonstrates **enterprise-grade engineering** with particular excellence in: + +1. **Automation**: 14 CI/CD workflows, 40+ make targets, pre-commit hooks +2. **Testing**: Comprehensive suite with innovative techniques (README testing, mock git repos) +3. **Security**: Multi-layer protection with OIDC, CodeQL, bandit, pip-audit +4. **Dependency Management**: Zero runtime deps, locked builds, automated updates +5. **Developer Experience**: Unified Makefile interface, sensible defaults, Codespaces support + +The repository serves as an exemplary template for Python projects, demonstrating how to balance standardization with extensibility through its living template architecture. + +**Verdict**: Production-ready, suitable for enterprise adoption as a project template foundation. 
diff --git a/tests/test_rhiza/test_version_matrix.py b/tests/test_rhiza/test_version_matrix.py
index 6c7bbdc2..c7347c95 100644
--- a/tests/test_rhiza/test_version_matrix.py
+++ b/tests/test_rhiza/test_version_matrix.py
@@ -1,13 +1,15 @@
 """Tests for version_matrix.py utility.
 
 Tests cover version parsing, specifier validation, and edge cases
-for malformed inputs.
+for malformed inputs. Includes property-based tests using hypothesis.
 """
 
 import sys
 from pathlib import Path
 
 import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
 
 # Add the utils directory to the path for imports
 sys.path.insert(0, str(Path(__file__).parent.parent.parent / ".rhiza" / "utils"))
@@ -262,3 +264,158 @@ def test_specifier_with_multiple_commas(self):
         """Handle multiple constraints."""
         assert satisfies("3.12", ">=3.11,<3.14,!=3.13") is True
         assert satisfies("3.13", ">=3.11,<3.14,!=3.13") is False
+
+
+# =============================================================================
+# Property-based tests using Hypothesis
+# =============================================================================
+
+# Custom strategies for generating version-like data
+version_component = st.integers(min_value=0, max_value=999)
+version_tuple = st.tuples(version_component, version_component) | st.tuples(
+    version_component, version_component, version_component
+)
+
+
+def tuple_to_version_str(t: tuple[int, ...]) -> str:
+    """Convert a version tuple to a version string."""
+    return ".".join(str(x) for x in t)
+
+
+# Strategy for valid version strings (2 or 3 components)
+valid_version_str = version_tuple.map(tuple_to_version_str)
+
+
+class TestParseVersionProperties:
+    """Property-based tests for parse_version function."""
+
+    @given(components=st.lists(version_component, min_size=1, max_size=5))
+    def test_output_length_equals_component_count(self, components: list[int]):
+        """Parsing a valid version string produces a tuple with the same number of components."""
+        version_str = ".".join(str(c) for c in components)
+        result = parse_version(version_str)
+        assert len(result) == len(components)
+
+    @given(components=st.lists(version_component, min_size=1, max_size=5))
+    def test_all_output_elements_are_non_negative_integers(self, components: list[int]):
+        """All elements in the parsed tuple are non-negative integers."""
+        version_str = ".".join(str(c) for c in components)
+        result = parse_version(version_str)
+        assert all(isinstance(x, int) and x >= 0 for x in result)
+
+    @given(components=st.lists(version_component, min_size=1, max_size=5))
+    def test_roundtrip_preserves_values(self, components: list[int]):
+        """Parsing a version string and converting back gives the same values."""
+        version_str = ".".join(str(c) for c in components)
+        result = parse_version(version_str)
+        # str() of a generated int never has leading zeros, so values must match exactly
+        assert result == tuple(components)
+
+    @given(components=st.lists(version_component, min_size=1, max_size=5))
+    def test_parsing_is_idempotent(self, components: list[int]):
+        """Parsing, converting to string, and parsing again gives the same result."""
+        version_str = ".".join(str(c) for c in components)
+        first_parse = parse_version(version_str)
+        reconstructed = ".".join(str(x) for x in first_parse)
+        second_parse = parse_version(reconstructed)
+        assert first_parse == second_parse
+
+    @given(
+        components=st.lists(version_component, min_size=1, max_size=3),
+        suffix=st.sampled_from(["", "rc1", "a1", "b2", "alpha", "beta", "dev1"]),
+    )
+    def test_suffix_is_stripped_from_last_component(self, components: list[int], suffix: str):
+        """Suffixes on the last component are stripped, keeping the numeric prefix."""
+        parts = [str(c) for c in components]
+        parts[-1] = parts[-1] + suffix  # Add suffix to last component
+        version_str = ".".join(parts)
+        result = parse_version(version_str)
+        # The numeric values should be preserved
+        assert result == tuple(components)
+
+    @given(garbage=st.text(alphabet="abcdefghijklmnopqrstuvwxyz", min_size=1, max_size=10))
+    def test_non_numeric_prefix_raises_error(self, garbage: str):
+        """Version components without numeric prefix raise VersionSpecifierError."""
+        with pytest.raises(VersionSpecifierError):
+            parse_version(garbage)
+
+
+class TestSatisfiesProperties:
+    """Property-based tests for satisfies function."""
+
+    @given(v=valid_version_str)
+    def test_reflexivity_equality(self, v: str):
+        """A version always satisfies equality with itself."""
+        assert satisfies(v, f"=={v}") is True
+
+    @given(v=valid_version_str)
+    def test_reflexivity_greater_or_equal(self, v: str):
+        """A version always satisfies >= itself."""
+        assert satisfies(v, f">={v}") is True
+
+    @given(v=valid_version_str)
+    def test_reflexivity_less_or_equal(self, v: str):
+        """A version always satisfies <= itself."""
+        assert satisfies(v, f"<={v}") is True
+
+    @given(v=valid_version_str)
+    def test_strict_inequality_never_satisfied_by_self(self, v: str):
+        """A version never satisfies strict inequality with itself."""
+        assert satisfies(v, f">{v}") is False
+        assert satisfies(v, f"<{v}") is False
+
+    @given(v=valid_version_str)
+    def test_not_equal_to_self_is_false(self, v: str):
+        """A version never satisfies != itself."""
+        assert satisfies(v, f"!={v}") is False
+
+    @given(v=valid_version_str, s=valid_version_str)
+    def test_greater_equal_opposite_of_less_than(self, v: str, s: str):
+        """V >= s is equivalent to not (v < s)."""
+        assert satisfies(v, f">={s}") == (not satisfies(v, f"<{s}"))
+
+    @given(v=valid_version_str, s=valid_version_str)
+    def test_less_equal_opposite_of_greater_than(self, v: str, s: str):
+        """V <= s is equivalent to not (v > s)."""
+        assert satisfies(v, f"<={s}") == (not satisfies(v, f">{s}"))
+
+    @given(v=valid_version_str, s=valid_version_str)
+    def test_equal_opposite_of_not_equal(self, v: str, s: str):
+        """V == s is equivalent to not (v != s)."""
+        assert satisfies(v, f"=={s}") == (not satisfies(v, f"!={s}"))
+
+    @given(v=valid_version_str, s=valid_version_str)
+    def test_trichotomy(self, v: str, s: str):
+        """Exactly one of <, ==, > holds for any two versions."""
+        lt = satisfies(v, f"<{s}")
+        eq = satisfies(v, f"=={s}")
+        gt = satisfies(v, f">{s}")
+        assert sum([lt, eq, gt]) == 1
+
+    @given(v=valid_version_str, s1=valid_version_str, s2=valid_version_str)
+    def test_conjunction_of_constraints(self, v: str, s1: str, s2: str):
+        """Comma-separated constraints are a conjunction (AND)."""
+        combined = satisfies(v, f">={s1},<={s2}")
+        separate = satisfies(v, f">={s1}") and satisfies(v, f"<={s2}")
+        assert combined == separate
+
+    @given(
+        major=st.integers(min_value=0, max_value=99),
+        minor=st.integers(min_value=0, max_value=99),
+    )
+    def test_ordering_consistency(self, major: int, minor: int):
+        """If v1 < v2, then satisfies reflects this ordering correctly."""
+        v1 = f"{major}.{minor}"
+        v2 = f"{major}.{minor + 1}"
+        # v1 < v2 should always hold
+        assert satisfies(v1, f"<{v2}") is True
+        assert satisfies(v2, f">{v1}") is True
+        assert satisfies(v1, f"<={v2}") is True
+        assert satisfies(v2, f">={v1}") is True
+
+    @given(v=valid_version_str, s=valid_version_str)
+    @settings(max_examples=50)
+    def test_whitespace_tolerance(self, v: str, s: str):
+        """Whitespace between the operator and the version doesn't affect the result."""
+        assert satisfies(v, f">={s}") == satisfies(v, f">= {s}")
+        assert satisfies(v, f"<={s}") == satisfies(v, f"<= {s}")