Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion .github/workflows/pr_code_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,32 @@ jobs:
fail_ci_if_error: false
verbose: true
- name: Test documentation builds
run: make documentation
run: make documentation

# Job: runs the pandas compatibility test module once for each pandas major
# version listed in the matrix below.
Pandas-Compatibility:
  name: Pandas ${{ matrix.pandas-version }} Compatibility
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # One job instance per pandas major version.
      pandas-version: ["2", "3"]
    # Keep the other matrix entry running when one of them fails, so both
    # major versions always report a result.
    fail-fast: false
  steps:
    - name: Checkout repo
      uses: actions/checkout@v4
    - name: Install uv
      uses: astral-sh/setup-uv@v5
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.13"
    # Editable install of the package with its "dev" extra into the system
    # interpreter (no virtual environment).
    - name: Install package
      run: uv pip install -e .[dev] --system
    # Runs after the package install, so the constraint chosen here decides
    # the pandas major version that the tests see.
    - name: Install pandas ${{ matrix.pandas-version }}
      run: |
        if [ "${{ matrix.pandas-version }}" = "2" ]; then
          uv pip install "pandas>=2,<3" --system
        else
          uv pip install "pandas>=3,<4" --system
        fi
    # Only the dedicated compatibility module is run, not the full suite.
    - name: Run pandas compatibility tests
      run: pytest policyengine_uk/tests/core/test_pandas3_compatibility.py -v
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: patch
changes:
added:
- Added pandas 2 and 3 CI compatibility testing to ensure both major versions work.
147 changes: 147 additions & 0 deletions policyengine_uk/tests/core/test_pandas3_compatibility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
"""
Tests for pandas 3.0.0 compatibility in policyengine-uk.

These tests verify that policyengine-uk works correctly with pandas 3.0.0,
which makes the dedicated string dtype (StringDtype, PyArrow-backed when
PyArrow is installed) the default for string data.

These tests will FAIL if policyengine-core < 3.9.1 is used, which doesn't
have the pandas 3 compatibility fixes.
"""

import numpy as np

from policyengine_uk import Simulation


class TestRegionParameterLookupWithPandas3:
    """
    Region-keyed parameter lookups must survive pandas 3 string arrays.

    pandas 3 stores string columns as StringDtype by default, so the region
    codes used for vectorial parameter indexing may arrive as a StringArray
    rather than a numpy array.

    policyengine-core >= 3.9.1 converts StringArray to numpy before lookup.
    """

    def test_region_parameter_lookup(self):
        """
        Compute income tax for two households located in different regions.

        This goes through VectorialParameterNodeAtInstant.__getitem__, where
        the fix converting a pandas StringArray to a numpy array lives.
        """
        # Two single-person benefit units, one household per region.
        people = {
            "person1": {
                "age": {"2024": 30},
                "employment_income": {"2024": 30000},
            },
            "person2": {
                "age": {"2024": 40},
                "employment_income": {"2024": 50000},
            },
        }
        benunits = {
            "benunit1": {"members": ["person1"]},
            "benunit2": {"members": ["person2"]},
        }
        households = {
            "household1": {
                "members": ["person1"],
                "region": {"2024": "LONDON"},
            },
            "household2": {
                "members": ["person2"],
                "region": {"2024": "SCOTLAND"},
            },
        }
        simulation = Simulation(
            situation={
                "people": people,
                "benunits": benunits,
                "households": households,
            }
        )

        # income_tax depends on region-specific rates (Scotland's differ),
        # so computing it performs a vectorial parameter lookup keyed by
        # string arrays. With broken StringArray handling this raises:
        #   TypeError: unhashable type: 'StringArray'
        income_tax = simulation.calculate("income_tax", "2024")

        # Sanity checks only: a numpy array holding one non-negative value
        # per person.
        assert isinstance(income_tax, np.ndarray)
        assert len(income_tax) == 2  # Two people
        assert np.all(income_tax >= 0)


class TestFilledArrayWithStringDtype:
    """
    population.filled_array must cope with pandas StringDtype.

    Under pandas 3, numpy.full() cannot handle StringDtype. policyengine-core
    >= 3.9.1 converts StringDtype to object dtype before calling
    numpy.full().
    """

    def test_string_variable_default_value(self):
        """
        Calculate a string-typed variable that is left at its default.

        Variables with value_type=str go through filled_array with a string
        dtype. In pandas 3 this would fail with:
            TypeError: Cannot interpret '<StringDtype>' as a data type
        """
        # Minimal one-person, one-household situation; region is not set.
        situation = {
            "people": {
                "person1": {"age": {"2024": 30}},
            },
            "households": {
                "household1": {
                    "members": ["person1"],
                },
            },
        }
        simulation = Simulation(situation=situation)

        # region is a string/enum variable, so calculating it exercises
        # filled_array.
        region = simulation.calculate("region", "2024")

        # One household in, one value out, and no exception on the way.
        assert len(region) == 1


class TestEnumVariableWithPandas3:
    """
    Enum variables must keep working under pandas 3.

    Enum variables involve string-based parameter lookups, which can trigger
    the StringArray issue in pandas 3.
    """

    def test_tenure_type_enum(self):
        """
        Calculate the tenure_type enum for a single default household.
        """
        # Minimal one-person, one-household situation; tenure_type is not set.
        situation = {
            "people": {
                "person1": {"age": {"2024": 30}},
            },
            "households": {
                "household1": {
                    "members": ["person1"],
                },
            },
        }
        simulation = Simulation(situation=situation)

        # tenure_type is an enum variable.
        tenure_type = simulation.calculate("tenure_type", "2024")

        # One household in, one value out.
        assert len(tenure_type) == 1