From 1f2b78744f8ef51532ee07a96c1f1f2764dca3c5 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 4 Feb 2026 02:48:03 +0300 Subject: [PATCH 1/5] Add place-level region support and remove city region type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add support for place/ prefix regions with format place/{STATE_ABBREV}-{PLACE_FIPS} (e.g., place/NJ-57000 for Newark, NJ) - Remove city/ prefix and city region type (previously only supported NYC) - Update _validate_us_region to validate place codes: - Validates state abbreviation - Validates 5-digit FIPS code format - Update normalize_us_region to pass through place/ prefix - Remove city/nyc from metadata region list - Update all related tests Fixes #3194 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- changelog_entry.yaml | 6 +++ policyengine_api/constants.py | 4 +- policyengine_api/country.py | 1 - .../data/congressional_districts.py | 11 ++---- policyengine_api/services/economy_service.py | 24 +++++++++--- tests/fixtures/services/economy_service.py | 11 +++--- .../unit/data/test_congressional_districts.py | 8 ++-- tests/unit/services/test_economy_service.py | 39 ++++++++++++++----- tests/unit/services/test_metadata_service.py | 3 +- tests/unit/test_constants.py | 8 ++-- 10 files changed, 74 insertions(+), 41 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..e64db84b5 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,6 @@ +- bump: minor + changes: + added: + - Add place-level region support for US Census places with format place/{STATE_ABBREV}-{PLACE_FIPS} + removed: + - Remove city region type (city/nyc) in favor of place regions diff --git a/policyengine_api/constants.py b/policyengine_api/constants.py index a9f13df5c..a8437314b 100644 --- a/policyengine_api/constants.py +++ b/policyengine_api/constants.py @@ -30,7 +30,7 @@ US_REGION_TYPES = ( "national", # National level (e.g., "us") "state", # US states (e.g., "state/ca", "state/ny") - "city", # US cities (e.g., "city/nyc") + "place", # US Census places (e.g., "place/NJ-57000") "congressional_district", # US congressional districts (e.g., "congressional_district/CA-37") ) @@ -46,7 +46,7 @@ REGION_PREFIXES = { "us": [ "state/", # US states (e.g., "state/ca", "state/ny") - "city/", # US cities (e.g., "city/nyc") + "place/", # US Census places (e.g., "place/NJ-57000") "congressional_district/", # US congressional districts (e.g., "congressional_district/CA-37") ], "uk": [ diff --git a/policyengine_api/country.py b/policyengine_api/country.py index 29f64fbbe..680280167 100644 --- a/policyengine_api/country.py +++ b/policyengine_api/country.py @@ -157,7 +157,6 @@ def build_microsimulation_options(self) -> dict: dict(name="state/nj", label="New Jersey", type="state"), dict(name="state/nm", label="New Mexico", type="state"), dict(name="state/ny", label="New York", type="state"), - dict(name="city/nyc", label="New York City", type="city"), dict(name="state/nc", label="North Carolina", type="state"), dict(name="state/nd", label="North Dakota", type="state"), dict(name="state/oh", label="Ohio", type="state"), diff --git a/policyengine_api/data/congressional_districts.py b/policyengine_api/data/congressional_districts.py index 8c52c4e64..2e728991b 100644 --- a/policyengine_api/data/congressional_districts.py +++ b/policyengine_api/data/congressional_districts.py @@ -730,12 +730,13 @@ def normalize_us_region(region: str) -> str: Args: region: A region string that may be in legacy or standard format. - Examples: "ca", "state/ca", "nyc", "city/nyc", + Examples: "ca", "state/ca", "place/NJ-57000", "congressional_district/CA-01", "us" Returns: The normalized region string with appropriate prefix. - Examples: "state/ca", "city/nyc", "congressional_district/CA-01", "us" + Examples: "state/ca", "place/NJ-57000", + "congressional_district/CA-01", "us" Note: This function does NOT validate that the region is valid - it only @@ -744,7 +745,7 @@ def normalize_us_region(region: str) -> str: # Already has a valid prefix - return as-is if ( region.startswith("state/") - or region.startswith("city/") + or region.startswith("place/") or region.startswith("congressional_district/") ): return region @@ -753,10 +754,6 @@ def normalize_us_region(region: str) -> str: if region == "us": return region - # Legacy NYC format - if region == "nyc": - return "city/nyc" - # Legacy bare state code (e.g., "ca", "tx", "NY") # Check if it's a valid state code before adding prefix if region.lower() in get_valid_state_codes(): diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index d22a36eff..f42bedbf4 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -520,11 +520,25 @@ def _validate_us_region(self, region: str) -> None: state_code = region[len("state/") :] if state_code.lower() not in get_valid_state_codes(): raise ValueError(f"Invalid US state: '{state_code}'") - elif region.startswith("city/"): - # Currently only NYC is supported - city_code = region[len("city/") :] - if city_code != "nyc": - raise ValueError(f"Invalid US city: '{city_code}'") + elif region.startswith("place/"): + # Place format: place/{STATE_ABBREV}-{PLACE_FIPS} + # e.g., place/NJ-57000 for Newark, NJ + place_code = region[len("place/") :] + if "-" not in place_code: + raise ValueError( + f"Invalid place format: '{place_code}'. " + "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)" + ) + state_abbrev, place_fips = place_code.split("-", 1) + if state_abbrev.lower() not in get_valid_state_codes(): + raise ValueError( + f"Invalid state in place code: '{state_abbrev}'" + ) + if not place_fips.isdigit() or len(place_fips) != 5: + raise ValueError( + f"Invalid FIPS code in place: '{place_fips}'. " + "Expected 5-digit FIPS code" + ) elif region.startswith("congressional_district/"): district_id = region[len("congressional_district/") :] if district_id.lower() not in get_valid_congressional_districts(): diff --git a/tests/fixtures/services/economy_service.py b/tests/fixtures/services/economy_service.py index 96ee736fa..9ea1ca24a 100644 --- a/tests/fixtures/services/economy_service.py +++ b/tests/fixtures/services/economy_service.py @@ -233,9 +233,7 @@ def mock_simulation_api_modal(): MOCK_US_NATIONWIDE_DATASET = "gs://policyengine-us-data/cps_2023.h5" MOCK_US_STATE_CA_DATASET = "gs://policyengine-us-data/states/CA.h5" MOCK_US_STATE_UT_DATASET = "gs://policyengine-us-data/states/UT.h5" -MOCK_US_CITY_NYC_DATASET = ( - "gs://policyengine-us-data/pooled_3_year_cps_2023.h5" -) +MOCK_US_PLACE_NJ_57000_DATASET = "gs://policyengine-us-data/states/NJ.h5" MOCK_US_DISTRICT_CA37_DATASET = "gs://policyengine-us-data/districts/CA-37.h5" MOCK_UK_DATASET = "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5" @@ -251,8 +249,11 @@ def mock_get_default_dataset_fn(country: str, region: str | None) -> str: return MOCK_US_STATE_CA_DATASET elif region == "state/ut": return MOCK_US_STATE_UT_DATASET - elif region == "city/nyc": - return MOCK_US_CITY_NYC_DATASET + elif region.startswith("place/"): + # Place uses parent state's dataset + place_code = region.split("/")[1] + state_abbrev = place_code.split("-")[0].upper() + return f"gs://policyengine-us-data/states/{state_abbrev}.h5" elif region == "congressional_district/CA-37": return MOCK_US_DISTRICT_CA37_DATASET elif region.startswith("state/"): diff --git a/tests/unit/data/test_congressional_districts.py b/tests/unit/data/test_congressional_districts.py index 05819916a..91b415705 100644 --- a/tests/unit/data/test_congressional_districts.py +++ b/tests/unit/data/test_congressional_districts.py @@ -359,8 +359,9 @@ def test__prefixed_state_unchanged(self): assert normalize_us_region("state/ca") == "state/ca" assert normalize_us_region("state/TX") == "state/TX" - def test__prefixed_city_unchanged(self): - assert normalize_us_region("city/nyc") == "city/nyc" + def test__prefixed_place_unchanged(self): + assert normalize_us_region("place/NJ-57000") == "place/NJ-57000" + assert normalize_us_region("place/ca-44000") == "place/ca-44000" def test__prefixed_congressional_district_unchanged(self): assert ( @@ -372,9 +373,6 @@ def test__prefixed_congressional_district_unchanged(self): == "congressional_district/tx-14" ) - def test__legacy_nyc_converted(self): - assert normalize_us_region("nyc") == "city/nyc" - def test__legacy_state_code_lowercase_converted(self): assert normalize_us_region("ca") == "state/ca" assert normalize_us_region("tx") == "state/tx" diff --git a/tests/unit/services/test_economy_service.py b/tests/unit/services/test_economy_service.py index 4cb010ce6..49c0fe39b 100644 --- a/tests/unit/services/test_economy_service.py +++ b/tests/unit/services/test_economy_service.py @@ -942,11 +942,32 @@ def test__given_invalid_bare_value__raises_value_error(self): service._setup_region("us", "invalid_value") assert "Invalid US region: 'invalid_value'" in str(exc_info.value) - def test__given_city_nyc__returns_unchanged(self): - # Test normalized "city/nyc" format passes through + def test__given_place_region__returns_unchanged(self): + # Test normalized "place/STATE-FIPS" format passes through service = EconomyService() - result = service._setup_region("us", "city/nyc") - assert result == "city/nyc" + result = service._setup_region("us", "place/NJ-57000") + assert result == "place/NJ-57000" + + def test__given_invalid_place_format__raises_value_error(self): + # Test place without hyphen raises error + service = EconomyService() + with pytest.raises(ValueError) as exc_info: + service._setup_region("us", "place/invalid") + assert "Invalid place format" in str(exc_info.value) + + def test__given_invalid_place_state__raises_value_error(self): + # Test place with invalid state code raises error + service = EconomyService() + with pytest.raises(ValueError) as exc_info: + service._setup_region("us", "place/XX-57000") + assert "Invalid state in place code" in str(exc_info.value) + + def test__given_invalid_place_fips__raises_value_error(self): + # Test place with invalid FIPS code raises error + service = EconomyService() + with pytest.raises(ValueError) as exc_info: + service._setup_region("us", "place/NJ-abc") + assert "Invalid FIPS code" in str(exc_info.value) class TestSetupData: """Tests for _setup_data method. @@ -955,13 +976,11 @@ class TestSetupData: to return GCS paths for all region types (not None). """ - def test__given_us_city_nyc__returns_pooled_cps(self): - # Test with normalized city/nyc format + def test__given_us_place__returns_state_dataset(self): + # Test with place region - uses parent state's dataset service = EconomyService() - result = service._setup_data("us", "city/nyc") - assert ( - result == "gs://policyengine-us-data/pooled_3_year_cps_2023.h5" - ) + result = service._setup_data("us", "place/NJ-57000") + assert result == "gs://policyengine-us-data/states/NJ.h5" def test__given_us_state_ca__returns_state_dataset(self): # Test with US state - returns state-specific dataset diff --git a/tests/unit/services/test_metadata_service.py b/tests/unit/services/test_metadata_service.py index 70ea9262e..40c6805de 100644 --- a/tests/unit/services/test_metadata_service.py +++ b/tests/unit/services/test_metadata_service.py @@ -55,7 +55,6 @@ def test_get_metadata_empty_country_id(self): "state/ny", "state/tx", "state/fl", - "city/nyc", ], ), ("ca", 3, ["ca"]), @@ -124,7 +123,7 @@ def test_verify_metadata_for_given_country( "country_id, expected_types", [ ("uk", ["national", "country", "constituency", "local_authority"]), - ("us", ["national", "state", "city", "congressional_district"]), + ("us", ["national", "state", "place", "congressional_district"]), ], ) def test_verify_region_types_for_given_country( diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py index 439d5a239..f83d964ca 100644 --- a/tests/unit/test_constants.py +++ b/tests/unit/test_constants.py @@ -35,8 +35,8 @@ def test__contains_national(self): def test__contains_state(self): assert "state" in US_REGION_TYPES - def test__contains_city(self): - assert "city" in US_REGION_TYPES + def test__contains_place(self): + assert "place" in US_REGION_TYPES def test__contains_congressional_district(self): assert "congressional_district" in US_REGION_TYPES @@ -75,8 +75,8 @@ def test__us_key_exists(self): def test__contains_state_prefix(self): assert "state/" in REGION_PREFIXES["us"] - def test__contains_city_prefix(self): - assert "city/" in REGION_PREFIXES["us"] + def test__contains_place_prefix(self): + assert "place/" in REGION_PREFIXES["us"] def test__contains_congressional_district_prefix(self): assert "congressional_district/" in REGION_PREFIXES["us"] From 01df9ffe432f51f7f3fce062a4360f881f31d18c Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 4 Feb 2026 02:58:01 +0300 Subject: [PATCH 2/5] Refactor: Extract place code parsing into reusable helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add parse_place_code() to extract state and FIPS from place codes - Add validate_place_code() for reusable place code validation - Update _validate_us_region to use the new helpers - Add comprehensive unit tests for both helper functions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../data/congressional_districts.py | 43 +++++++++++++ policyengine_api/services/economy_service.py | 19 +----- .../unit/data/test_congressional_districts.py | 61 +++++++++++++++++++ 3 files changed, 106 insertions(+), 17 deletions(-) diff --git a/policyengine_api/data/congressional_districts.py b/policyengine_api/data/congressional_districts.py index 2e728991b..dc1432174 100644 --- a/policyengine_api/data/congressional_districts.py +++ b/policyengine_api/data/congressional_districts.py @@ -719,6 +719,49 @@ def get_valid_congressional_districts() -> set[str]: } +def parse_place_code(place_code: str) -> tuple[str, str]: + """ + Parse a place code into its state abbreviation and FIPS components. + + Args: + place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") + + Returns: + Tuple of (state_abbrev, place_fips) + + Raises: + ValueError: If the place code format is invalid + """ + if "-" not in place_code: + raise ValueError( + f"Invalid place format: '{place_code}'. " + "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)" + ) + return place_code.split("-", 1) + + +def validate_place_code(place_code: str) -> None: + """ + Validate a place code has valid state abbreviation and FIPS format. + + Args: + place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") + + Raises: + ValueError: If the state abbreviation or FIPS code is invalid + """ + state_abbrev, place_fips = parse_place_code(place_code) + + if state_abbrev.lower() not in get_valid_state_codes(): + raise ValueError(f"Invalid state in place code: '{state_abbrev}'") + + if not place_fips.isdigit() or len(place_fips) != 5: + raise ValueError( + f"Invalid FIPS code in place: '{place_fips}'. " + "Expected 5-digit FIPS code" + ) + + def normalize_us_region(region: str) -> str: """ Normalize a US region string to the standard prefixed format. diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index f42bedbf4..bc8eea4b1 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -16,6 +16,7 @@ get_valid_state_codes, get_valid_congressional_districts, normalize_us_region, + validate_place_code, ) from policyengine.simulation import SimulationOptions from policyengine.utils.data.datasets import get_default_dataset @@ -521,24 +522,8 @@ def _validate_us_region(self, region: str) -> None: if state_code.lower() not in get_valid_state_codes(): raise ValueError(f"Invalid US state: '{state_code}'") elif region.startswith("place/"): - # Place format: place/{STATE_ABBREV}-{PLACE_FIPS} - # e.g., place/NJ-57000 for Newark, NJ place_code = region[len("place/") :] - if "-" not in place_code: - raise ValueError( - f"Invalid place format: '{place_code}'. " - "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)" - ) - state_abbrev, place_fips = place_code.split("-", 1) - if state_abbrev.lower() not in get_valid_state_codes(): - raise ValueError( - f"Invalid state in place code: '{state_abbrev}'" - ) - if not place_fips.isdigit() or len(place_fips) != 5: - raise ValueError( - f"Invalid FIPS code in place: '{place_fips}'. " - "Expected 5-digit FIPS code" - ) + validate_place_code(place_code) elif region.startswith("congressional_district/"): district_id = region[len("congressional_district/") :] if district_id.lower() not in get_valid_congressional_districts(): diff --git a/tests/unit/data/test_congressional_districts.py b/tests/unit/data/test_congressional_districts.py index 91b415705..88eb3a4cd 100644 --- a/tests/unit/data/test_congressional_districts.py +++ b/tests/unit/data/test_congressional_districts.py @@ -9,6 +9,8 @@ build_congressional_district_metadata, get_valid_state_codes, get_valid_congressional_districts, + parse_place_code, + validate_place_code, normalize_us_region, ) @@ -390,3 +392,62 @@ def test__unknown_region_returned_unchanged(self): # Unknown regions are returned as-is for validation to catch assert normalize_us_region("invalid") == "invalid" assert normalize_us_region("mb") == "mb" # Manitoba (Canadian) + + +class TestParsePlaceCode: + """Tests for the parse_place_code function.""" + + def test__given_valid_place_code__returns_tuple(self): + state, fips = parse_place_code("NJ-57000") + assert state == "NJ" + assert fips == "57000" + + def test__given_lowercase_place_code__returns_tuple(self): + state, fips = parse_place_code("ca-44000") + assert state == "ca" + assert fips == "44000" + + def test__given_no_hyphen__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + parse_place_code("NJ57000") + assert "Invalid place format" in str(exc_info.value) + + def test__given_empty_string__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + parse_place_code("") + assert "Invalid place format" in str(exc_info.value) + + +class TestValidatePlaceCode: + """Tests for the validate_place_code function.""" + + def test__given_valid_place_code__no_error(self): + # Should not raise + validate_place_code("NJ-57000") + validate_place_code("ca-44000") + validate_place_code("TX-35000") + + def test__given_invalid_state__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("XX-57000") + assert "Invalid state in place code" in str(exc_info.value) + + def test__given_non_digit_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-abcde") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_short_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-5700") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_long_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-570001") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_no_hyphen__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ57000") + assert "Invalid place format" in str(exc_info.value) From 54df1f1e2152b24c923a7c98af01c5c643fe1bb4 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 4 Feb 2026 03:02:16 +0300 Subject: [PATCH 3/5] Move place helpers to dedicated places.py module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create policyengine_api/data/places.py with parse_place_code and validate_place_code - Remove place functions from congressional_districts.py - Update economy_service.py import to use new module - Create tests/unit/data/test_places.py with dedicated tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../data/congressional_districts.py | 43 ------------ policyengine_api/data/places.py | 51 +++++++++++++++ policyengine_api/services/economy_service.py | 2 +- .../unit/data/test_congressional_districts.py | 61 ----------------- tests/unit/data/test_places.py | 65 +++++++++++++++++++ 5 files changed, 117 insertions(+), 105 deletions(-) create mode 100644 policyengine_api/data/places.py create mode 100644 tests/unit/data/test_places.py diff --git a/policyengine_api/data/congressional_districts.py b/policyengine_api/data/congressional_districts.py index dc1432174..2e728991b 100644 --- a/policyengine_api/data/congressional_districts.py +++ b/policyengine_api/data/congressional_districts.py @@ -719,49 +719,6 @@ def get_valid_congressional_districts() -> set[str]: } -def parse_place_code(place_code: str) -> tuple[str, str]: - """ - Parse a place code into its state abbreviation and FIPS components. - - Args: - place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") - - Returns: - Tuple of (state_abbrev, place_fips) - - Raises: - ValueError: If the place code format is invalid - """ - if "-" not in place_code: - raise ValueError( - f"Invalid place format: '{place_code}'. " - "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)" - ) - return place_code.split("-", 1) - - -def validate_place_code(place_code: str) -> None: - """ - Validate a place code has valid state abbreviation and FIPS format. - - Args: - place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") - - Raises: - ValueError: If the state abbreviation or FIPS code is invalid - """ - state_abbrev, place_fips = parse_place_code(place_code) - - if state_abbrev.lower() not in get_valid_state_codes(): - raise ValueError(f"Invalid state in place code: '{state_abbrev}'") - - if not place_fips.isdigit() or len(place_fips) != 5: - raise ValueError( - f"Invalid FIPS code in place: '{place_fips}'. " - "Expected 5-digit FIPS code" - ) - - def normalize_us_region(region: str) -> str: """ Normalize a US region string to the standard prefixed format. diff --git a/policyengine_api/data/places.py b/policyengine_api/data/places.py new file mode 100644 index 000000000..e588489fe --- /dev/null +++ b/policyengine_api/data/places.py @@ -0,0 +1,51 @@ +""" +US Census place code parsing and validation utilities. + +Place codes follow the format: STATE_ABBREV-PLACE_FIPS +Example: NJ-57000 for Newark, NJ +""" + +from policyengine_api.data.congressional_districts import get_valid_state_codes + + +def parse_place_code(place_code: str) -> tuple[str, str]: + """ + Parse a place code into its state abbreviation and FIPS components. + + Args: + place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") + + Returns: + Tuple of (state_abbrev, place_fips) + + Raises: + ValueError: If the place code format is invalid + """ + if "-" not in place_code: + raise ValueError( + f"Invalid place format: '{place_code}'. " + "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)" + ) + return place_code.split("-", 1) + + +def validate_place_code(place_code: str) -> None: + """ + Validate a place code has valid state abbreviation and FIPS format. + + Args: + place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000") + + Raises: + ValueError: If the state abbreviation or FIPS code is invalid + """ + state_abbrev, place_fips = parse_place_code(place_code) + + if state_abbrev.lower() not in get_valid_state_codes(): + raise ValueError(f"Invalid state in place code: '{state_abbrev}'") + + if not place_fips.isdigit() or len(place_fips) != 5: + raise ValueError( + f"Invalid FIPS code in place: '{place_fips}'. " + "Expected 5-digit FIPS code" + ) diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index bc8eea4b1..6f7ab5ab1 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -16,8 +16,8 @@ get_valid_state_codes, get_valid_congressional_districts, normalize_us_region, - validate_place_code, ) +from policyengine_api.data.places import validate_place_code from policyengine.simulation import SimulationOptions from policyengine.utils.data.datasets import get_default_dataset import json diff --git a/tests/unit/data/test_congressional_districts.py b/tests/unit/data/test_congressional_districts.py index 88eb3a4cd..91b415705 100644 --- a/tests/unit/data/test_congressional_districts.py +++ b/tests/unit/data/test_congressional_districts.py @@ -9,8 +9,6 @@ build_congressional_district_metadata, get_valid_state_codes, get_valid_congressional_districts, - parse_place_code, - validate_place_code, normalize_us_region, ) @@ -392,62 +390,3 @@ def test__unknown_region_returned_unchanged(self): # Unknown regions are returned as-is for validation to catch assert normalize_us_region("invalid") == "invalid" assert normalize_us_region("mb") == "mb" # Manitoba (Canadian) - - -class TestParsePlaceCode: - """Tests for the parse_place_code function.""" - - def test__given_valid_place_code__returns_tuple(self): - state, fips = parse_place_code("NJ-57000") - assert state == "NJ" - assert fips == "57000" - - def test__given_lowercase_place_code__returns_tuple(self): - state, fips = parse_place_code("ca-44000") - assert state == "ca" - assert fips == "44000" - - def test__given_no_hyphen__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - parse_place_code("NJ57000") - assert "Invalid place format" in str(exc_info.value) - - def test__given_empty_string__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - parse_place_code("") - assert "Invalid place format" in str(exc_info.value) - - -class TestValidatePlaceCode: - """Tests for the validate_place_code function.""" - - def test__given_valid_place_code__no_error(self): - # Should not raise - validate_place_code("NJ-57000") - validate_place_code("ca-44000") - validate_place_code("TX-35000") - - def test__given_invalid_state__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - validate_place_code("XX-57000") - assert "Invalid state in place code" in str(exc_info.value) - - def test__given_non_digit_fips__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - validate_place_code("NJ-abcde") - assert "Invalid FIPS code" in str(exc_info.value) - - def test__given_short_fips__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - validate_place_code("NJ-5700") - assert "Invalid FIPS code" in str(exc_info.value) - - def test__given_long_fips__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - validate_place_code("NJ-570001") - assert "Invalid FIPS code" in str(exc_info.value) - - def test__given_no_hyphen__raises_value_error(self): - with pytest.raises(ValueError) as exc_info: - validate_place_code("NJ57000") - assert "Invalid place format" in str(exc_info.value) diff --git a/tests/unit/data/test_places.py b/tests/unit/data/test_places.py new file mode 100644 index 000000000..fc73e7e56 --- /dev/null +++ b/tests/unit/data/test_places.py @@ -0,0 +1,65 @@ +import pytest + +from policyengine_api.data.places import ( + parse_place_code, + validate_place_code, +) + + +class TestParsePlaceCode: + """Tests for the parse_place_code function.""" + + def test__given_valid_place_code__returns_tuple(self): + state, fips = parse_place_code("NJ-57000") + assert state == "NJ" + assert fips == "57000" + + def test__given_lowercase_place_code__returns_tuple(self): + state, fips = parse_place_code("ca-44000") + assert state == "ca" + assert fips == "44000" + + def test__given_no_hyphen__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + parse_place_code("NJ57000") + assert "Invalid place format" in str(exc_info.value) + + def test__given_empty_string__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + parse_place_code("") + assert "Invalid place format" in str(exc_info.value) + + +class TestValidatePlaceCode: + """Tests for the validate_place_code function.""" + + def test__given_valid_place_code__no_error(self): + # Should not raise + validate_place_code("NJ-57000") + validate_place_code("ca-44000") + validate_place_code("TX-35000") + + def test__given_invalid_state__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("XX-57000") + assert "Invalid state in place code" in str(exc_info.value) + + def test__given_non_digit_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-abcde") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_short_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-5700") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_long_fips__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ-570001") + assert "Invalid FIPS code" in str(exc_info.value) + + def test__given_no_hyphen__raises_value_error(self): + with pytest.raises(ValueError) as exc_info: + validate_place_code("NJ57000") + assert "Invalid place format" in str(exc_info.value) From a1109cf6502b4365a5cc6c99b3f409e4436d97d8 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 4 Feb 2026 03:16:37 +0300 Subject: [PATCH 4/5] Fix: Handle place regions in _setup_data by using parent state's dataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Place regions use their parent state's dataset for simulation. Extract the state abbreviation from the place code and use that to get the dataset path from get_default_dataset. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- policyengine_api/services/economy_service.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index 6f7ab5ab1..65ea69846 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -17,7 +17,7 @@ get_valid_congressional_districts, normalize_us_region, ) -from policyengine_api.data.places import validate_place_code +from policyengine_api.data.places import parse_place_code, validate_place_code from policyengine.simulation import SimulationOptions from policyengine.utils.data.datasets import get_default_dataset import json @@ -549,13 +549,22 @@ def _setup_data( If the dataset is in PASSTHROUGH_DATASETS, it will be passed directly to the simulation API. Otherwise, uses policyengine's get_default_dataset to resolve the appropriate GCS path. + + For place regions, uses the parent state's dataset. """ # If the dataset is a recognized passthrough keyword, use it directly if dataset in self.PASSTHROUGH_DATASETS: return dataset + # Place regions use their parent state's dataset + dataset_region = region + if region.startswith("place/"): + place_code = region[len("place/") :] + state_abbrev, _ = parse_place_code(place_code) + dataset_region = f"state/{state_abbrev}" + try: - return get_default_dataset(country_id, region) + return get_default_dataset(country_id, dataset_region) except ValueError as e: logger.log_struct( { From f2cd180bf849df6ad4c645311527494c9c79970b Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 4 Feb 2026 03:18:41 +0300 Subject: [PATCH 5/5] Revert "Fix: Handle place regions in _setup_data by using parent state's dataset" This reverts commit a1109cf6502b4365a5cc6c99b3f409e4436d97d8. --- policyengine_api/services/economy_service.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index 65ea69846..6f7ab5ab1 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -17,7 +17,7 @@ get_valid_congressional_districts, normalize_us_region, ) -from policyengine_api.data.places import parse_place_code, validate_place_code +from policyengine_api.data.places import validate_place_code from policyengine.simulation import SimulationOptions from policyengine.utils.data.datasets import get_default_dataset import json @@ -549,22 +549,13 @@ def _setup_data( If the dataset is in PASSTHROUGH_DATASETS, it will be passed directly to the simulation API. Otherwise, uses policyengine's get_default_dataset to resolve the appropriate GCS path. - - For place regions, uses the parent state's dataset. """ # If the dataset is a recognized passthrough keyword, use it directly if dataset in self.PASSTHROUGH_DATASETS: return dataset - # Place regions use their parent state's dataset - dataset_region = region - if region.startswith("place/"): - place_code = region[len("place/") :] - state_abbrev, _ = parse_place_code(place_code) - dataset_region = f"state/{state_abbrev}" - try: - return get_default_dataset(country_id, dataset_region) + return get_default_dataset(country_id, region) except ValueError as e: logger.log_struct( {