diff --git a/Makefile b/Makefile
index dbc3b434..f0976333 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,69 @@
-.PHONY: default uv-sync check test-all test docformat doctest mypy reset-baseline-schemas
+.PHONY: default uv-sync check test-all test docformat doctest mypy reset-baseline-schemas \
+	publish publish-all publish-core publish-system publish-annex publish-themes publish-cli publish-meta
 
 default: test-all
 
+# Publish all packages in dependency order
+publish-all: publish-core publish-system publish-annex publish-themes publish-cli publish-meta
+	@echo "All packages published successfully!"
+
+# Level 0: No internal dependencies
+publish-core:
+	@echo "Publishing overture-schema-core..."
+	uv build --package overture-schema-core
+	uv publish --index overture dist/overture_schema_core-*.whl dist/overture_schema_core-*.tar.gz
+	@rm -rf dist/overture_schema_core-*
+
+publish-system:
+	@echo "Publishing overture-schema-system..."
+	uv build --package overture-schema-system
+	uv publish --index overture dist/overture_schema_system-*.whl dist/overture_schema_system-*.tar.gz
+	@rm -rf dist/overture_schema_system-*
+
+# Level 1: Depends on core
+publish-annex: publish-core
+	@echo "Publishing overture-schema-annex..."
+	uv build --package overture-schema-annex
+	uv publish --index overture dist/overture_schema_annex-*.whl dist/overture_schema_annex-*.tar.gz
+	@rm -rf dist/overture_schema_annex-*
+
+publish-themes: publish-core
+	@echo "Publishing theme packages..."
+	uv build --package overture-schema-addresses-theme
+	uv publish --index overture dist/overture_schema_addresses_theme-*.whl dist/overture_schema_addresses_theme-*.tar.gz
+	@rm -rf dist/overture_schema_addresses_theme-*
+	uv build --package overture-schema-base-theme
+	uv publish --index overture dist/overture_schema_base_theme-*.whl dist/overture_schema_base_theme-*.tar.gz
+	@rm -rf dist/overture_schema_base_theme-*
+	uv build --package overture-schema-buildings-theme
+	uv publish --index overture dist/overture_schema_buildings_theme-*.whl dist/overture_schema_buildings_theme-*.tar.gz
+	@rm -rf dist/overture_schema_buildings_theme-*
+	uv build --package overture-schema-divisions-theme
+	uv publish --index overture dist/overture_schema_divisions_theme-*.whl dist/overture_schema_divisions_theme-*.tar.gz
+	@rm -rf dist/overture_schema_divisions_theme-*
+	uv build --package overture-schema-places-theme
+	uv publish --index overture dist/overture_schema_places_theme-*.whl dist/overture_schema_places_theme-*.tar.gz
+	@rm -rf dist/overture_schema_places_theme-*
+	uv build --package overture-schema-transportation-theme
+	uv publish --index overture dist/overture_schema_transportation_theme-*.whl dist/overture_schema_transportation_theme-*.tar.gz
+	@rm -rf dist/overture_schema_transportation_theme-*
+
+publish-cli: publish-core
+	@echo "Publishing overture-schema-cli..."
+	uv build --package overture-schema-cli
+	uv publish --index overture dist/overture_schema_cli-*.whl dist/overture_schema_cli-*.tar.gz
+	@rm -rf dist/overture_schema_cli-*
+
+# Level 2: Meta-package depends on all others
+publish-meta: publish-themes publish-cli
+	@echo "Publishing overture-schema (meta-package)..."
+	uv build --package overture-schema
+	uv publish --index overture dist/overture_schema-*.whl dist/overture_schema-*.tar.gz
+	@rm -rf dist/overture_schema-*
+
+# Convenience alias
+publish: publish-all
+
 uv-sync:
 	@uv sync --all-packages 2> /dev/null
diff --git a/packages/overture-schema-cli/pyproject.toml b/packages/overture-schema-cli/pyproject.toml
index 62ed4c3e..a2c07785 100644
--- a/packages/overture-schema-cli/pyproject.toml
+++ b/packages/overture-schema-cli/pyproject.toml
@@ -21,11 +21,15 @@ overture-schema-core = { workspace = true }
 build-backend = "hatchling.build"
 requires = ["hatchling"]
 
+[project.optional-dependencies]
+parquet = ["pyarrow>=14.0"]
+
 [dependency-groups]
 dev = [
     "pytest>=7.0",
     "ruff",
     "mypy",
+    "pyarrow>=14.0",
 ]
 
 [tool.hatch.version]
diff --git a/packages/overture-schema-cli/src/overture/schema/cli/arrow_schema.py b/packages/overture-schema-cli/src/overture/schema/cli/arrow_schema.py
new file mode 100644
index 00000000..168baa9d
--- /dev/null
+++ b/packages/overture-schema-cli/src/overture/schema/cli/arrow_schema.py
@@ -0,0 +1,625 @@
+"""
+Pydantic to PyArrow schema conversion and comparison for Overture models.
+
+This module provides functions to convert Pydantic models to PyArrow schemas,
+enabling generation of empty Parquet files with correct schema definitions,
+and to compare Arrow schemas for compatibility checking.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from types import NoneType, UnionType
+from typing import TYPE_CHECKING, Annotated, Any, Union, get_args, get_origin
+
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
+
+from .format_adapters import FieldDiff, SchemaDiff
+
+if TYPE_CHECKING:
+    import pyarrow as pa
+
+# Re-export for backwards compatibility
+__all__ = ["FieldDiff", "SchemaDiff", "compare_schemas", "pydantic_model_to_arrow_schema"]
+
+
+_PRIMITIVE_TYPES: set[str] = {
+    "int8", "int16", "int32", "int64",
+    "uint8", "uint16", "uint32",
+    "float32", "float64",
+}
+
+
+def _is_newtype(tp: Any) -> bool:
+    """Check if a type is a NewType."""
+    return callable(tp) and hasattr(tp, "__supertype__")
+
+
+def _get_newtype_name(tp: Any) -> str | None:
+    """Get the name of a NewType, or None if not a NewType."""
+    if _is_newtype(tp):
+        return getattr(tp, "__name__", None)
+    return None
+
+
+def _unwrap_annotated(tp: Any) -> tuple[Any, list[Any]]:
+    """
+    Unwrap an Annotated type, returning the base type and collected metadata.
+
+    Parameters
+    ----------
+    tp : Any
+        A type annotation, possibly Annotated[T, ...]
+
+    Returns
+    -------
+    tuple[Any, list[Any]]
+        Tuple of (base_type, metadata_list)
+    """
+    metadata: list[Any] = []
+    while get_origin(tp) is Annotated:
+        args = get_args(tp)
+        tp = args[0]
+        metadata.extend(args[1:])
+    return tp, metadata
+
+
+def _unwrap_optional(tp: Any) -> tuple[bool, Any]:
+    """
+    Check if a type is T | None (Optional), returning (is_optional, inner_type).
+
+    Parameters
+    ----------
+    tp : Any
+        A type annotation
+
+    Returns
+    -------
+    tuple[bool, Any]
+        Tuple of (is_optional, inner_type_or_original)
+    """
+    origin = get_origin(tp)
+    if origin is Union or origin is UnionType:
+        args = get_args(tp)
+        non_none = [a for a in args if a is not NoneType and a is not type(None)]
+        if len(non_none) == 1 and len(args) == 2:
+            return True, non_none[0]
+    return False, tp
+
+
+def _is_pydantic_missing(tp: Any) -> bool:
+    """Check if a type involves Pydantic's MISSING sentinel (Omitable)."""
+    try:
+        from pydantic.experimental.missing_sentinel import MISSING
+
+        origin = get_origin(tp)
+        if origin is Union or origin is UnionType:
+            args = get_args(tp)
+            return any(a is type(MISSING) or a is MISSING for a in args)
+    except ImportError:
+        pass
+    return False
+
+
+def _unwrap_missing(tp: Any) -> tuple[bool, Any]:
+    """
+    Unwrap Omitable[T], which is Annotated[T | MISSING, Field(default=MISSING)].
+
+    Returns (is_omitable, inner_type).
+    """
+    # First unwrap Annotated
+    inner, _ = _unwrap_annotated(tp)
+
+    # Check for MISSING in the union
+    if _is_pydantic_missing(inner):
+        try:
+            from pydantic.experimental.missing_sentinel import MISSING
+
+            origin = get_origin(inner)
+            if origin is Union or origin is UnionType:
+                args = get_args(inner)
+                non_missing = [
+                    a for a in args if a is not type(MISSING) and a is not MISSING
+                ]
+                if len(non_missing) == 1:
+                    return True, non_missing[0]
+        except ImportError:
+            pass
+    return False, tp
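+
+
+# A sketch of how the unwrapping helpers compose (illustrative doctest-style
+# examples only; the reprs assume CPython's default formatting):
+#
+#     >>> _unwrap_optional(int | None)
+#     (True, <class 'int'>)
+#     >>> _unwrap_optional(int | str)   # a union, but not a simple Optional
+#     (False, int | str)
+#     >>> _unwrap_annotated(Annotated[str, "meta"])
+#     (<class 'str'>, ['meta'])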
+
+
+def pydantic_to_arrow_type(
+    tp: Any,
+    field_info: FieldInfo | None = None,
+) -> "pa.DataType":
+    """
+    Convert a Python/Pydantic type annotation to a PyArrow data type.
+
+    Parameters
+    ----------
+    tp : Any
+        A Python type annotation (may include Annotated, Union, etc.)
+    field_info : FieldInfo | None
+        Optional Pydantic field info for additional context
+
+    Returns
+    -------
+    pa.DataType
+        The corresponding PyArrow data type
+    """
+    import pyarrow as pa
+
+    # Import Overture types for comparison
+    from overture.schema.system.primitive import BBox, Geometry
+
+    # Unwrap Annotated to get the base type
+    tp, _metadata = _unwrap_annotated(tp)
+
+    # Handle Omitable[T] (T | MISSING)
+    is_omitable, inner = _unwrap_missing(tp)
+    if is_omitable:
+        tp = inner
+
+    # Handle Optional[T] (T | None)
+    is_optional, inner = _unwrap_optional(tp)
+    if is_optional:
+        tp = inner
+        # Unwrap again in case of Annotated[T, ...] | None
+        tp, _ = _unwrap_annotated(tp)
+
+    # Check for NewType primitives (int8, int32, float64, etc.)
+    newtype_name = _get_newtype_name(tp)
+    if newtype_name and newtype_name in _PRIMITIVE_TYPES:
+        return getattr(pa, newtype_name)()
+
+    # If it's a NewType but not a known primitive, unwrap it
+    if _is_newtype(tp):
+        tp = tp.__supertype__
+        # Recurse with the unwrapped type
+        return pydantic_to_arrow_type(tp, field_info)
+
+    # Geometry -> binary (WKB encoding)
+    if tp is Geometry or (isinstance(tp, type) and issubclass(tp, Geometry)):
+        return pa.binary()
+
+    # BBox -> struct
+    if tp is BBox or (isinstance(tp, type) and issubclass(tp, BBox)):
+        return pa.struct(
+            [
+                pa.field("xmin", pa.float32()),
+                pa.field("ymin", pa.float32()),
+                pa.field("xmax", pa.float32()),
+                pa.field("ymax", pa.float32()),
+            ]
+        )
+
+    # Enums -> utf8 (string enums map directly; other enums fall back to
+    # their string representation)
+    if isinstance(tp, type) and issubclass(tp, Enum):
+        return pa.utf8()
+
+    # Basic Python types
+    if tp is str:
+        return pa.utf8()
+    if tp is int:
+        return pa.int64()
+    if tp is float:
+        return pa.float64()
+    if tp is bool:
+        return pa.bool_()
+    if tp is bytes:
+        return pa.binary()
+
+    # Handle list[T]
+    origin = get_origin(tp)
+    if origin is list:
+        args = get_args(tp)
+        if args:
+            element_type = pydantic_to_arrow_type(args[0])
+            return pa.list_(element_type)
+        return pa.list_(pa.utf8())  # Fallback for untyped list
+
+    # Handle dict[K, V] -> map
+    if origin is dict:
+        args = get_args(tp)
+        if len(args) == 2:
+            key_type = pydantic_to_arrow_type(args[0])
+            value_type = pydantic_to_arrow_type(args[1])
+            return pa.map_(key_type, value_type)
+        return pa.map_(pa.utf8(), pa.utf8())  # Fallback
+
+    # Handle Pydantic BaseModel -> struct
+    if isinstance(tp, type) and issubclass(tp, BaseModel):
+        return _model_to_struct_type(tp)
+
+    # Fallback: string
+    return pa.utf8()
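+
+
+# Representative mappings (illustrative; the reprs below assume pyarrow's
+# default type printing and are not part of the tested contract):
+#
+#     >>> pydantic_to_arrow_type(str)
+#     DataType(string)
+#     >>> pydantic_to_arrow_type(list[str])
+#     ListType(list<item: string>)
+#     >>> pydantic_to_arrow_type(dict[str, int])
+#     MapType(map<string, int64>)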
+
+
+def _model_to_struct_type(model: type[BaseModel]) -> "pa.DataType":
+    """
+    Convert a Pydantic model to a PyArrow struct type.
+
+    Parameters
+    ----------
+    model : type[BaseModel]
+        A Pydantic model class
+
+    Returns
+    -------
+    pa.DataType
+        A PyArrow struct type with fields matching the model
+    """
+    import pyarrow as pa
+
+    fields: list[pa.Field] = []
+
+    for field_name, field_info in model.model_fields.items():
+        annotation = field_info.annotation
+        if annotation is None:
+            continue
+
+        # Determine nullability
+        nullable = _is_field_nullable(field_info)
+
+        # Get the arrow type
+        arrow_type = pydantic_to_arrow_type(annotation, field_info)
+
+        # Get field metadata (description, etc.)
+        metadata = _get_field_metadata(field_info)
+
+        # Handle aliased fields (e.g., class_ -> class)
+        output_name = field_info.alias if field_info.alias else field_name
+
+        fields.append(pa.field(output_name, arrow_type, nullable=nullable, metadata=metadata))
+
+    return pa.struct(fields)
+
+
+def _is_field_nullable(field_info: FieldInfo) -> bool:
+    """Determine if a Pydantic field should be nullable in Arrow."""
+    # Check if the field has a default (making it optional)
+    if not field_info.is_required():
+        return True
+
+    # Check if the annotation includes None or MISSING
+    annotation = field_info.annotation
+    if annotation is None:
+        return True
+
+    # Unwrap and check for optional/omitable
+    is_omitable, _ = _unwrap_missing(annotation)
+    if is_omitable:
+        return True
+
+    inner, _ = _unwrap_annotated(annotation)
+    is_optional, _ = _unwrap_optional(inner)
+    return is_optional
+
+
+def _get_field_metadata(field_info: FieldInfo) -> dict[bytes, bytes] | None:
+    """
+    Extract metadata from Pydantic FieldInfo for Arrow field metadata.
+
+    Parameters
+    ----------
+    field_info : FieldInfo
+        Pydantic field info
+
+    Returns
+    -------
+    dict[bytes, bytes] | None
+        Arrow-compatible metadata dict, or None if no metadata
+    """
+    metadata: dict[str, str] = {}
+
+    if field_info.description:
+        metadata["description"] = field_info.description
+
+    if field_info.title:
+        metadata["title"] = field_info.title
+
+    if not metadata:
+        return None
+
+    # Convert to bytes for Arrow
+    return {k.encode(): v.encode() for k, v in metadata.items()}
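+
+
+# Nullability in a nutshell (illustrative sketch, assuming a minimal model):
+#
+#     class Example(BaseModel):
+#         name: str                   # required -> nullable=False in Arrow
+#         note: str | None = None     # union with None -> nullable=True
+#
+# Any field with a default, a None union member, or the MISSING sentinel is
+# treated as nullable; everything else becomes a required Arrow field.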
+
+
+def _extract_model_from_union(tp: Any) -> type[BaseModel] | None:
+    """
+    If the type is a Union containing BaseModels, extract the first one.
+
+    Returns None if the type is not a Union or doesn't contain BaseModels.
+    """
+    # Already a model class
+    if isinstance(tp, type) and issubclass(tp, BaseModel):
+        return tp
+
+    # Unwrap Annotated
+    inner, _ = _unwrap_annotated(tp)
+
+    origin = get_origin(inner)
+    if origin is Union or origin is UnionType:
+        args = get_args(inner)
+        for arg in args:
+            # Unwrap Annotated from union members
+            unwrapped, _ = _unwrap_annotated(arg)
+            if isinstance(unwrapped, type) and issubclass(unwrapped, BaseModel):
+                return unwrapped
+
+    return None
+
+
+def pydantic_model_to_arrow_schema(
+    model: type[BaseModel] | Any,
+    include_version_metadata: bool = True,
+) -> "pa.Schema":
+    """
+    Convert a Pydantic model class to a PyArrow schema.
+
+    Parameters
+    ----------
+    model : type[BaseModel] | Any
+        A Pydantic model class, or a Union type containing models
+    include_version_metadata : bool
+        Whether to include the schema version in metadata
+
+    Returns
+    -------
+    pa.Schema
+        A PyArrow schema with fields matching the model
+
+    Raises
+    ------
+    TypeError
+        If the model is not a Pydantic model and no model can be extracted
+        from it
+    """
+    import pyarrow as pa
+
+    # Handle Union types (like Segment = RoadSegment | RailSegment | WaterSegment)
+    # by taking the first model member; anything else is an error.
+    if not (isinstance(model, type) and issubclass(model, BaseModel)):
+        extracted = _extract_model_from_union(model)
+        if extracted is not None:
+            model = extracted
+        else:
+            raise TypeError(
+                f"Cannot generate a Parquet schema for '{model}': it is not "
+                "a Pydantic model or a union containing one. "
+                "Please specify a more specific type."
+            )
+
+    fields: list[pa.Field] = []
+
+    for field_name, field_info in model.model_fields.items():
+        annotation = field_info.annotation
+        if annotation is None:
+            continue
+
+        # Determine nullability
+        nullable = _is_field_nullable(field_info)
+
+        # Get the arrow type
+        arrow_type = pydantic_to_arrow_type(annotation, field_info)
+
+        # Get field metadata
+        metadata = _get_field_metadata(field_info)
+
+        # Handle aliased fields
+        output_name = field_info.alias if field_info.alias else field_name
+
+        fields.append(pa.field(output_name, arrow_type, nullable=nullable, metadata=metadata))
+
+    # Build schema metadata
+    schema_metadata: dict[bytes, bytes] = {}
+
+    if include_version_metadata:
+        try:
+            from overture.schema.cli.__about__ import __version__
+
+            schema_metadata[b"overture_schema_version"] = __version__.encode()
+        except ImportError:
+            pass
+
+    schema_metadata[b"model_name"] = model.__name__.encode()
+    if model.__module__:
+        schema_metadata[b"model_module"] = model.__module__.encode()
+
+    return pa.schema(fields, metadata=schema_metadata if schema_metadata else None)
+
+
+# ---------------------------------------------------------------------------
+# Schema comparison
+# ---------------------------------------------------------------------------
+
+
+def _describe_type(arrow_type: "pa.DataType") -> str:
+    """Return a concise, human-readable description of an Arrow data type."""
+    import pyarrow as pa
+
+    if isinstance(arrow_type, pa.StructType):
+        return f"struct<{arrow_type.num_fields} fields>"
+    if isinstance(arrow_type, pa.ListType):
+        return f"list<{_describe_type(arrow_type.value_type)}>"
+    if isinstance(arrow_type, pa.MapType):
+        return f"map<{_describe_type(arrow_type.key_type)}, {_describe_type(arrow_type.item_type)}>"
+    return str(arrow_type)
+
+
+def _compare_types(
+    expected_type: "pa.DataType",
+    actual_type: "pa.DataType",
+    path: str,
+    expected_nullable: bool,
+    actual_nullable: bool,
+) -> list[FieldDiff]:
+    """Recursively compare two Arrow data types, returning all differences."""
+    import pyarrow as pa
+
+    diffs: list[FieldDiff] = []
+
+    # Required in expected but nullable in actual is a problem
+    if not expected_nullable and actual_nullable:
+        diffs.append(FieldDiff(
+            path=path,
+            kind="nullability",
+            expected="non-nullable (required)",
+            actual="nullable",
+        ))
+
+    # Both structs: compare children recursively
+    if isinstance(expected_type, pa.StructType) and isinstance(actual_type, pa.StructType):
+        actual_children: dict[str, pa.Field] = {}
+        for i in range(actual_type.num_fields):
+            f = actual_type.field(i)
+            actual_children[f.name] = f
+
+        for i in range(expected_type.num_fields):
+            ef = expected_type.field(i)
+            child_path = f"{path}.{ef.name}"
+            if ef.name not in actual_children:
+                diffs.append(FieldDiff(
+                    path=child_path,
+                    kind="missing",
+                    expected=_describe_type(ef.type),
+                ))
+            else:
+                af = actual_children[ef.name]
+                diffs.extend(_compare_types(
+                    ef.type, af.type, child_path, ef.nullable, af.nullable,
+                ))
+
+        # Extra children within structs
+        expected_child_names = {
+            expected_type.field(i).name for i in range(expected_type.num_fields)
+        }
+        for name, af in actual_children.items():
+            if name not in expected_child_names:
+                diffs.append(FieldDiff(
+                    path=f"{path}.{name}",
+                    kind="extra",
+                    actual=_describe_type(af.type),
+                ))
+
+        return diffs
+
+    # Both lists: compare element types
+    if isinstance(expected_type, pa.ListType) and isinstance(actual_type, pa.ListType):
+        diffs.extend(_compare_types(
+            expected_type.value_type,
+            actual_type.value_type,
f"{path}.item", + expected_type.value_field.nullable, + actual_type.value_field.nullable, + )) + return diffs + + # Both maps: compare key and value types + if isinstance(expected_type, pa.MapType) and isinstance(actual_type, pa.MapType): + diffs.extend(_compare_types( + expected_type.key_type, + actual_type.key_type, + f"{path}.key", + expected_type.key_field.nullable, + actual_type.key_field.nullable, + )) + diffs.extend(_compare_types( + expected_type.item_type, + actual_type.item_type, + f"{path}.value", + expected_type.item_field.nullable, + actual_type.item_field.nullable, + )) + return diffs + + # Primitive / category-mismatch comparison + if expected_type != actual_type: + diffs.append(FieldDiff( + path=path, + kind="type_mismatch", + expected=_describe_type(expected_type), + actual=_describe_type(actual_type), + )) + + return diffs + + +def compare_schemas( + expected: "pa.Schema", + actual: "pa.Schema", + *, + ignore_fields: set[str] | None = None, +) -> SchemaDiff: + """Compare an expected Arrow schema against an actual (file) schema. + + Parameters + ---------- + expected : pa.Schema + The schema generated from the Pydantic model. + actual : pa.Schema + The schema read from a Parquet file. + ignore_fields : set[str] | None + Top-level field names to skip entirely during comparison. + + Returns + ------- + SchemaDiff + Complete diff result. + """ + ignore = ignore_fields or set() + missing: list[FieldDiff] = [] + extra: list[FieldDiff] = [] + type_mismatches: list[FieldDiff] = [] + nullability_issues: list[FieldDiff] = [] + + actual_fields_by_name = {f.name: f for f in actual} + + for expected_field in expected: + name = expected_field.name + if name in ignore: + continue + if name not in actual_fields_by_name: + missing.append(FieldDiff( + path=name, + kind="missing", + expected=_describe_type(expected_field.type), + )) + continue + + actual_field = actual_fields_by_name[name] + for d in _compare_types( + expected_field.type, + actual_field.type, + path=name, + expected_nullable=expected_field.nullable, + actual_nullable=actual_field.nullable, + ): + if d.kind == "nullability": + nullability_issues.append(d) + elif d.kind == "missing": + missing.append(d) + elif d.kind == "extra": + extra.append(d) + else: + type_mismatches.append(d) + + expected_field_names = set(expected.names) + for actual_field in actual: + if actual_field.name in ignore: + continue + if actual_field.name not in expected_field_names: + extra.append(FieldDiff( + path=actual_field.name, + kind="extra", + actual=_describe_type(actual_field.type), + )) + + return SchemaDiff( + missing_fields=missing, + extra_fields=extra, + type_mismatches=type_mismatches, + nullability_issues=nullability_issues, + ) diff --git a/packages/overture-schema-cli/src/overture/schema/cli/commands.py b/packages/overture-schema-cli/src/overture/schema/cli/commands.py index 8fdd8bdf..dbd59a14 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/commands.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/commands.py @@ -767,6 +767,367 @@ def json_schema_command( raise click.UsageError(str(e)) from e +def resolve_single_type( + namespace: str | None, + theme_name: str | None, + type_name: str, +) -> type[BaseModel]: + """Resolve CLI options to a single model type. 
diff --git a/packages/overture-schema-cli/src/overture/schema/cli/commands.py b/packages/overture-schema-cli/src/overture/schema/cli/commands.py
index 8fdd8bdf..dbd59a14 100644
--- a/packages/overture-schema-cli/src/overture/schema/cli/commands.py
+++ b/packages/overture-schema-cli/src/overture/schema/cli/commands.py
@@ -767,6 +767,367 @@ def json_schema_command(
         raise click.UsageError(str(e)) from e
 
 
+def resolve_single_type(
+    namespace: str | None,
+    theme_name: str | None,
+    type_name: str,
+) -> type[BaseModel]:
+    """Resolve CLI options to a single model type.
+
+    Parameters
+    ----------
+    namespace : str | None
+        Namespace to filter by (e.g., "overture", "annex")
+    theme_name : str | None
+        Theme name (e.g., "buildings")
+    type_name : str
+        Type name (e.g., "building")
+
+    Returns
+    -------
+    type[BaseModel]
+        The single matching Pydantic model class
+
+    Raises
+    ------
+    ValueError
+        If no model or multiple models match
+    """
+    all_models = discover_models(namespace=namespace)
+
+    matching = [
+        (key, model)
+        for key, model in all_models.items()
+        if key.type == type_name and (theme_name is None or key.theme == theme_name)
+    ]
+
+    if not matching:
+        msg = f"No model found for type '{type_name}'"
+        if theme_name:
+            msg += f" in theme '{theme_name}'"
+        raise ValueError(msg)
+
+    if len(matching) > 1:
+        themes = [k.theme for k, _ in matching]
+        raise ValueError(
+            f"Multiple models found for type '{type_name}': {themes}. "
+            "Specify --theme to disambiguate."
+        )
+
+    return matching[0][1]
+
+
+@cli.command("parquet-schema")
+@click.option(
+    "--theme",
+    help="Theme to generate schema for (e.g., buildings, places)",
+)
+@click.option(
+    "--type",
+    "type_name",
+    required=True,
+    help="Specific type to generate schema for (e.g., building, segment)",
+)
+@click.option(
+    "--namespace",
+    help="Namespace to filter by (e.g., overture, annex)",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["parquet", "text"]),
+    default="parquet",
+    help="Output format: 'parquet' for an empty .parquet file, 'text' for a schema description",
+)
+@click.option(
+    "--output",
+    "-o",
+    type=click.Path(path_type=Path),
+    help="Output file path (required for parquet format)",
+)
+def parquet_schema_command(
+    theme: str | None,
+    type_name: str,
+    namespace: str | None,
+    output_format: str,
+    output: Path | None,
+) -> None:
+    r"""Generate a Parquet schema for an Overture Maps type.
+
+    Outputs an empty Parquet file or a text schema representation that can be
+    used to compare against existing Parquet files or for documentation
+    purposes.
+
+    Requires pyarrow. Install with: pip install overture-schema-cli[parquet]
+
+    \b
+    Examples:
+      # Generate an empty Parquet file for the building type
+      $ overture-schema parquet-schema --theme buildings --type building -o building.parquet
+    \b
+      # Print the schema as text
+      $ overture-schema parquet-schema --theme buildings --type building --format text
+    \b
+      # Transportation segment
+      $ overture-schema parquet-schema --theme transportation --type segment -o segment.parquet
+    """
+    # Check for pyarrow availability
+    try:
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+    except ImportError:
+        raise click.UsageError(
+            "pyarrow is required for this command. "
+            "Install with: pip install overture-schema-cli[parquet]"
+        ) from None
" + "Install with: pip install overture-schema-cli[parquet]" + ) from None + + # Import conversion module + from .arrow_schema import pydantic_model_to_arrow_schema + + # Resolve to single model + try: + model_class = resolve_single_type(namespace, theme, type_name) + except ValueError as e: + raise click.UsageError(str(e)) from e + + # Convert to Arrow schema + arrow_schema = pydantic_model_to_arrow_schema(model_class) + + # Output based on format + if output_format == "text": + # Print schema to stdout + print(arrow_schema.to_string()) + if output: + with open(output, "w") as f: + f.write(arrow_schema.to_string()) + stdout.print(f"Schema written to {output}") + else: # parquet format + if not output: + raise click.UsageError("--output is required when using parquet format") + + # Write empty Parquet file with schema (preserve nullability) + empty_table = pa.table( + {field.name: pa.array([], type=field.type) for field in arrow_schema}, + schema=arrow_schema, + ) + pq.write_table(empty_table, output) + stdout.print(f"Wrote Parquet schema to {output}") + + +@cli.command("validate-schema") +@click.argument("filename") +@click.option( + "--theme", + help="Theme to check against (e.g., buildings, places)", +) +@click.option( + "--type", + "type_name", + required=True, + help="Specific type to check against (e.g., building, segment)", +) +@click.option( + "--namespace", + help="Namespace to filter by (e.g., overture, annex)", +) +@click.option( + "--strict", + is_flag=True, + default=False, + help="Require exact match: no extra fields allowed", +) +@click.option( + "--ignore", + "ignore_fields", + multiple=True, + help="Field name to skip during comparison (repeatable)", +) +@click.option( + "--skip-check", + "skip_checks", + multiple=True, + type=click.Choice(["missing", "extra", "type-mismatch", "nullability"]), + help="Difference category to exclude from pass/fail (repeatable)", +) +@click.option( + "-o", "--output", + "output", + type=click.Path(path_type=Path), + default=None, + help="Write diff to a file (.csv or .parquet)", +) +def validate_schema_command( + filename: str, + theme: str | None, + type_name: str, + namespace: str | None, + strict: bool, + ignore_fields: tuple[str, ...], + skip_checks: tuple[str, ...], + output: Path | None, +) -> None: + r"""Check whether a Parquet file's schema matches an Overture type. + + FILENAME can be a local path or a remote URI (s3://, gs://, etc.). + Reads schema metadata from FILENAME (no row data loaded) and compares it + against the expected Arrow schema generated from the specified type. + + By default, performs a subset check: the file must have all expected fields + with compatible types. Extra columns in the file are allowed. + + With --strict, requires an exact match with no extra or missing fields. + + Use --ignore to skip specific fields (e.g., fields added later in your + pipeline). Use --skip-check to exclude entire difference categories from + the pass/fail decision (they are still printed). + + Requires pyarrow. 
+
+
+@cli.command("validate-schema")
+@click.argument("filename")
+@click.option(
+    "--theme",
+    help="Theme to check against (e.g., buildings, places)",
+)
+@click.option(
+    "--type",
+    "type_name",
+    required=True,
+    help="Specific type to check against (e.g., building, segment)",
+)
+@click.option(
+    "--namespace",
+    help="Namespace to filter by (e.g., overture, annex)",
+)
+@click.option(
+    "--strict",
+    is_flag=True,
+    default=False,
+    help="Require an exact match: no extra fields allowed",
+)
+@click.option(
+    "--ignore",
+    "ignore_fields",
+    multiple=True,
+    help="Field name to skip during comparison (repeatable)",
+)
+@click.option(
+    "--skip-check",
+    "skip_checks",
+    multiple=True,
+    type=click.Choice(["missing", "extra", "type-mismatch", "nullability"]),
+    help="Difference category to exclude from pass/fail (repeatable)",
+)
+@click.option(
+    "-o", "--output",
+    "output",
+    type=click.Path(path_type=Path),
+    default=None,
+    help="Write the diff to a file (.csv or .parquet)",
+)
+def validate_schema_command(
+    filename: str,
+    theme: str | None,
+    type_name: str,
+    namespace: str | None,
+    strict: bool,
+    ignore_fields: tuple[str, ...],
+    skip_checks: tuple[str, ...],
+    output: Path | None,
+) -> None:
+    r"""Check whether a Parquet file's schema matches an Overture type.
+
+    FILENAME can be a local path or a remote URI (s3://, gs://, etc.).
+    Reads schema metadata from FILENAME (no row data is loaded) and compares
+    it against the expected Arrow schema generated from the specified type.
+
+    By default, performs a subset check: the file must have all expected
+    fields with compatible types. Extra columns in the file are allowed.
+
+    With --strict, requires an exact match with no extra or missing fields.
+
+    Use --ignore to skip specific fields (e.g., fields added later in your
+    pipeline). Use --skip-check to exclude entire difference categories from
+    the pass/fail decision (they are still printed).
+
+    Requires pyarrow. Install with: pip install overture-schema-cli[parquet]
+
+    \b
+    Examples:
+      # Check a building Parquet file (subset mode)
+      $ overture-schema validate-schema buildings.parquet --theme buildings --type building
+    \b
+      # Check a file from S3
+      $ overture-schema validate-schema s3://bucket/path/to/file.parquet --type building
+    \b
+      # Strict check (no extra fields allowed)
+      $ overture-schema validate-schema data.parquet --type place --strict
+    \b
+      # Skip the version and bbox fields
+      $ overture-schema validate-schema data.parquet --type building --ignore version --ignore bbox
+    \b
+      # Check everything except nullability differences
+      $ overture-schema validate-schema data.parquet --type division --skip-check nullability
+    """
+    from .format_adapters import FormatValidator
+
+    # Get the validator for the file format
+    try:
+        validator = FormatValidator.for_file(filename)
+    except ValueError as e:
+        raise click.UsageError(str(e)) from e
+
+    # Resolve to a single model
+    try:
+        model_class = resolve_single_type(namespace, theme, type_name)
+    except ValueError as e:
+        raise click.UsageError(str(e)) from e
+
+    # Validate the file against the model
+    try:
+        diff = validator.validate(filename, model_class, ignore_fields=set(ignore_fields))
+    except ImportError:
+        raise click.UsageError(
+            "pyarrow is required for this command. "
+            "Install with: pip install overture-schema-cli[parquet]"
+        ) from None
+    except Exception as e:
+        raise click.UsageError(f"Failed to read file '{filename}': {e}") from e
+
+    skipped = set(skip_checks)
+    ok = diff.passed(strict=strict, skip=skipped)
+
+    # Output
+    _print_schema_diff(diff, strict=strict, skip=skipped, filename=filename, type_name=type_name)
+
+    if output is not None:
+        _write_diff(diff, output)
+
+    if not ok:
+        sys.exit(1)
+
+
+def _write_diff(diff: "SchemaDiff", output: Path) -> None:  # noqa: F821
+    """Write schema diff rows to a CSV or Parquet file."""
+    rows = diff.to_rows()
+    suffix = output.suffix.lower()
+
+    if suffix == ".csv":
+        import csv
+
+        with open(output, "w", newline="") as f:
+            writer = csv.DictWriter(f, fieldnames=["path", "kind", "expected", "actual"])
+            writer.writeheader()
+            writer.writerows(rows)
+        stdout.print(f"Wrote {len(rows)} diff(s) to {output}")
+
+    elif suffix == ".parquet":
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        table = pa.table({
+            "path": [r["path"] for r in rows],
+            "kind": [r["kind"] for r in rows],
+            "expected": [r["expected"] for r in rows],
+            "actual": [r["actual"] for r in rows],
+        })
+        pq.write_table(table, output)
+        stdout.print(f"Wrote {len(rows)} diff(s) to {output}")
+
+    else:
+        raise click.UsageError(
+            f"Unsupported output format '{suffix}'. Use .csv or .parquet"
+        )
Use .csv or .parquet" + ) + + +def _print_schema_diff( + diff: "SchemaDiff", # noqa: F821 + *, + strict: bool, + skip: set[str] | None = None, + filename: Path, + type_name: str, +) -> None: + """Print a human-readable schema diff.""" + skip = skip or set() + ok = diff.passed(strict=strict, skip=skip) + mode = "strict" if strict else "subset" + + if ok: + stdout.print(f"SUCCESS, schema of '{filename}' matches type '{type_name}' ({mode} check)") + if diff.extra_fields and not strict: + stdout.print( + f" [dim]{len(diff.extra_fields)} extra field(s) in file " + f"(OK in subset mode)[/dim]" + ) + return + + out = stderr + + out.print(f"FAILURE, schema mismatch: '{filename}' vs type '{type_name}'") + out.print() + + if diff.missing_fields: + skipped = " [dim](skipped)[/dim]" if "missing" in skip else "" + out.print(f"[bold red]Missing fields ({len(diff.missing_fields)}):{skipped}[/bold red]") + for f in diff.missing_fields: + out.print(f" [red]- {f.path}[/red] (expected: {f.expected})") + out.print() + + if diff.type_mismatches: + skipped = " [dim](skipped)[/dim]" if "type-mismatch" in skip else "" + out.print( + f"[bold yellow]Type mismatches ({len(diff.type_mismatches)}):{skipped}[/bold yellow]" + ) + for f in diff.type_mismatches: + out.print(f" [yellow]~ {f.path}[/yellow]") + out.print(f" expected: {f.expected}") + out.print(f" actual: {f.actual}") + out.print() + + if diff.nullability_issues: + skipped = " [dim](skipped)[/dim]" if "nullability" in skip else "" + out.print( + f"[bold yellow]Nullability issues ({len(diff.nullability_issues)}):{skipped}[/bold yellow]" + ) + for f in diff.nullability_issues: + out.print(f" [yellow]~ {f.path}[/yellow]") + out.print(f" expected: {f.expected}") + out.print(f" actual: {f.actual}") + out.print() + + if strict and diff.extra_fields: + skipped = " [dim](skipped)[/dim]" if "extra" in skip else "" + out.print(f"[bold blue]Extra fields ({len(diff.extra_fields)}):{skipped}[/bold blue]") + for f in diff.extra_fields: + out.print(f" [blue]+ {f.path}[/blue] (type: {f.actual})") + out.print() + + def dump_namespace( theme_types: dict[str | None, list[tuple[ModelKey, type[BaseModel]]]], ) -> None: @@ -809,7 +1170,18 @@ def dump_namespace( @cli.command("list-types") -def list_types() -> None: +@click.option( + "--theme", + multiple=True, + help="Filter by theme (e.g., buildings, transportation). Can be repeated.", + default=(), +) +@click.option( + "--simple", + is_flag=True, + help="Simple output: just list type names without descriptions", +) +def list_types(theme: tuple[str, ...], simple: bool) -> None: r"""List all available types grouped by theme with descriptions. 
+
+
 def dump_namespace(
     theme_types: dict[str | None, list[tuple[ModelKey, type[BaseModel]]]],
 ) -> None:
@@ -809,7 +1170,18 @@ def dump_namespace(
 
 
 @cli.command("list-types")
-def list_types() -> None:
+@click.option(
+    "--theme",
+    multiple=True,
+    help="Filter by theme (e.g., buildings, transportation). Can be repeated.",
+    default=(),
+)
+@click.option(
+    "--simple",
+    is_flag=True,
+    help="Simple output: just list type names without descriptions",
+)
+def list_types(theme: tuple[str, ...], simple: bool) -> None:
     r"""List all available types grouped by theme with descriptions.
 
     Displays all registered Overture Maps types organized by theme,
@@ -819,15 +1191,26 @@ def list_types() -> None:
     Examples:
       # List all types
      $ overture-schema list-types
+    \b
+      # List only buildings types
+      $ overture-schema list-types --theme buildings
+    \b
+      # List buildings and places, simple output
+      $ overture-schema list-types --theme buildings --theme places --simple
     """
     try:
         models = discover_models()
+        theme_filter = set(theme) if theme else None
 
         # Group models by namespace and theme
         namespaces: dict[
             str, dict[str | None, list[tuple[ModelKey, type[BaseModel]]]]
         ] = {}
         for key, model_class in models.items():
+            # Filter by theme if specified
+            if theme_filter and key.theme not in theme_filter:
+                continue
+
             if key.namespace not in namespaces:
                 namespaces[key.namespace] = {}
             if key.theme not in namespaces[key.namespace]:
@@ -837,19 +1220,29 @@ def list_types() -> None:
 
         # display Overture themes first
         if "overture" in namespaces:
-            stdout.print("[bold red]OVERTURE THEMES[/bold red]", justify="center")
-            stdout.print()
-            dump_namespace(namespaces["overture"])
+            if simple:
+                for theme_key in sorted(namespaces["overture"].keys(), key=lambda x: (x is None, x)):
+                    for key, _ in sorted(namespaces["overture"][theme_key], key=lambda x: x[0].type):
+                        stdout.print(f"{key.type}")
+            else:
+                stdout.print("[bold red]OVERTURE THEMES[/bold red]", justify="center")
+                stdout.print()
+                dump_namespace(namespaces["overture"])
+                stdout.print("[bold red]ADDITIONAL TYPES[/bold red]", justify="center")
+                stdout.print()
 
-        stdout.print("[bold red]ADDITIONAL TYPES[/bold red]", justify="center")
-        stdout.print()
         for namespace in sorted(namespaces.keys()):
             if namespace == "overture":
                 continue
-            stdout.print(f"[bold blue]{namespace.upper()}[/bold blue]")
-            dump_namespace(namespaces[namespace])
+            if simple:
+                for theme_key in sorted(namespaces[namespace].keys(), key=lambda x: (x is None, x)):
+                    for key, _ in sorted(namespaces[namespace][theme_key], key=lambda x: x[0].type):
+                        stdout.print(f"{key.type}")
+            else:
+                stdout.print(f"[bold blue]{namespace.upper()}[/bold blue]")
+                dump_namespace(namespaces[namespace])
 
     except Exception as e:
diff --git a/packages/overture-schema-cli/src/overture/schema/cli/format_adapters.py b/packages/overture-schema-cli/src/overture/schema/cli/format_adapters.py
new file mode 100644
index 00000000..b588626e
--- /dev/null
+++ b/packages/overture-schema-cli/src/overture/schema/cli/format_adapters.py
@@ -0,0 +1,191 @@
+"""
+Format validators for schema validation against different file types.
+
+Each validator takes a file and a Pydantic model class and returns a SchemaDiff
+indicating any mismatches.
+"""
+""" + +from __future__ import annotations + +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import TYPE_CHECKING +from urllib.parse import urlparse + +if TYPE_CHECKING: + from pydantic import BaseModel + +# Type alias for paths that can be local files or remote URIs +FilePath = str | os.PathLike[str] + + +# --------------------------------------------------------------------------- +# Schema diff types +# --------------------------------------------------------------------------- + + +@dataclass +class FieldDiff: + """A single difference found when comparing two schema fields.""" + + path: str + kind: str # "missing", "extra", "type_mismatch", "nullability" + expected: str | None = None + actual: str | None = None + + +@dataclass +class SchemaDiff: + """Complete result of comparing expected vs actual schema.""" + + missing_fields: list[FieldDiff] = field(default_factory=list) + extra_fields: list[FieldDiff] = field(default_factory=list) + type_mismatches: list[FieldDiff] = field(default_factory=list) + nullability_issues: list[FieldDiff] = field(default_factory=list) + + @property + def is_compatible(self) -> bool: + """True if no missing fields, type mismatches, or nullability issues.""" + return ( + not self.missing_fields + and not self.type_mismatches + and not self.nullability_issues + ) + + @property + def is_exact_match(self) -> bool: + """True if compatible and no extra fields.""" + return self.is_compatible and not self.extra_fields + + def passed(self, *, strict: bool = False, skip: set[str] | None = None) -> bool: + """Check if the diff passes with optional skipped categories. + + Parameters + ---------- + strict : bool + If True, extra fields also cause failure. + skip : set[str] | None + Categories to ignore: "missing", "extra", "type-mismatch", "nullability". + """ + skip = skip or set() + checks = { + "missing": self.missing_fields, + "type-mismatch": self.type_mismatches, + "nullability": self.nullability_issues, + } + if strict: + checks["extra"] = self.extra_fields + return all( + not diffs for name, diffs in checks.items() if name not in skip + ) + + def to_rows(self) -> list[dict[str, str | None]]: + """Flatten all diffs into a list of row dicts for tabular export.""" + return [ + {"path": d.path, "kind": d.kind, "expected": d.expected, "actual": d.actual} + for d in ( + self.missing_fields + self.extra_fields + + self.type_mismatches + self.nullability_issues + ) + ] + + +# --------------------------------------------------------------------------- +# Format validators +# --------------------------------------------------------------------------- + + +def _get_file_extension(path: FilePath) -> str: + """Extract file extension from a local path or remote URI. + """ + path_str = str(path) + parsed = urlparse(path_str) + + # For URIs with a scheme (s3://, gs://, etc.), use the path component + if parsed.scheme and parsed.scheme not in ("", "file"): + file_path = parsed.path + else: + file_path = path_str + + # Extract extension from the path portion + _, ext = os.path.splitext(file_path) + return ext.lower() + + +class FormatValidator(ABC): + """Base class for format-specific schema validators.""" + + @abstractmethod + def validate( + self, + path: FilePath, + model: type["BaseModel"], + *, + ignore_fields: set[str] | None = None, + ) -> SchemaDiff: + """ + Validate a file's schema against a Pydantic model. 
+
+
+# ---------------------------------------------------------------------------
+# Format validators
+# ---------------------------------------------------------------------------
+
+
+def _get_file_extension(path: FilePath) -> str:
+    """Extract the file extension from a local path or remote URI."""
+    path_str = str(path)
+    parsed = urlparse(path_str)
+
+    # For URIs with a scheme (s3://, gs://, etc.), use the path component
+    if parsed.scheme and parsed.scheme not in ("", "file"):
+        file_path = parsed.path
+    else:
+        file_path = path_str
+
+    # Extract the extension from the path portion
+    _, ext = os.path.splitext(file_path)
+    return ext.lower()
+
+
+class FormatValidator(ABC):
+    """Base class for format-specific schema validators."""
+
+    @abstractmethod
+    def validate(
+        self,
+        path: FilePath,
+        model: type["BaseModel"],
+        *,
+        ignore_fields: set[str] | None = None,
+    ) -> SchemaDiff:
+        """
+        Validate a file's schema against a Pydantic model.
+
+        Parameters
+        ----------
+        path : FilePath
+            Path to the file (a local path or a remote URI like s3://)
+        model : type[BaseModel]
+            Pydantic model class defining the expected schema
+        ignore_fields : set[str] | None
+            Field names to skip during comparison
+
+        Returns
+        -------
+        SchemaDiff
+            Differences between the expected and actual schema
+        """
+
+    @classmethod
+    def for_file(cls, path: FilePath) -> "FormatValidator":
+        """Select the appropriate validator based on file extension.
+
+        Directories (no extension) default to Parquet for dataset support.
+        """
+        validators: dict[str, type[FormatValidator]] = {
+            ".parquet": ParquetValidator,
+            "": ParquetValidator,  # directories default to parquet
+        }
+        suffix = _get_file_extension(path)
+        if suffix not in validators:
+            supported = ", ".join(sorted(k for k in validators.keys() if k))
+            raise ValueError(
+                f"Unsupported file format '{suffix}'. Supported: {supported}"
+            )
+        return validators[suffix]()
+
+
+class ParquetValidator(FormatValidator):
+    """Validator for Parquet files.
+
+    Supports both local paths and remote URIs (s3://, gs://, etc.).
+    """
+
+    def validate(
+        self,
+        path: FilePath,
+        model: type[BaseModel],
+        *,
+        ignore_fields: set[str] | None = None,
+    ) -> SchemaDiff:
+        import pyarrow.parquet as pq
+
+        from .arrow_schema import compare_schemas, pydantic_model_to_arrow_schema
+
+        # Convert the model to an Arrow schema
+        expected_schema = pydantic_model_to_arrow_schema(model)
+
+        # Read the actual schema from the file/directory/dataset;
+        # ParquetDataset handles single files, directories, and partitioned datasets
+        dataset = pq.ParquetDataset(str(path))
+        actual_schema = dataset.schema
+
+        # Compare
+        return compare_schemas(expected_schema, actual_schema, ignore_fields=ignore_fields)
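+
+
+# Typical entry point (illustrative; assumes pyarrow is installed and that the
+# referenced Parquet data exists):
+#
+#     from overture.schema.buildings import Building
+#
+#     validator = FormatValidator.for_file("s3://bucket/buildings.parquet")
+#     diff = validator.validate("s3://bucket/buildings.parquet", Building)
+#     print(diff.is_compatible, diff.is_exact_match)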
diff --git a/packages/overture-schema-cli/tests/test_parquet_schema.py b/packages/overture-schema-cli/tests/test_parquet_schema.py
new file mode 100644
index 00000000..95566de6
--- /dev/null
+++ b/packages/overture-schema-cli/tests/test_parquet_schema.py
@@ -0,0 +1,701 @@
+"""Tests for the parquet-schema command and Arrow conversion."""
+
+from collections.abc import Iterator
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+# Skip all tests if pyarrow is not available
+pa = pytest.importorskip("pyarrow")
+pq = pytest.importorskip("pyarrow.parquet")
+
+from overture.schema.cli.arrow_schema import (
+    _describe_type,
+    compare_schemas,
+    pydantic_model_to_arrow_schema,
+    pydantic_to_arrow_type,
+)
+from overture.schema.cli.commands import cli
+from overture.schema.cli.format_adapters import _get_file_extension
+
+
+class TestArrowSchemaConversion:
+    """Tests for Pydantic to Arrow schema conversion."""
+
+    def test_primitive_int_types(self) -> None:
+        """Test that integer primitive types map correctly."""
+        from overture.schema.system.primitive import int8, int16, int32, int64
+
+        assert pydantic_to_arrow_type(int8) == pa.int8()
+        assert pydantic_to_arrow_type(int16) == pa.int16()
+        assert pydantic_to_arrow_type(int32) == pa.int32()
+        assert pydantic_to_arrow_type(int64) == pa.int64()
+
+    def test_primitive_uint_types(self) -> None:
+        """Test that unsigned integer primitive types map correctly."""
+        from overture.schema.system.primitive import uint8, uint16, uint32
+
+        assert pydantic_to_arrow_type(uint8) == pa.uint8()
+        assert pydantic_to_arrow_type(uint16) == pa.uint16()
+        assert pydantic_to_arrow_type(uint32) == pa.uint32()
+
+    def test_primitive_float_types(self) -> None:
+        """Test that float primitive types map correctly."""
+        from overture.schema.system.primitive import float32, float64
+
+        assert pydantic_to_arrow_type(float32) == pa.float32()
+        assert pydantic_to_arrow_type(float64) == pa.float64()
+
+    def test_basic_python_types(self) -> None:
+        """Test that basic Python types map correctly."""
+        assert pydantic_to_arrow_type(str) == pa.utf8()
+        assert pydantic_to_arrow_type(int) == pa.int64()
+        assert pydantic_to_arrow_type(float) == pa.float64()
+        assert pydantic_to_arrow_type(bool) == pa.bool_()
+        assert pydantic_to_arrow_type(bytes) == pa.binary()
+
+    def test_string_newtypes(self) -> None:
+        """Test that string newtypes map to utf8."""
+        from overture.schema.system.string import CountryCodeAlpha2, LanguageTag
+
+        assert pydantic_to_arrow_type(CountryCodeAlpha2) == pa.utf8()
+        assert pydantic_to_arrow_type(LanguageTag) == pa.utf8()
+
+    def test_geometry_to_binary(self) -> None:
+        """Test that geometry converts to binary for WKB encoding."""
+        from overture.schema.system.primitive import Geometry
+
+        assert pydantic_to_arrow_type(Geometry) == pa.binary()
+
+    def test_bbox_to_struct(self) -> None:
+        """Test that BBox converts to a struct with four float32 fields."""
+        from overture.schema.system.primitive import BBox
+
+        arrow_type = pydantic_to_arrow_type(BBox)
+        expected = pa.struct(
+            [
+                pa.field("xmin", pa.float32()),
+                pa.field("ymin", pa.float32()),
+                pa.field("xmax", pa.float32()),
+                pa.field("ymax", pa.float32()),
+            ]
+        )
+        assert arrow_type == expected
+
+    def test_list_type(self) -> None:
+        """Test that list[T] converts to pa.list_(T)."""
+        arrow_type = pydantic_to_arrow_type(list[str])
+        assert arrow_type == pa.list_(pa.utf8())
+
+        arrow_type = pydantic_to_arrow_type(list[int])
+        assert arrow_type == pa.list_(pa.int64())
+
+    def test_optional_type(self) -> None:
+        """Test that T | None still returns the base type (nullability handled separately)."""
+        # The nullability is handled at the field level, not the type level
+        assert pydantic_to_arrow_type(str | None) == pa.utf8()
+        assert pydantic_to_arrow_type(int | None) == pa.int64()
+
+    def test_enum_to_utf8(self) -> None:
+        """Test that string enums convert to utf8."""
+        from enum import Enum
+
+        class Color(str, Enum):
+            RED = "red"
+            GREEN = "green"
+            BLUE = "blue"
+
+        assert pydantic_to_arrow_type(Color) == pa.utf8()
+
+    def test_nested_model_to_struct(self) -> None:
+        """Test that a nested BaseModel converts to a struct."""
+        from pydantic import BaseModel
+
+        class Inner(BaseModel):
+            value: str
+            count: int
+
+        arrow_type = pydantic_to_arrow_type(Inner)
+        assert isinstance(arrow_type, pa.StructType)
+        assert "value" in [f.name for f in arrow_type]
+        assert "count" in [f.name for f in arrow_type]
+
+    def test_building_model_schema(self) -> None:
+        """Test that the full Building model converts to a valid schema."""
+        from overture.schema.buildings import Building
+
+        schema = pydantic_model_to_arrow_schema(Building)
+
+        # Check essential fields exist
+        field_names = schema.names
+        assert "id" in field_names
+        assert "geometry" in field_names
+        assert "theme" in field_names
+        assert "type" in field_names
+        assert "version" in field_names
+
+        # Check geometry is binary (WKB)
+        geometry_field = schema.field("geometry")
+        assert geometry_field.type == pa.binary()
+
+    def test_place_model_schema(self) -> None:
+        """Test that the Place model converts to a valid schema."""
+        from overture.schema.places import Place
+
+        schema = pydantic_model_to_arrow_schema(Place)
+
+        field_names = schema.names
+        assert "id" in field_names
+        assert "geometry" in field_names
+        assert "operating_status" in field_names
+        assert "names" in field_names
+
+    def test_schema_includes_metadata(self) -> None:
+        """Test that the schema includes model metadata."""
+        from overture.schema.buildings import Building
+
+        schema = pydantic_model_to_arrow_schema(Building, include_version_metadata=True)
+
+        assert schema.metadata is not None
+        assert b"model_name" in schema.metadata
+        assert schema.metadata[b"model_name"] == b"Building"
+
+
+class TestParquetSchemaCommand:
+    """Tests for the parquet-schema CLI command."""
+
+    @pytest.fixture
+    def cli_runner(self) -> Iterator[CliRunner]:
+        """Provide a CliRunner within an isolated filesystem."""
+        runner = CliRunner()
+        with runner.isolated_filesystem():
+            yield runner
+
+    def test_parquet_schema_text_output(self, cli_runner: CliRunner) -> None:
+        """Test that parquet-schema with text format outputs a schema description."""
+        result = cli_runner.invoke(
+            cli,
+            ["parquet-schema", "--theme", "buildings", "--type", "building", "--format", "text"],
+        )
+        assert result.exit_code == 0
+        assert "id:" in result.output
+        assert "geometry:" in result.output
+        assert "theme:" in result.output
+
+    def test_parquet_schema_file_output(self, cli_runner: CliRunner) -> None:
+        """Test that parquet-schema with parquet file output creates a valid file."""
+        result = cli_runner.invoke(
+            cli,
+            [
+                "parquet-schema",
+                "--theme",
+                "buildings",
+                "--type",
+                "building",
+                "--format",
+                "parquet",
+                "-o",
+                "building.parquet",
+            ],
+        )
+        assert result.exit_code == 0
+
+        # Verify the file was created
+        assert Path("building.parquet").exists()
+
+        # Verify the schema
+        table = pq.read_table("building.parquet")
+        assert len(table) == 0  # Empty table
+        assert "id" in table.schema.names
+        assert "geometry" in table.schema.names
+
+    def test_parquet_schema_requires_type(self, cli_runner: CliRunner) -> None:
+        """Test that --type is required."""
+        result = cli_runner.invoke(
+            cli, ["parquet-schema", "--theme", "buildings", "--format", "text"]
+        )
+        assert result.exit_code != 0
+        assert "Missing option" in result.output or "required" in result.output.lower()
+
+    def test_parquet_schema_requires_output_for_parquet_format(
+        self, cli_runner: CliRunner
+    ) -> None:
+        """Test that the parquet format requires an output file."""
+        result = cli_runner.invoke(
+            cli,
+            [
+                "parquet-schema",
+                "--theme",
+                "buildings",
+                "--type",
+                "building",
+                "--format",
+                "parquet",
+            ],
+        )
+        assert result.exit_code != 0
+        assert "--output" in result.output or "required" in result.output.lower()
+
+    def test_parquet_schema_invalid_type(self, cli_runner: CliRunner) -> None:
+        """Test error handling for an invalid type."""
+        result = cli_runner.invoke(
+            cli, ["parquet-schema", "--type", "nonexistent_type", "--format", "text"]
+        )
+        assert result.exit_code != 0
+        assert "No model found" in result.output
+
+    def test_parquet_schema_ambiguous_type(self, cli_runner: CliRunner) -> None:
+        """Test that an ambiguous type without a theme gives a helpful error."""
+        # This test assumes there might be types with the same name in different
+        # themes; if not, it simply verifies the happy path works
+        result = cli_runner.invoke(
+            cli, ["parquet-schema", "--theme", "buildings", "--type", "building", "--format", "text"]
+        )
+        # Should succeed with the theme specified
+        assert result.exit_code == 0
+
+    def test_parquet_schema_segment_type(self, cli_runner: CliRunner) -> None:
+        """Test that parquet-schema works for a transportation segment."""
+        result = cli_runner.invoke(
+            cli,
+            [
+                "parquet-schema",
+                "--theme",
+                "transportation",
+                "--type",
+                "segment",
+                "--format",
+                "text",
+            ],
+        )
+        assert result.exit_code == 0
assert "geometry:" in result.output + + def test_parquet_schema_with_namespace(self, cli_runner: CliRunner) -> None: + """Test parquet-schema with namespace filter.""" + result = cli_runner.invoke( + cli, + [ + "parquet-schema", + "--namespace", + "overture", + "--theme", + "buildings", + "--type", + "building", + "--format", + "text", + ], + ) + assert result.exit_code == 0 + + +class TestSchemaComparison: + """Tests for Arrow schema comparison logic.""" + + def test_identical_schemas_match(self) -> None: + """Two identical schemas should produce no diffs.""" + schema = pa.schema([ + pa.field("id", pa.utf8(), nullable=False), + pa.field("value", pa.int64()), + ]) + diff = compare_schemas(schema, schema) + assert diff.is_compatible + assert diff.is_exact_match + + def test_missing_field_detected(self) -> None: + """A field in expected but not in actual is reported as missing.""" + expected = pa.schema([ + pa.field("id", pa.utf8()), + pa.field("name", pa.utf8()), + ]) + actual = pa.schema([ + pa.field("id", pa.utf8()), + ]) + diff = compare_schemas(expected, actual) + assert not diff.is_compatible + assert len(diff.missing_fields) == 1 + assert diff.missing_fields[0].path == "name" + + def test_extra_field_subset_compatible(self) -> None: + """Extra fields are tracked but is_compatible still returns True.""" + expected = pa.schema([ + pa.field("id", pa.utf8()), + ]) + actual = pa.schema([ + pa.field("id", pa.utf8()), + pa.field("extra", pa.int64()), + ]) + diff = compare_schemas(expected, actual) + assert diff.is_compatible + assert not diff.is_exact_match + assert len(diff.extra_fields) == 1 + assert diff.extra_fields[0].path == "extra" + + def test_type_mismatch_detected(self) -> None: + """A field with different types is reported as type_mismatch.""" + expected = pa.schema([pa.field("height", pa.float64())]) + actual = pa.schema([pa.field("height", pa.utf8())]) + diff = compare_schemas(expected, actual) + assert not diff.is_compatible + assert len(diff.type_mismatches) == 1 + assert diff.type_mismatches[0].path == "height" + assert diff.type_mismatches[0].expected == "double" + assert diff.type_mismatches[0].actual == "string" + + def test_nullable_expected_nonnullable_actual_ok(self) -> None: + """Expected nullable, actual non-nullable is fine (stricter).""" + expected = pa.schema([pa.field("id", pa.utf8(), nullable=True)]) + actual = pa.schema([pa.field("id", pa.utf8(), nullable=False)]) + diff = compare_schemas(expected, actual) + assert diff.is_compatible + assert len(diff.nullability_issues) == 0 + + def test_nonnullable_expected_nullable_actual_fails(self) -> None: + """Expected non-nullable, actual nullable is a nullability issue.""" + expected = pa.schema([pa.field("id", pa.utf8(), nullable=False)]) + actual = pa.schema([pa.field("id", pa.utf8(), nullable=True)]) + diff = compare_schemas(expected, actual) + assert not diff.is_compatible + assert len(diff.nullability_issues) == 1 + assert diff.nullability_issues[0].path == "id" + + def test_nested_struct_comparison(self) -> None: + """Fields within nested structs are compared recursively.""" + struct_type = pa.struct([ + pa.field("x", pa.float64()), + pa.field("y", pa.float64()), + ]) + expected = pa.schema([pa.field("point", struct_type)]) + actual = pa.schema([pa.field("point", struct_type)]) + diff = compare_schemas(expected, actual) + assert diff.is_compatible + + def test_nested_struct_missing_child(self) -> None: + """Missing child in nested struct reported with dotted path.""" + expected_struct = pa.struct([ + pa.field("x", 
+
+    def test_nested_struct_missing_child(self) -> None:
+        """A missing child in a nested struct is reported with a dotted path."""
+        expected_struct = pa.struct([
+            pa.field("x", pa.float64()),
+            pa.field("y", pa.float64()),
+        ])
+        actual_struct = pa.struct([
+            pa.field("x", pa.float64()),
+        ])
+        expected = pa.schema([pa.field("point", expected_struct)])
+        actual = pa.schema([pa.field("point", actual_struct)])
+        diff = compare_schemas(expected, actual)
+        assert not diff.is_compatible
+        assert len(diff.missing_fields) == 1
+        assert diff.missing_fields[0].path == "point.y"
+
+    def test_nested_struct_type_mismatch(self) -> None:
+        """A type mismatch within a nested struct uses a dotted path."""
+        expected_struct = pa.struct([pa.field("value", pa.int64())])
+        actual_struct = pa.struct([pa.field("value", pa.utf8())])
+        expected = pa.schema([pa.field("data", expected_struct)])
+        actual = pa.schema([pa.field("data", actual_struct)])
+        diff = compare_schemas(expected, actual)
+        assert not diff.is_compatible
+        assert len(diff.type_mismatches) == 1
+        assert diff.type_mismatches[0].path == "data.value"
+
+    def test_list_element_type_comparison(self) -> None:
+        """List element types are compared."""
+        expected = pa.schema([pa.field("tags", pa.list_(pa.utf8()))])
+        actual = pa.schema([pa.field("tags", pa.list_(pa.int64()))])
+        diff = compare_schemas(expected, actual)
+        assert not diff.is_compatible
+        assert len(diff.type_mismatches) == 1
+        assert diff.type_mismatches[0].path == "tags.item"
+
+    def test_map_key_value_type_comparison(self) -> None:
+        """Map key and value types are compared."""
+        expected = pa.schema([pa.field("props", pa.map_(pa.utf8(), pa.int64()))])
+        actual = pa.schema([pa.field("props", pa.map_(pa.utf8(), pa.utf8()))])
+        diff = compare_schemas(expected, actual)
+        assert not diff.is_compatible
+        assert len(diff.type_mismatches) == 1
+        assert diff.type_mismatches[0].path == "props.value"
+
+    def test_struct_vs_primitive_mismatch(self) -> None:
+        """A struct expected but a primitive found reports a type_mismatch."""
+        struct_type = pa.struct([pa.field("x", pa.float64())])
+        expected = pa.schema([pa.field("data", struct_type)])
+        actual = pa.schema([pa.field("data", pa.utf8())])
+        diff = compare_schemas(expected, actual)
+        assert not diff.is_compatible
+        assert len(diff.type_mismatches) == 1
+        assert "struct" in diff.type_mismatches[0].expected
+
+    def test_describe_type_primitives(self) -> None:
+        """_describe_type returns readable strings for primitive types."""
+        assert _describe_type(pa.utf8()) == "string"
+        assert _describe_type(pa.int64()) == "int64"
+        assert _describe_type(pa.float64()) == "double"
+        assert _describe_type(pa.bool_()) == "bool"
+
+    def test_describe_type_complex(self) -> None:
+        """_describe_type returns readable strings for struct/list/map."""
+        struct_type = pa.struct([
+            pa.field("a", pa.utf8()),
+            pa.field("b", pa.int64()),
+        ])
+        assert _describe_type(struct_type) == "struct<2 fields>"
+        assert _describe_type(pa.list_(pa.utf8())) == "list<string>"
+        assert _describe_type(pa.map_(pa.utf8(), pa.int64())) == "map<string, int64>"
+
+    def test_ignore_missing_field(self) -> None:
+        """Ignored fields are not reported as missing."""
+        expected = pa.schema([
+            pa.field("id", pa.utf8()),
+            pa.field("version", pa.int32()),
+            pa.field("bbox", pa.binary()),
+        ])
+        actual = pa.schema([
+            pa.field("id", pa.utf8()),
+        ])
+        diff = compare_schemas(expected, actual, ignore_fields={"version", "bbox"})
+        assert diff.is_compatible
+        assert diff.is_exact_match
+        assert len(diff.missing_fields) == 0
+
+    def test_ignore_extra_field(self) -> None:
+        """Ignored fields are not reported as extra."""
+        expected = pa.schema([
+            pa.field("id", pa.utf8()),
+        ])
+        actual = pa.schema([
+            pa.field("id", pa.utf8()),
+            pa.field("version", pa.int32()),
+        ])
+        diff = compare_schemas(expected, actual, ignore_fields={"version"})
+        assert diff.is_compatible
+        assert diff.is_exact_match
+        assert len(diff.extra_fields) == 0
+
+    def test_ignore_does_not_affect_other_fields(self) -> None:
+        """Ignoring one field does not affect checks on other fields."""
+        expected = pa.schema([
+            pa.field("id", pa.utf8()),
+            pa.field("version", pa.int32()),
+            pa.field("name", pa.utf8()),
+        ])
+        actual = pa.schema([
+            pa.field("id", pa.utf8()),
+        ])
+        diff = compare_schemas(expected, actual, ignore_fields={"version"})
+        assert not diff.is_compatible
+        assert len(diff.missing_fields) == 1
+        assert diff.missing_fields[0].path == "name"
+
+    def test_building_schema_self_check(self) -> None:
+        """The Building schema compared to itself should be an exact match."""
+        from overture.schema.buildings import Building
+
+        schema = pydantic_model_to_arrow_schema(Building)
+        diff = compare_schemas(schema, schema)
+        assert diff.is_compatible
+        assert diff.is_exact_match
+
+
+class TestCheckSchemaCommand:
+    """Tests for the validate-schema CLI command."""
+
+    @pytest.fixture
+    def cli_runner(self) -> Iterator[CliRunner]:
+        """Provide a CliRunner within an isolated filesystem."""
+        runner = CliRunner()
+        with runner.isolated_filesystem():
+            yield runner
+
+    @pytest.fixture
+    def building_parquet(self, cli_runner: CliRunner) -> Path:
+        """Create a valid building Parquet file for testing."""
+        cli_runner.invoke(cli, [
+            "parquet-schema", "--theme", "buildings",
+            "--type", "building", "-o", "building.parquet",
+        ])
+        return Path("building.parquet")
+
+    def test_matching_schema_passes(self, cli_runner: CliRunner, building_parquet: Path) -> None:
+        """A file generated from the same model should pass the subset check."""
+        result = cli_runner.invoke(cli, [
+            "validate-schema", str(building_parquet),
+            "--theme", "buildings", "--type", "building",
+        ])
+        assert result.exit_code == 0
+
+    def test_matching_schema_strict_passes(self, cli_runner: CliRunner, building_parquet: Path) -> None:
+        """A file generated from the same model should pass the strict check."""
+        result = cli_runner.invoke(cli, [
+            "validate-schema", str(building_parquet),
+            "--theme", "buildings", "--type", "building", "--strict",
+        ])
+        assert result.exit_code == 0
+
+    def test_extra_columns_subset_passes(self, cli_runner: CliRunner) -> None:
+        """A file with extra columns passes in subset (default) mode."""
+        from overture.schema.buildings import Building
+
+        schema = pydantic_model_to_arrow_schema(Building)
+        # Add an extra column, preserving nullability from the original schema
+        fields = list(schema) + [pa.field("custom_col", pa.utf8())]
+        extended_schema = pa.schema(fields)
+        table = pa.table(
+            {f.name: pa.array([], type=f.type) for f in extended_schema},
+            schema=extended_schema,
+        )
+        pq.write_table(table, "extended.parquet")
+
+        result = cli_runner.invoke(cli, [
+            "validate-schema", "extended.parquet",
+            "--theme", "buildings", "--type", "building",
+        ])
+        assert result.exit_code == 0
+
+    def test_extra_columns_strict_fails(self, cli_runner: CliRunner) -> None:
+        """A file with extra columns fails in strict mode."""
+        from overture.schema.buildings import Building
+
+        schema = pydantic_model_to_arrow_schema(Building)
+        fields = list(schema) + [pa.field("custom_col", pa.utf8())]
+        extended_schema = pa.schema(fields)
+        table = pa.table(
+            {f.name: pa.array([], type=f.type) for f in extended_schema},
+            schema=extended_schema,
+        )
+        pq.write_table(table, "extended.parquet")
+
cli_runner.invoke(cli, [ + "validate-schema", "extended.parquet", + "--theme", "buildings", "--type", "building", "--strict", + ]) + assert result.exit_code != 0 + + def test_missing_field_fails(self, cli_runner: CliRunner) -> None: + """File missing required fields fails check.""" + schema = pa.schema([ + pa.field("id", pa.utf8()), + pa.field("geometry", pa.binary()), + ]) + table = pa.table( + {f.name: pa.array([], type=f.type) for f in schema}, + ) + pq.write_table(table, "partial.parquet") + + result = cli_runner.invoke(cli, [ + "validate-schema", "partial.parquet", + "--theme", "buildings", "--type", "building", + ]) + assert result.exit_code != 0 + + def test_invalid_type_fails(self, cli_runner: CliRunner, building_parquet: Path) -> None: + """Invalid --type gives clear error.""" + result = cli_runner.invoke(cli, [ + "validate-schema", str(building_parquet), "--type", "nonexistent_type", + ]) + assert result.exit_code != 0 + assert "No model found" in result.output + + def test_exit_code_zero_on_match(self, cli_runner: CliRunner, building_parquet: Path) -> None: + """Exit code is 0 when schema matches.""" + result = cli_runner.invoke(cli, [ + "validate-schema", str(building_parquet), + "--theme", "buildings", "--type", "building", + ]) + assert result.exit_code == 0 + + def test_exit_code_nonzero_on_mismatch(self, cli_runner: CliRunner) -> None: + """Exit code is non-zero when schema doesn't match.""" + schema = pa.schema([pa.field("wrong", pa.utf8())]) + table = pa.table({"wrong": pa.array([], type=pa.utf8())}) + pq.write_table(table, "wrong.parquet") + + result = cli_runner.invoke(cli, [ + "validate-schema", "wrong.parquet", + "--theme", "buildings", "--type", "building", + ]) + assert result.exit_code != 0 + + def test_ignore_missing_fields_passes(self, cli_runner: CliRunner) -> None: + """--ignore allows a file missing those fields to pass.""" + from overture.schema.buildings import Building + + schema = pydantic_model_to_arrow_schema(Building) + # Remove version and bbox from the file's schema + fields = [f for f in schema if f.name not in ("version", "bbox")] + reduced_schema = pa.schema(fields) + table = pa.table( + {f.name: pa.array([], type=f.type) for f in reduced_schema}, + schema=reduced_schema, + ) + pq.write_table(table, "no_version_bbox.parquet") + + result = cli_runner.invoke(cli, [ + "validate-schema", "no_version_bbox.parquet", + "--theme", "buildings", "--type", "building", + "--ignore", "version", "--ignore", "bbox", + ]) + assert result.exit_code == 0 + + def test_ignore_without_flag_still_fails(self, cli_runner: CliRunner) -> None: + """Without --ignore, missing version/bbox causes failure.""" + from overture.schema.buildings import Building + + schema = pydantic_model_to_arrow_schema(Building) + fields = [f for f in schema if f.name not in ("version", "bbox")] + reduced_schema = pa.schema(fields) + table = pa.table( + {f.name: pa.array([], type=f.type) for f in reduced_schema}, + schema=reduced_schema, + ) + pq.write_table(table, "no_version_bbox.parquet") + + result = cli_runner.invoke(cli, [ + "validate-schema", "no_version_bbox.parquet", + "--theme", "buildings", "--type", "building", + ]) + assert result.exit_code != 0 + + +class TestFileExtensionParsing: + """Tests for _get_file_extension with local paths and remote URIs.""" + + def test_local_path_string(self) -> None: + """Local path as string returns correct extension.""" + assert _get_file_extension("data/file.parquet") == ".parquet" + assert _get_file_extension("/absolute/path/file.parquet") == 
".parquet" + + def test_local_path_object(self) -> None: + """Local Path object returns correct extension.""" + from pathlib import Path + assert _get_file_extension(Path("data/file.parquet")) == ".parquet" + + def test_s3_uri(self) -> None: + """S3 URI returns correct extension.""" + uri = "s3://bucket/path/to/file.parquet" + assert _get_file_extension(uri) == ".parquet" + + def test_s3_uri_with_partition(self) -> None: + """S3 URI with partition path returns correct extension.""" + uri = "s3://overturemaps-us-west-2/release/2026-01-21.0/theme=addresses/type=address/part-00000.zstd.parquet" + assert _get_file_extension(uri) == ".parquet" + + def test_gs_uri(self) -> None: + """Google Cloud Storage URI returns correct extension.""" + uri = "gs://bucket/path/to/file.parquet" + assert _get_file_extension(uri) == ".parquet" + + def test_file_uri(self) -> None: + """file:// URI returns correct extension.""" + uri = "file:///home/user/data.parquet" + assert _get_file_extension(uri) == ".parquet" + + def test_unsupported_extension(self) -> None: + """Unsupported extension is returned as-is.""" + assert _get_file_extension("s3://bucket/file.csv") == ".csv" + assert _get_file_extension("data.json") == ".json" + + def test_no_extension(self) -> None: + """Path with no extension returns empty string.""" + assert _get_file_extension("s3://bucket/noext") == "" + assert _get_file_extension("/path/to/noext") == "" diff --git a/pyproject.toml b/pyproject.toml index 92201b71..b335b6a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,8 +50,10 @@ dev = [ "mypy>=1.17.0", "pdoc>=15.0.4", "pydocstyle>=6.3.0", + "pyarrow>=14.0", "pytest>=8.4.1", "pytest-cov>=7.0.0", + "pytest-subtests>=0.14.0", "ruff>=0.12.4", ] @@ -61,6 +63,7 @@ pythonpath = [ "packages/overture-schema-annex/tests", "packages/overture-schema-base-theme/tests", "packages/overture-schema-buildings-theme/tests", + "packages/overture-schema-cli/tests", "packages/overture-schema-core/tests", "packages/overture-schema-divisions-theme/tests", "packages/overture-schema-places-theme/tests", @@ -68,3 +71,9 @@ pythonpath = [ "packages/overture-schema-transportation-theme/tests", "packages/overture-schema/tests", ] + +[[tool.uv.index]] +name = "overture" +url = "https://overture-pypi-505071440022.d.codeartifact.us-west-2.amazonaws.com/pypi/overture/simple/" +publish-url = "https://overture-pypi-505071440022.d.codeartifact.us-west-2.amazonaws.com/pypi/overture/" +explicit = true diff --git a/uv.lock b/uv.lock index 7748ad79..33b77039 100644 --- a/uv.lock +++ b/uv.lock @@ -683,9 +683,15 @@ dependencies = [ { name = "yamlcore" }, ] +[package.optional-dependencies] +parquet = [ + { name = "pyarrow" }, +] + [package.dev-dependencies] dev = [ { name = "mypy" }, + { name = "pyarrow" }, { name = "pytest" }, { name = "ruff" }, ] @@ -694,15 +700,18 @@ dev = [ requires-dist = [ { name = "click", specifier = ">=8.0" }, { name = "overture-schema-core", editable = "packages/overture-schema-core" }, + { name = "pyarrow", marker = "extra == 'parquet'", specifier = ">=14.0" }, { name = "pydantic", specifier = ">=2.0" }, { name = "pyyaml", specifier = ">=6.0.2" }, { name = "rich", specifier = ">=13.0" }, { name = "yamlcore", specifier = ">=0.0.4" }, ] +provides-extras = ["parquet"] [package.metadata.requires-dev] dev = [ { name = "mypy" }, + { name = "pyarrow", specifier = ">=14.0" }, { name = "pytest", specifier = ">=7.0" }, { name = "ruff" }, ] @@ -821,11 +830,12 @@ source = { virtual = "." 
} dev = [ { name = "mypy" }, { name = "pdoc" }, + { name = "pyarrow" }, { name = "pydocstyle" }, { name = "pytest" }, { name = "pytest-cov" }, + { name = "pytest-subtests" }, { name = "ruff" }, - { name = "semver" }, ] [package.metadata] @@ -834,11 +844,12 @@ dev = [ dev = [ { name = "mypy", specifier = ">=1.17.0" }, { name = "pdoc", specifier = ">=15.0.4" }, + { name = "pyarrow", specifier = ">=14.0" }, { name = "pydocstyle", specifier = ">=6.3.0" }, { name = "pytest", specifier = ">=8.4.1" }, { name = "pytest-cov", specifier = ">=7.0.0" }, + { name = "pytest-subtests", specifier = ">=0.14.0" }, { name = "ruff", specifier = ">=0.12.4" }, - { name = "semver", specifier = ">=3.0.4" }, ] [[package]] @@ -891,6 +902,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, ] +[[package]] +name = "pyarrow" +version = "23.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/2f/23e042a5aa99bcb15e794e14030e8d065e00827e846e53a66faec73c7cd6/pyarrow-23.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cbdc2bf5947aa4d462adcf8453cf04aee2f7932653cb67a27acd96e5e8528a67", size = 34281861, upload-time = "2026-01-18T16:13:34.332Z" }, + { url = "https://files.pythonhosted.org/packages/8b/65/1651933f504b335ec9cd8f99463718421eb08d883ed84f0abd2835a16cad/pyarrow-23.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4d38c836930ce15cd31dce20114b21ba082da231c884bdc0a7b53e1477fe7f07", size = 35825067, upload-time = "2026-01-18T16:13:42.549Z" }, + { url = "https://files.pythonhosted.org/packages/84/ec/d6fceaec050c893f4e35c0556b77d4cc9973fcc24b0a358a5781b1234582/pyarrow-23.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:4222ff8f76919ecf6c716175a0e5fddb5599faeed4c56d9ea41a2c42be4998b2", size = 44458539, upload-time = "2026-01-18T16:13:52.975Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d9/369f134d652b21db62fe3ec1c5c2357e695f79eb67394b8a93f3a2b2cffa/pyarrow-23.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:87f06159cbe38125852657716889296c83c37b4d09a5e58f3d10245fd1f69795", size = 47535889, upload-time = "2026-01-18T16:14:03.693Z" }, + { url = "https://files.pythonhosted.org/packages/a3/95/f37b6a252fdbf247a67a78fb3f61a529fe0600e304c4d07741763d3522b1/pyarrow-23.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1675c374570d8b91ea6d4edd4608fa55951acd44e0c31bd146e091b4005de24f", size = 48157777, upload-time = "2026-01-18T16:14:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ab/fb94923108c9c6415dab677cf1f066d3307798eafc03f9a65ab4abc61056/pyarrow-23.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:247374428fde4f668f138b04031a7e7077ba5fa0b5b1722fdf89a017bf0b7ee0", size = 50580441, upload-time = "2026-01-18T16:14:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/ae/78/897ba6337b517fc8e914891e1bd918da1c4eb8e936a553e95862e67b80f6/pyarrow-23.0.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:de53b1bd3b88a2ee93c9af412c903e57e738c083be4f6392288294513cd8b2c1", size = 27530028, upload-time = "2026-01-18T16:14:27.353Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c0/57fe251102ca834fee0ef69a84ad33cc0ff9d5dfc50f50b466846356ecd7/pyarrow-23.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5574d541923efcbfdf1294a2746ae3b8c2498a2dc6cd477882f6f4e7b1ac08d3", size = 34276762, upload-time = "2026-01-18T16:14:34.128Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4e/24130286548a5bc250cbed0b6bbf289a2775378a6e0e6f086ae8c68fc098/pyarrow-23.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:2ef0075c2488932e9d3c2eb3482f9459c4be629aa673b725d5e3cf18f777f8e4", size = 35821420, upload-time = "2026-01-18T16:14:40.699Z" }, + { url = "https://files.pythonhosted.org/packages/ee/55/a869e8529d487aa2e842d6c8865eb1e2c9ec33ce2786eb91104d2c3e3f10/pyarrow-23.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:65666fc269669af1ef1c14478c52222a2aa5c907f28b68fb50a203c777e4f60c", size = 44457412, upload-time = "2026-01-18T16:14:49.051Z" }, + { url = "https://files.pythonhosted.org/packages/36/81/1de4f0edfa9a483bbdf0082a05790bd6a20ed2169ea12a65039753be3a01/pyarrow-23.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:4d85cb6177198f3812db4788e394b757223f60d9a9f5ad6634b3e32be1525803", size = 47534285, upload-time = "2026-01-18T16:14:56.748Z" }, + { url = "https://files.pythonhosted.org/packages/f2/04/464a052d673b5ece074518f27377861662449f3c1fdb39ce740d646fd098/pyarrow-23.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1a9ff6fa4141c24a03a1a434c63c8fa97ce70f8f36bccabc18ebba905ddf0f17", size = 48157913, upload-time = "2026-01-18T16:15:05.114Z" }, + { url = "https://files.pythonhosted.org/packages/f4/1b/32a4de9856ee6688c670ca2def588382e573cce45241a965af04c2f61687/pyarrow-23.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:84839d060a54ae734eb60a756aeacb62885244aaa282f3c968f5972ecc7b1ecc", size = 50582529, upload-time = "2026-01-18T16:15:12.846Z" }, + { url = "https://files.pythonhosted.org/packages/db/c7/d6581f03e9b9e44ea60b52d1750ee1a7678c484c06f939f45365a45f7eef/pyarrow-23.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a149a647dbfe928ce8830a713612aa0b16e22c64feac9d1761529778e4d4eaa5", size = 27542646, upload-time = "2026-01-18T16:15:18.89Z" }, + { url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" }, + { url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" }, + { url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" }, + { url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time 
= "2026-01-18T16:15:51.252Z" }, + { url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" }, + { url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" }, + { url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" }, + { url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" }, + { url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" }, + { url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" }, + { url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" }, + { url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" }, + { url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" }, + { url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" }, + { url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" }, + { url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" }, + { url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" }, + { url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" }, + { url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" }, + { url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" }, + { url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" }, + { url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" }, + { url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" }, + { url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" }, + { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" }, +] + [[package]] name = "pydantic" version = "2.12.2" @@ -1194,15 +1262,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/2a/65880dfd0e13f7f13a775998f34703674a4554906167dce02daf7865b954/ruff-0.14.0-py3-none-win_arm64.whl", hash = "sha256:f42c9495f5c13ff841b1da4cb3c2a42075409592825dada7c5885c2c844ac730", size = 12565142, upload-time = "2025-10-07T18:21:53.577Z" }, ] -[[package]] -name = "semver" -version = "3.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/d1/d3159231aec234a59dd7d601e9dd9fe96f3afff15efd33c1070019b26132/semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602", size = 269730, upload-time = 
"2025-01-24T13:19:27.617Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/24/4d91e05817e92e3a61c8a21e08fd0f390f5301f1c448b137c57c4bc6e543/semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746", size = 17912, upload-time = "2025-01-24T13:19:24.949Z" }, -] - [[package]] name = "shapely" version = "2.1.2"