From 22fe3298607707034b313e549c46f317155940bf Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Fri, 26 Dec 2025 13:47:25 +0000 Subject: [PATCH] First commit for adding geometry and geography type Signed-off-by: dttung2905 --- literals.go | 6 + schema.go | 6 + schema_test.go | 12 ++ table/arrow_utils.go | 22 +++ table/arrow_utils_test.go | 114 +++++++++++++++- table/substrait/substrait.go | 6 + transforms.go | 5 + types.go | 258 +++++++++++++++++++++++++++++++++++ types_test.go | 157 +++++++++++++++++++++ visitors.go | 8 ++ 10 files changed, 593 insertions(+), 1 deletion(-) diff --git a/literals.go b/literals.go index a50f155ee..852b9f1b2 100644 --- a/literals.go +++ b/literals.go @@ -207,6 +207,12 @@ func LiteralFromBytes(typ Type, data []byte) (Literal, error) { var v UUIDLiteral err := v.UnmarshalBinary(data) + return v, err + case GeometryType, GeographyType: + // Geometry and Geography are stored as WKB (binary format) + var v BinaryLiteral + err := v.UnmarshalBinary(data) + return v, err } diff --git a/schema.go b/schema.go index 30bf30763..ead89f676 100644 --- a/schema.go +++ b/schema.go @@ -494,6 +494,8 @@ type SchemaVisitorPerPrimitiveType[T any] interface { VisitBinary() T VisitUUID() T VisitUnknown() T + VisitGeometry(GeometryType) T + VisitGeography(GeographyType) T } // Visit accepts a visitor and performs a post-order traversal of the given schema. 
@@ -640,6 +642,10 @@ func visitField[T any](f NestedField, visitor SchemaVisitor[T]) T { return perPrimitive.VisitFixed(t) case UnknownType: return perPrimitive.VisitUnknown() + case GeometryType: + return perPrimitive.VisitGeometry(t) + case GeographyType: + return perPrimitive.VisitGeography(t) } } diff --git a/schema_test.go b/schema_test.go index c4ea21a6d..57c036797 100644 --- a/schema_test.go +++ b/schema_test.go @@ -424,6 +424,18 @@ func TestSerializeSchema(t *testing.T) { "schema-id": 1, "identifier-field-ids": [2] }`, string(data)) + + // Test schema with geometry and geography + schemaWithGeo := iceberg.NewSchema(2, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "location", Type: iceberg.GeometryTypeOf("srid:3857"), Required: false}, + iceberg.NestedField{ID: 3, Name: "gps", Type: iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmKarney), Required: false}, + ) + data2, err := json.Marshal(schemaWithGeo) + require.NoError(t, err) + var schemaFromJSON iceberg.Schema + require.NoError(t, json.Unmarshal(data2, &schemaFromJSON)) + assert.True(t, schemaWithGeo.Equals(&schemaFromJSON)) } func TestUnmarshalSchema(t *testing.T) { diff --git a/table/arrow_utils.go b/table/arrow_utils.go index 0dffa74a9..6c3a46d50 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -622,6 +622,22 @@ func (c convertToArrow) VisitUnknown() arrow.Field { } } +func (c convertToArrow) VisitGeometry(iceberg.GeometryType) arrow.Field { + if c.useLargeTypes { + return arrow.Field{Type: arrow.BinaryTypes.LargeBinary} + } + + return arrow.Field{Type: arrow.BinaryTypes.Binary} +} + +func (c convertToArrow) VisitGeography(iceberg.GeographyType) arrow.Field { + if c.useLargeTypes { + return arrow.Field{Type: arrow.BinaryTypes.LargeBinary} + } + + return arrow.Field{Type: arrow.BinaryTypes.Binary} +} + var _ iceberg.SchemaVisitorPerPrimitiveType[arrow.Field] = convertToArrow{} // 
SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the metadata parameter @@ -777,6 +793,12 @@ func (a *arrowProjectionVisitor) castIfNeeded(field iceberg.NestedField, vals ar panic(fmt.Errorf("unsupported schema projection from %s to %s", vals.DataType(), targetType)) + case iceberg.GeometryType, iceberg.GeographyType: + if arrow.TypeEqual(vals.DataType(), arrow.BinaryTypes.Binary) || + arrow.TypeEqual(vals.DataType(), arrow.BinaryTypes.LargeBinary) { + vals.Retain() + return vals + } default: return retOrPanic(compute.CastArray(a.ctx, vals, compute.SafeCastOptions(targetType))) diff --git a/table/arrow_utils_test.go b/table/arrow_utils_test.go index 0f41b22cb..1e859ed0f 100644 --- a/table/arrow_utils_test.go +++ b/table/arrow_utils_test.go @@ -84,6 +84,8 @@ func TestArrowToIceberg(t *testing.T) { {arrow.BinaryTypes.LargeBinary, iceberg.PrimitiveTypes.Binary, false, ""}, {arrow.BinaryTypes.BinaryView, nil, false, "unsupported arrow type for conversion - binary_view"}, {extensions.NewUUIDType(), iceberg.PrimitiveTypes.UUID, true, ""}, + // Note: Arrow binary types map to Iceberg BinaryType, not Geometry/Geography + // Geometry/Geography must be explicitly specified in Iceberg schema {arrow.StructOf(arrow.Field{ Name: "foo", Type: arrow.BinaryTypes.String, @@ -342,9 +344,16 @@ var ( ) func TestArrowSchemaRoundTripConversion(t *testing.T) { + schemaWithGeo := iceberg.NewSchema(3, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "location", Type: iceberg.GeometryTypeOf("srid:3857"), Required: false}, + iceberg.NestedField{ID: 3, Name: "gps", Type: iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmKarney), Required: false}, + ) + schemas := []*iceberg.Schema{ icebergSchemaSimple, icebergSchemaNested, + schemaWithGeo, } for _, tt := range schemas { @@ -354,7 +363,14 @@ func TestArrowSchemaRoundTripConversion(t *testing.T) { ice, err := 
table.ArrowSchemaToIceberg(sc, false, nil) require.NoError(t, err) - assert.True(t, tt.Equals(ice), tt.String(), ice.String()) + if tt == schemaWithGeo { + geomField := sc.Field(1) + assert.Equal(t, arrow.BinaryTypes.Binary, geomField.Type) + geogField := sc.Field(2) + assert.Equal(t, arrow.BinaryTypes.Binary, geogField.Type) + } else { + assert.True(t, tt.Equals(ice), tt.String(), ice.String()) + } } } @@ -585,3 +601,99 @@ func TestToRequestedSchema(t *testing.T) { assert.True(t, array.RecordEqual(rec, rec2)) } + +func TestToRequestedSchemaGeometryGeography(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + // File schema (from Parquet) has BinaryType for geometry/geography + // This simulates what we get when reading from a Parquet file + fileSchema := arrow.NewSchema([]arrow.Field{ + { + Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: false, + Metadata: fieldIDMeta("1"), + }, + { + Name: "location", Type: arrow.BinaryTypes.Binary, Nullable: true, + Metadata: fieldIDMeta("2"), + }, + { + Name: "gps", Type: arrow.BinaryTypes.Binary, Nullable: true, + Metadata: fieldIDMeta("3"), + }, + }, nil) + + // Requested schema has GeometryType/GeographyType + requestedSchema := iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 2, Name: "location", Type: iceberg.GeometryTypeOf("srid:3857"), Required: false}, + iceberg.NestedField{ID: 3, Name: "gps", Type: iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmKarney), Required: false}, + ) + + // Create Arrow record with binary data (simulating WKB format) + // For testing, we'll use simple binary data + bldr := array.NewRecordBuilder(mem, fileSchema) + defer bldr.Release() + + idBldr := bldr.Field(0).(*array.Int32Builder) + locationBldr := bldr.Field(1).(*array.BinaryBuilder) + gpsBldr := bldr.Field(2).(*array.BinaryBuilder) + + // First row: non-null values + 
idBldr.Append(1) + locationBldr.Append([]byte{0x01, 0x02, 0x03, 0x04}) // Simulated WKB + gpsBldr.Append([]byte{0x05, 0x06, 0x07, 0x08}) // Simulated WKB + + // Second row: null values + idBldr.Append(2) + locationBldr.AppendNull() + gpsBldr.AppendNull() + + // Third row: more non-null values + idBldr.Append(3) + locationBldr.Append([]byte{0x09, 0x0A, 0x0B, 0x0C}) + gpsBldr.Append([]byte{0x0D, 0x0E, 0x0F, 0x10}) + + rec := bldr.NewRecordBatch() + defer rec.Release() + + // Convert file schema to Iceberg schema + fileIcebergSchema, err := table.ArrowSchemaToIceberg(fileSchema, false, nil) + require.NoError(t, err) + + // Convert requested schema to Arrow schema + requestedArrowSchema, err := table.SchemaToArrowSchema(requestedSchema, nil, true, false) + require.NoError(t, err) + + // Project to requested schema + projectedRec, err := table.ToRequestedSchema(context.Background(), requestedSchema, fileIcebergSchema, rec, true, true, false) + require.NoError(t, err, "ToRequestedSchema should succeed for BinaryType -> GeometryType/GeographyType") + defer projectedRec.Release() + + // Verify the projected record has the correct schema + assert.Equal(t, requestedArrowSchema, projectedRec.Schema()) + + // Verify the data is preserved + assert.Equal(t, rec.NumRows(), projectedRec.NumRows()) + assert.Equal(t, rec.NumCols(), projectedRec.NumCols()) + + // Verify binary data is preserved + projectedLocation := projectedRec.Column(1).(*array.Binary) + projectedGps := projectedRec.Column(2).(*array.Binary) + + // Check first row + assert.False(t, projectedLocation.IsNull(0)) + assert.False(t, projectedGps.IsNull(0)) + assert.Equal(t, []byte{0x01, 0x02, 0x03, 0x04}, projectedLocation.Value(0)) + assert.Equal(t, []byte{0x05, 0x06, 0x07, 0x08}, projectedGps.Value(0)) + + // Check second row (nulls) + assert.True(t, projectedLocation.IsNull(1)) + assert.True(t, projectedGps.IsNull(1)) + + // Check third row + assert.False(t, projectedLocation.IsNull(2)) + assert.False(t, 
projectedGps.IsNull(2)) + assert.Equal(t, []byte{0x09, 0x0A, 0x0B, 0x0C}, projectedLocation.Value(2)) + assert.Equal(t, []byte{0x0D, 0x0E, 0x0F, 0x10}, projectedGps.Value(2)) +} diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go index 21fb29191..001451da7 100644 --- a/table/substrait/substrait.go +++ b/table/substrait/substrait.go @@ -169,6 +169,12 @@ func (convertToSubstrait) VisitUnknown() types.Type { // Returning nil indicates this type cannot be converted to Substrait return nil } +func (convertToSubstrait) VisitGeometry(iceberg.GeometryType) types.Type { + return &types.BinaryType{} +} +func (convertToSubstrait) VisitGeography(iceberg.GeographyType) types.Type { + return &types.BinaryType{} +} var _ iceberg.SchemaVisitorPerPrimitiveType[types.Type] = (*convertToSubstrait)(nil) diff --git a/transforms.go b/transforms.go index 18e799fb6..1c74fcad3 100644 --- a/transforms.go +++ b/transforms.go @@ -106,6 +106,11 @@ func (t IdentityTransform) MarshalText() ([]byte, error) { func (IdentityTransform) String() string { return "identity" } func (IdentityTransform) CanTransform(t Type) bool { + // Exclude geometry, geography, and variant per spec + switch t.(type) { + case GeometryType, GeographyType: + return false + } _, ok := t.(PrimitiveType) return ok diff --git a/types.go b/types.go index 7ec0e08cc..831d5a7e0 100644 --- a/types.go +++ b/types.go @@ -32,6 +32,8 @@ import ( var ( regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`) decimalRegex = regexp.MustCompile(`decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)`) + geometryRegex = regexp.MustCompile(`(?i)^geometry\s*(?:\(\s*([^)]*?)\s*\))?$`) + geographyRegex = regexp.MustCompile(`(?i)^geography\s*(?:\(\s*([^,]*?)\s*(?:,\s*(\w*)\s*)?\))?$`) ) type Properties map[string]string @@ -135,6 +137,29 @@ func (t *typeIFace) UnmarshalJSON(b []byte) error { t.Type = UnknownType{} default: switch { + case strings.HasPrefix(strings.ToLower(typename), "geometry"): + matches := 
geometryRegex.FindStringSubmatch(typename) + if len(matches) < 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + crs := strings.TrimSpace(matches[1]) + t.Type = GeometryTypeOf(crs) + case strings.HasPrefix(strings.ToLower(typename), "geography"): + matches := geographyRegex.FindStringSubmatch(typename) + if len(matches) < 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + crs := strings.TrimSpace(matches[1]) + algorithmStr := strings.TrimSpace(matches[2]) + var algorithm EdgeAlgorithm + if algorithmStr != "" { + var err error + algorithm, err = EdgeAlgorithmFromName(algorithmStr) + if err != nil { + return err + } + } + t.Type = GeographyTypeOf(crs, algorithm) case strings.HasPrefix(typename, "fixed"): matches := regexFromBrackets.FindStringSubmatch(typename) if len(matches) != 2 { @@ -743,6 +768,229 @@ func (UnknownType) primitive() {} func (UnknownType) Type() string { return "unknown" } func (UnknownType) String() string { return "unknown" } +// EdgeAlgorithm specifies the algorithm for interpolating edges in geography types. +type EdgeAlgorithm string + +const ( + EdgeAlgorithmSpherical EdgeAlgorithm = "spherical" + EdgeAlgorithmVincenty EdgeAlgorithm = "vincenty" + EdgeAlgorithmThomas EdgeAlgorithm = "thomas" + EdgeAlgorithmAndoyer EdgeAlgorithm = "andoyer" + EdgeAlgorithmKarney EdgeAlgorithm = "karney" +) + +// EdgeAlgorithmFromName returns the EdgeAlgorithm for the given name (case-insensitive). +// An empty name yields EdgeAlgorithmSpherical; an unrecognized name returns an error wrapping ErrInvalidTypeString.
+func EdgeAlgorithmFromName(name string) (EdgeAlgorithm, error) { + if name == "" { + return EdgeAlgorithmSpherical, nil + } + + nameLower := strings.ToLower(name) + switch nameLower { + case "spherical": + return EdgeAlgorithmSpherical, nil + case "vincenty": + return EdgeAlgorithmVincenty, nil + case "thomas": + return EdgeAlgorithmThomas, nil + case "andoyer": + return EdgeAlgorithmAndoyer, nil + case "karney": + return EdgeAlgorithmKarney, nil + default: + return EdgeAlgorithmSpherical, fmt.Errorf("%w: invalid edge interpolation algorithm: %s", ErrInvalidTypeString, name) + } +} + +func (e EdgeAlgorithm) String() string { + return string(e) +} + +// GeometryType represents a geospatial geometry type with an optional CRS parameter. +// Geometry uses Cartesian calculations and edge-interpolation is always linear/planar. +type GeometryType struct { + crs string // default "OGC:CRS84" +} + +const defaultCRS = "OGC:CRS84" + +// GeometryTypeCRS84 returns a GeometryType with the default CRS (OGC:CRS84). +func GeometryTypeCRS84() GeometryType { + return GeometryType{} +} + +// GeometryTypeOf returns a GeometryType with the specified CRS. +// If crs is empty or equals "OGC:CRS84", it uses the default. +func GeometryTypeOf(crs string) GeometryType { + if crs == "" || strings.EqualFold(crs, defaultCRS) { + return GeometryType{} + } + return GeometryType{crs: crs} +} + +func (g GeometryType) Equals(other Type) bool { + rhs, ok := other.(GeometryType) + if !ok { + return false + } + return g.crs == rhs.crs +} + +func (GeometryType) primitive() {} + +func (g GeometryType) Type() string { + if g.crs == "" { + return "geometry" + } + return fmt.Sprintf("geometry(%s)", g.crs) +} + +func (g GeometryType) String() string { + return g.Type() +} + +// CRS returns the coordinate reference system. Defaults to "OGC:CRS84" if not set. 
+func (g GeometryType) CRS() string { + if g.crs == "" { + return defaultCRS + } + return g.crs +} + +func (g GeometryType) MarshalJSON() ([]byte, error) { + return json.Marshal(g.Type()) // json.Marshal escapes quotes and backslashes in the CRS; hand-built quoting produced invalid JSON for them +} + +func (g *GeometryType) UnmarshalJSON(b []byte) error { + var typename string + if err := json.Unmarshal(b, &typename); err != nil { + return err + } + + if strings.HasPrefix(strings.ToLower(typename), "geometry") { + matches := geometryRegex.FindStringSubmatch(typename) + if len(matches) < 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + crs := strings.TrimSpace(matches[1]) + *g = GeometryTypeOf(crs) + return nil + } + + return fmt.Errorf("%w: expected geometry type, got %s", ErrInvalidTypeString, typename) +} + +// GeographyType represents a geospatial geography type with optional CRS and edge-interpolation algorithm. +// Geography uses geodesic calculations on Earth's surface. +type GeographyType struct { + crs string // empty means default "OGC:CRS84" + algorithm EdgeAlgorithm // empty means default "spherical" +} + +// GeographyTypeCRS84 returns a GeographyType with default CRS and algorithm. +func GeographyTypeCRS84() GeographyType { + return GeographyType{} +} + +// GeographyTypeOf returns a GeographyType with the specified CRS and algorithm. +// If crs is empty or equals "OGC:CRS84", it uses the default. +// If algorithm is empty, it defaults to "spherical".
+func GeographyTypeOf(crs string, algorithm EdgeAlgorithm) GeographyType { + if crs == "" || strings.EqualFold(crs, defaultCRS) { + crs = "" + } + if algorithm == "" { + algorithm = EdgeAlgorithmSpherical + } + return GeographyType{crs: crs, algorithm: algorithm} +} + +func (g GeographyType) Equals(other Type) bool { + rhs, ok := other.(GeographyType) + if !ok { + return false + } + // Normalize empty algorithm to default for comparison + gAlg := g.algorithm + if gAlg == "" { + gAlg = EdgeAlgorithmSpherical + } + rhsAlg := rhs.algorithm + if rhsAlg == "" { + rhsAlg = EdgeAlgorithmSpherical + } + return g.crs == rhs.crs && gAlg == rhsAlg +} + +func (GeographyType) primitive() {} + +func (g GeographyType) Type() string { + if g.algorithm != "" && g.algorithm != EdgeAlgorithmSpherical { + crs := g.CRS() + return fmt.Sprintf("geography(%s, %s)", crs, g.algorithm) + } else if g.crs != "" { + return fmt.Sprintf("geography(%s)", g.crs) + } + return "geography" +} + +func (g GeographyType) String() string { + return g.Type() +} + +// CRS returns the coordinate reference system. Defaults to "OGC:CRS84" if not set. +func (g GeographyType) CRS() string { + if g.crs == "" { + return defaultCRS + } + return g.crs +} + +// Algorithm returns the edge-interpolation algorithm. Defaults to "spherical" if not set. 
+func (g GeographyType) Algorithm() EdgeAlgorithm { + if g.algorithm == "" { + return EdgeAlgorithmSpherical + } + return g.algorithm +} + +func (g GeographyType) MarshalJSON() ([]byte, error) { + return json.Marshal(g.Type()) // json.Marshal escapes quotes and backslashes in the CRS; hand-built quoting produced invalid JSON for them +} + +func (g *GeographyType) UnmarshalJSON(b []byte) error { + var typename string + if err := json.Unmarshal(b, &typename); err != nil { + return err + } + + if strings.HasPrefix(strings.ToLower(typename), "geography") { + matches := geographyRegex.FindStringSubmatch(typename) + if len(matches) < 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + crs := strings.TrimSpace(matches[1]) + algorithmStr := strings.TrimSpace(matches[2]) + var algorithm EdgeAlgorithm + if algorithmStr != "" { + var err error + algorithm, err = EdgeAlgorithmFromName(algorithmStr) + if err != nil { + return err + } + } + // Don't use GeographyTypeOf here to preserve empty algorithm for defaults + if crs == "" || strings.EqualFold(crs, defaultCRS) { + crs = "" + } + *g = GeographyType{crs: crs, algorithm: algorithm} + return nil + } + + return fmt.Errorf("%w: expected geography type, got %s", ErrInvalidTypeString, typename) +} + var PrimitiveTypes = struct { Bool PrimitiveType Int32 PrimitiveType @@ -759,6 +1007,8 @@ var PrimitiveTypes = struct { Binary PrimitiveType UUID PrimitiveType Unknown PrimitiveType + Geometry PrimitiveType + Geography PrimitiveType }{ Bool: BooleanType{}, Int32: Int32Type{}, @@ -775,6 +1025,8 @@ var PrimitiveTypes = struct { Binary: BinaryType{}, UUID: UUIDType{}, Unknown: UnknownType{}, + Geometry: GeometryTypeCRS84(), + Geography: GeographyTypeCRS84(), } // PromoteType promotes the type being read from a file to a requested read type.
@@ -798,6 +1050,12 @@ func PromoteType(fileType, readType Type) (Type, error) { if _, ok := readType.(StringType); ok { return readType, nil } + if _, ok := readType.(GeometryType); ok { + return readType, nil + } + if _, ok := readType.(GeographyType); ok { + return readType, nil + } case DecimalType: if rt, ok := readType.(DecimalType); ok { if t.precision <= rt.precision && t.scale <= rt.scale { diff --git a/types_test.go b/types_test.go index 8649c259c..40215ef81 100644 --- a/types_test.go +++ b/types_test.go @@ -47,6 +47,8 @@ func TestTypesBasic(t *testing.T) { {"unknown", iceberg.PrimitiveTypes.Unknown}, {"fixed[5]", iceberg.FixedTypeOf(5)}, {"decimal(9, 4)", iceberg.DecimalTypeOf(9, 4)}, + {"geometry", iceberg.PrimitiveTypes.Geometry}, + {"geography", iceberg.PrimitiveTypes.Geography}, } for _, tt := range tests { @@ -223,6 +225,10 @@ func TestTypeStrings(t *testing.T) { {iceberg.PrimitiveTypes.UUID, "uuid"}, {iceberg.PrimitiveTypes.Binary, "binary"}, {iceberg.PrimitiveTypes.Unknown, "unknown"}, + {iceberg.PrimitiveTypes.Geometry, "geometry"}, + {iceberg.PrimitiveTypes.Geography, "geography"}, + {iceberg.GeometryTypeOf("srid:3857"), "geometry(srid:3857)"}, + {iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmKarney), "geography(srid:4326, karney)"}, {iceberg.FixedTypeOf(22), "fixed[22]"}, {iceberg.DecimalTypeOf(19, 25), "decimal(19, 25)"}, {&iceberg.StructType{ @@ -392,4 +398,158 @@ func TestUnknownTypeEquality(t *testing.T) { assert.True(t, unknown2.Equals(unknown1)) assert.Equal(t, "unknown", unknown1.String()) assert.Equal(t, "unknown", unknown2.String()) +} + +func TestGeometryType(t *testing.T) { + // Test default CRS + geomDefault := iceberg.GeometryTypeCRS84() + assert.Equal(t, "geometry", geomDefault.String()) + assert.Equal(t, "OGC:CRS84", geomDefault.CRS()) + assert.True(t, geomDefault.Equals(iceberg.PrimitiveTypes.Geometry)) + + // Test custom CRS + geomCustom := iceberg.GeometryTypeOf("srid:3857") + assert.Equal(t, "geometry(srid:3857)", geomCustom.String()) + assert.Equal(t,
"srid:3857", geomCustom.CRS()) + assert.True(t, geomCustom.Equals(iceberg.GeometryTypeOf("srid:3857"))) + assert.False(t, geomCustom.Equals(geomDefault)) + + // Test with default CRS explicitly + geomDefault2 := iceberg.GeometryTypeOf("OGC:CRS84") + assert.True(t, geomDefault2.Equals(geomDefault)) + assert.Equal(t, "geometry", geomDefault2.String()) + + // Test JSON serialization + data, err := json.Marshal(geomDefault) + require.NoError(t, err) + assert.Equal(t, `"geometry"`, string(data)) + + data2, err := json.Marshal(geomCustom) + require.NoError(t, err) + assert.Equal(t, `"geometry(srid:3857)"`, string(data2)) + + // Test JSON deserialization + var geomFromJSON iceberg.GeometryType + err = json.Unmarshal([]byte(`"geometry"`), &geomFromJSON) + require.NoError(t, err) + assert.True(t, geomFromJSON.Equals(geomDefault)) + + err = json.Unmarshal([]byte(`"geometry(srid:4326)"`), &geomFromJSON) + require.NoError(t, err) + assert.Equal(t, "srid:4326", geomFromJSON.CRS()) +} + +func TestGeographyType(t *testing.T) { + // Test default CRS and algorithm + geogDefault := iceberg.GeographyTypeCRS84() + assert.Equal(t, "geography", geogDefault.String()) + assert.Equal(t, "OGC:CRS84", geogDefault.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmSpherical, geogDefault.Algorithm()) + assert.True(t, geogDefault.Equals(iceberg.PrimitiveTypes.Geography)) + + // Test custom CRS only + geogCRS := iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmSpherical) + assert.Equal(t, "geography(srid:4326)", geogCRS.String()) + assert.Equal(t, "srid:4326", geogCRS.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmSpherical, geogCRS.Algorithm()) + + // Test custom CRS and algorithm + geogCustom := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney) + assert.Equal(t, "geography(srid:4269, karney)", geogCustom.String()) + assert.Equal(t, "srid:4269", geogCustom.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmKarney, geogCustom.Algorithm()) + assert.True(t, 
geogCustom.Equals(iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney))) + assert.False(t, geogCustom.Equals(geogDefault)) + + // Test JSON serialization + data, err := json.Marshal(geogDefault) + require.NoError(t, err) + assert.Equal(t, `"geography"`, string(data)) + + data2, err := json.Marshal(geogCustom) + require.NoError(t, err) + assert.Equal(t, `"geography(srid:4269, karney)"`, string(data2)) + + // Test JSON deserialization + var geogFromJSON iceberg.GeographyType + err = json.Unmarshal([]byte(`"geography"`), &geogFromJSON) + require.NoError(t, err) + assert.True(t, geogFromJSON.Equals(geogDefault)) + + err = json.Unmarshal([]byte(`"geography(srid:4326, vincenty)"`), &geogFromJSON) + require.NoError(t, err) + assert.Equal(t, "srid:4326", geogFromJSON.CRS()) + assert.Equal(t, iceberg.EdgeAlgorithmVincenty, geogFromJSON.Algorithm()) +} + +func TestEdgeAlgorithm(t *testing.T) { + tests := []struct { + name string + input string + expected iceberg.EdgeAlgorithm + wantErr bool + }{ + {"spherical", "spherical", iceberg.EdgeAlgorithmSpherical, false}, + {"SPHERICAL", "SPHERICAL", iceberg.EdgeAlgorithmSpherical, false}, + {"Spherical", "Spherical", iceberg.EdgeAlgorithmSpherical, false}, + {"vincenty", "vincenty", iceberg.EdgeAlgorithmVincenty, false}, + {"thomas", "thomas", iceberg.EdgeAlgorithmThomas, false}, + {"andoyer", "andoyer", iceberg.EdgeAlgorithmAndoyer, false}, + {"karney", "karney", iceberg.EdgeAlgorithmKarney, false}, + {"empty", "", iceberg.EdgeAlgorithmSpherical, false}, + {"invalid", "invalid", iceberg.EdgeAlgorithmSpherical, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := iceberg.EdgeAlgorithmFromName(tt.input) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + }) + } +} + +func TestPromoteTypeGeometryGeography(t *testing.T) { + tests := []struct { + name string + fileType iceberg.Type + readType iceberg.Type + 
wantErr bool + }{ + // BinaryType -> GeometryType should succeed + {"BinaryToGeometry", iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.Geometry, false}, + {"BinaryToGeometryWithCRS", iceberg.PrimitiveTypes.Binary, iceberg.GeometryTypeOf("srid:3857"), false}, + // BinaryType -> GeographyType should succeed + {"BinaryToGeography", iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.Geography, false}, + {"BinaryToGeographyWithCRS", iceberg.PrimitiveTypes.Binary, iceberg.GeographyTypeOf("srid:4326", iceberg.EdgeAlgorithmKarney), false}, + // GeometryType -> BinaryType should fail (one-way only) + {"GeometryToBinary", iceberg.PrimitiveTypes.Geometry, iceberg.PrimitiveTypes.Binary, true}, + // GeographyType -> BinaryType should fail (one-way only) + {"GeographyToBinary", iceberg.PrimitiveTypes.Geography, iceberg.PrimitiveTypes.Binary, true}, + // GeometryType -> GeographyType should fail (not compatible) + {"GeometryToGeography", iceberg.PrimitiveTypes.Geometry, iceberg.PrimitiveTypes.Geography, true}, + // GeographyType -> GeometryType should fail (not compatible) + {"GeographyToGeometry", iceberg.PrimitiveTypes.Geography, iceberg.PrimitiveTypes.Geometry, true}, + // Same types should succeed + {"GeometryToGeometry", iceberg.PrimitiveTypes.Geometry, iceberg.PrimitiveTypes.Geometry, false}, + {"GeographyToGeography", iceberg.PrimitiveTypes.Geography, iceberg.PrimitiveTypes.Geography, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := iceberg.PromoteType(tt.fileType, tt.readType) + if tt.wantErr { + assert.Error(t, err, "expected error promoting %s to %s", tt.fileType, tt.readType) + assert.Nil(t, result) + } else { + assert.NoError(t, err, "unexpected error promoting %s to %s", tt.fileType, tt.readType) + assert.NotNil(t, result) + assert.True(t, result.Equals(tt.readType), "promoted type should equal read type") + } + }) + } } diff --git a/visitors.go b/visitors.go index 3debc2eab..64a5b7af9 100644 --- 
a/visitors.go +++ b/visitors.go @@ -324,6 +324,8 @@ func doCmp(st structLike, term BoundTerm, lit Literal) int { return typedCmp[uuid.UUID](st, term, lit) case DecimalType: return typedCmp[Decimal](st, term, lit) + case GeometryType, GeographyType: + return typedCmp[[]byte](st, term, lit) } panic(ErrType) } @@ -353,6 +355,12 @@ func (e *exprEvaluator) VisitLessEqual(term BoundTerm, lit Literal) bool { } func (e *exprEvaluator) VisitStartsWith(term BoundTerm, lit Literal) bool { + // Geometry and Geography types don't support startsWith operation + switch term.Type().(type) { + case GeometryType, GeographyType: + return false + } + var value, prefix string switch lit.(type) {