From 2d2e93b56120b303c3a8a1b3530e52525ab3f027 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sat, 20 Dec 2025 20:42:45 +0530 Subject: [PATCH] feat: add IS NULL/NOT NULL predicate support for complex types --- crates/iceberg/src/expr/accessor.rs | 210 +++++++++++++++++++++++++- crates/iceberg/src/spec/schema/mod.rs | 24 ++- 2 files changed, 222 insertions(+), 12 deletions(-) diff --git a/crates/iceberg/src/expr/accessor.rs b/crates/iceberg/src/expr/accessor.rs index c4623def99..bbf59f12d0 100644 --- a/crates/iceberg/src/expr/accessor.rs +++ b/crates/iceberg/src/expr/accessor.rs @@ -19,13 +19,54 @@ use std::sync::Arc; use serde_derive::{Deserialize, Serialize}; -use crate::spec::{Datum, Literal, PrimitiveType, Struct}; +use crate::spec::{Datum, Literal, PrimitiveType, Struct, Type}; use crate::{Error, ErrorKind, Result}; +/// The type of field that an accessor points to. +/// Complex types (Struct, List, Map) can only be used for null checks, +/// while Primitive types can be used for value extraction. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub enum AccessorType { + /// Primitive type - supports value extraction and null checks + Primitive(PrimitiveType), + /// Struct type - only supports null checks + Struct, + /// List type - only supports null checks + List, + /// Map type - only supports null checks + Map, +} + +impl AccessorType { + /// Returns the primitive type if this is a primitive accessor, otherwise None + pub fn as_primitive(&self) -> Option<&PrimitiveType> { + match self { + AccessorType::Primitive(p) => Some(p), + _ => None, + } + } + + /// Returns true if this accessor type is complex (non-primitive) + pub fn is_complex(&self) -> bool { + !matches!(self, AccessorType::Primitive(_)) + } +} + +impl From<&Type> for AccessorType { + fn from(ty: &Type) -> Self { + match ty { + Type::Primitive(p) => AccessorType::Primitive(p.clone()), + Type::Struct(_) => AccessorType::Struct, + Type::List(_) => AccessorType::List, + Type::Map(_) => AccessorType::Map, + } + } +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct StructAccessor { position: usize, - r#type: PrimitiveType, + accessor_type: AccessorType, inner: Option>, } @@ -35,7 +76,17 @@ impl StructAccessor { pub(crate) fn new(position: usize, r#type: PrimitiveType) -> Self { StructAccessor { position, - r#type, + accessor_type: AccessorType::Primitive(r#type), + inner: None, + } + } + + /// Create a new accessor for a complex type (struct, list, or map). + /// Complex type accessors can only be used for null checks. + pub(crate) fn new_complex(position: usize, ty: &Type) -> Self { + StructAccessor { + position, + accessor_type: AccessorType::from(ty), inner: None, } } @@ -43,7 +94,7 @@ impl StructAccessor { pub(crate) fn wrap(position: usize, inner: Box) -> Self { StructAccessor { position, - r#type: inner.r#type().clone(), + accessor_type: inner.accessor_type().clone(), inner: Some(inner), } } @@ -52,8 +103,40 @@ impl StructAccessor { self.position } + /// Returns the accessor type (primitive or complex) + pub(crate) fn accessor_type(&self) -> &AccessorType { + &self.accessor_type + } + + /// Returns the primitive type if this is a primitive accessor. + /// For backward compatibility with code that expects a primitive type. pub(crate) fn r#type(&self) -> &PrimitiveType { - &self.r#type + match &self.accessor_type { + AccessorType::Primitive(p) => p, + // This should only be called for primitive accessors + // Return a placeholder for complex types to avoid breaking existing code + _ => &PrimitiveType::Boolean, // Placeholder, should not be used + } + } + + /// Check if the value at this accessor's position is null. + /// This works for both primitive and complex types. + pub(crate) fn is_null(&self, container: &Struct) -> Result { + match &self.inner { + None => Ok(container[self.position].is_none()), + Some(inner) => { + if let Some(Literal::Struct(wrapped)) = &container[self.position] { + inner.is_null(wrapped) + } else if container[self.position].is_none() { + Ok(true) + } else { + Err(Error::new( + ErrorKind::Unexpected, + "Nested accessor should only be wrapping a Struct", + )) + } + } + } } pub(crate) fn get<'a>(&'a self, container: &'a Struct) -> Result> { @@ -61,7 +144,14 @@ impl StructAccessor { None => match &container[self.position] { None => Ok(None), Some(Literal::Primitive(literal)) => { - Ok(Some(Datum::new(self.r#type().clone(), literal.clone()))) + if let AccessorType::Primitive(prim_type) = &self.accessor_type { + Ok(Some(Datum::new(prim_type.clone(), literal.clone()))) + } else { + Err(Error::new( + ErrorKind::Unexpected, + "Cannot extract Datum from complex type accessor", + )) + } } Some(_) => Err(Error::new( ErrorKind::Unexpected, @@ -84,8 +174,11 @@ impl StructAccessor { #[cfg(test)] mod tests { - use crate::expr::accessor::StructAccessor; - use crate::spec::{Datum, Literal, PrimitiveType, Struct}; + use std::sync::Arc; + + use crate::expr::accessor::{AccessorType, StructAccessor}; + use crate::spec::datatypes::{ListType, MapType, NestedField, StructType}; + use crate::spec::{Datum, Literal, PrimitiveType, Struct, Type}; #[test] fn test_single_level_accessor() { @@ -150,4 +243,105 @@ mod tests { assert_eq!(accessor.get(&test_struct).unwrap(), None); } + + #[test] + fn test_complex_type_accessor_struct() { + let struct_type = Type::Struct(StructType::new(vec![Arc::new(NestedField::required( + 1, + "inner", + Type::Primitive(PrimitiveType::String), + ))])); + let accessor = StructAccessor::new_complex(0, &struct_type); + + assert!(accessor.accessor_type().is_complex()); + assert!(matches!(accessor.accessor_type(), AccessorType::Struct)); + + // Test null check on non-null struct + let inner_struct = Struct::from_iter(vec![Some(Literal::string("test".to_string()))]); + let test_struct = Struct::from_iter(vec![Some(Literal::Struct(inner_struct))]); + assert!(!accessor.is_null(&test_struct).unwrap()); + + // Test null check on null struct + let null_struct = Struct::from_iter(vec![None]); + assert!(accessor.is_null(&null_struct).unwrap()); + } + + #[test] + fn test_complex_type_accessor_list() { + let list_type = Type::List(ListType::new(Arc::new(NestedField::list_element( + 1, + Type::Primitive(PrimitiveType::Int), + true, + )))); + let accessor = StructAccessor::new_complex(1, &list_type); + + assert!(accessor.accessor_type().is_complex()); + assert!(matches!(accessor.accessor_type(), AccessorType::List)); + + // Test null check on non-null list + let test_struct = Struct::from_iter(vec![ + Some(Literal::bool(false)), + Some(Literal::List(vec![Some(Literal::int(1)), Some(Literal::int(2))])), + ]); + assert!(!accessor.is_null(&test_struct).unwrap()); + + // Test null check on null list + let null_struct = Struct::from_iter(vec![Some(Literal::bool(false)), None]); + assert!(accessor.is_null(&null_struct).unwrap()); + } + + #[test] + fn test_complex_type_accessor_map() { + let map_type = Type::Map(MapType::new( + Arc::new(NestedField::map_key_element( + 1, + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::map_value_element( + 2, + Type::Primitive(PrimitiveType::Int), + true, + )), + )); + let accessor = StructAccessor::new_complex(0, &map_type); + + assert!(accessor.accessor_type().is_complex()); + assert!(matches!(accessor.accessor_type(), AccessorType::Map)); + + // Test null check on null map + let null_struct = Struct::from_iter(vec![None]); + assert!(accessor.is_null(&null_struct).unwrap()); + } + + #[test] + fn test_primitive_is_null() { + let accessor = StructAccessor::new(0, PrimitiveType::Int); + + // Test null check on non-null primitive + let test_struct = Struct::from_iter(vec![Some(Literal::int(42))]); + assert!(!accessor.is_null(&test_struct).unwrap()); + + // Test null check on null primitive + let null_struct = Struct::from_iter(vec![None]); + assert!(accessor.is_null(&null_struct).unwrap()); + } + + #[test] + fn test_accessor_type_as_primitive() { + let primitive = AccessorType::Primitive(PrimitiveType::Int); + assert_eq!(primitive.as_primitive(), Some(&PrimitiveType::Int)); + assert!(!primitive.is_complex()); + + let struct_type = AccessorType::Struct; + assert_eq!(struct_type.as_primitive(), None); + assert!(struct_type.is_complex()); + + let list_type = AccessorType::List; + assert_eq!(list_type.as_primitive(), None); + assert!(list_type.is_complex()); + + let map_type = AccessorType::Map; + assert_eq!(map_type.as_primitive(), None); + assert!(map_type.is_complex()); + } } diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 7080b6e700..22a10a841c 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -196,14 +196,22 @@ impl SchemaBuilder { } Type::Struct(nested) => { + // add an accessor for the struct itself (for null checks) + let struct_accessor = + Arc::new(StructAccessor::new_complex(pos, field.field_type.as_ref())); + map.insert(field.id, struct_accessor); + // add accessors for nested fields for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) { let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor)); map.insert(field_id, new_accessor.clone()); } } - _ => { - // Accessors don't get built for Map or List types + Type::List(_) | Type::Map(_) => { + // add an accessor for complex types (for null checks) + let accessor = + Arc::new(StructAccessor::new_complex(pos, field.field_type.as_ref())); + map.insert(field.id, accessor); } } } @@ -220,6 +228,11 @@ impl SchemaBuilder { results.push((field.id, accessor)); } Type::Struct(nested) => { + // add an accessor for the struct itself (for null checks) + let struct_accessor = + Box::new(StructAccessor::new_complex(pos, field.field_type.as_ref())); + results.push((field.id, struct_accessor)); + let nested_accessors = Self::build_accessors_nested(nested.fields()); let wrapped_nested_accessors = @@ -230,8 +243,11 @@ impl SchemaBuilder { results.extend(wrapped_nested_accessors); } - _ => { - // Accessors don't get built for Map or List types + Type::List(_) | Type::Map(_) => { + // add an accessor for complex types (for null checks) + let accessor = + Box::new(StructAccessor::new_complex(pos, field.field_type.as_ref())); + results.push((field.id, accessor)); } } }