This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 58bdb9fd9 fix: restore no-op logic in constants_map for NULL
identity-partitioned columns (#1922)
58bdb9fd9 is described below
commit 58bdb9fd94c9c5d847abffbdcb36e2273bb8bf14
Author: Matt Butrovich <[email protected]>
AuthorDate: Thu Dec 11 05:30:07 2025 -0500
fix: restore no-op logic in constants_map for NULL identity-partitioned
columns (#1922)
## Which issue does this PR close?
See
https://github.com/apache/iceberg-rust/pull/1824#discussion_r2584486989
and
https://github.com/apache/iceberg-rust/issues/1914#issuecomment-3634315005
## What changes are included in this PR?
This restores `record_batch_transformer.rs`'s `constants_map` function to
its pre-#1824 behavior, in which `NULL` partition values are not inserted
into the constants map but are simply skipped. This allows the column
projection rules for missing partition values to default to `NULL`.
## Are these changes tested?
New test, and running the entire Iceberg Java suite via DataFusion Comet
in https://github.com/apache/datafusion-comet/pull/2729.
---
.../iceberg/src/arrow/record_batch_transformer.rs | 81 +++++++++++++++++++---
1 file changed, 73 insertions(+), 8 deletions(-)
diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs
b/crates/iceberg/src/arrow/record_batch_transformer.rs
index c4782464c..439358435 100644
--- a/crates/iceberg/src/arrow/record_batch_transformer.rs
+++ b/crates/iceberg/src/arrow/record_batch_transformer.rs
@@ -83,14 +83,10 @@ fn constants_map(
// Handle both None (null) and Some(Literal::Primitive) cases
match &partition_data[pos] {
None => {
- // TODO
(https://github.com/apache/iceberg-rust/issues/1914): Add support for null
datum values.
- return Err(Error::new(
- ErrorKind::Unexpected,
- format!(
- "Partition field {} has null value for identity
transform",
- field.source_id
- ),
- ));
+ // Skip null partition values - they will be resolved as
null per Iceberg spec rule #4.
+ // When a partition value is null, we don't add it to the
constants map,
+ // allowing downstream column resolution to handle it
correctly.
+ continue;
}
Some(Literal::Primitive(value)) => {
// Create a Datum from the primitive type and value
@@ -1610,4 +1606,73 @@ mod test {
assert_eq!(get_string_value(result.column(4).as_ref(), 0), "");
assert_eq!(get_string_value(result.column(4).as_ref(), 1), "");
}
+
+ /// Test handling of null values in identity-partitioned columns.
+ ///
+ /// Reproduces TestPartitionValues.testNullPartitionValue() from
iceberg-java, which
+ /// writes records where the partition column has null values. Before the
fix in #1922,
+ /// this would error with "Partition field X has null value for identity
transform".
+ #[test]
+ fn null_identity_partition_value() {
+ use crate::spec::{Struct, Transform};
+
+ let schema = Arc::new(
+ Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ NestedField::optional(1, "id",
Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::optional(2, "data",
Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()
+ .unwrap(),
+ );
+
+ let partition_spec = Arc::new(
+ crate::spec::PartitionSpec::builder(schema.clone())
+ .with_spec_id(0)
+ .add_partition_field("data", "data", Transform::Identity)
+ .unwrap()
+ .build()
+ .unwrap(),
+ );
+
+ // Partition has null value for the data column
+ let partition_data = Struct::from_iter(vec![None]);
+
+ let file_schema = Arc::new(ArrowSchema::new(vec![simple_field(
+ "id",
+ DataType::Int32,
+ true,
+ "1",
+ )]));
+
+ let projected_field_ids = [1, 2];
+
+ let mut transformer = RecordBatchTransformerBuilder::new(schema,
&projected_field_ids)
+ .with_partition(partition_spec, partition_data)
+ .expect("Should handle null partition values")
+ .build();
+
+ let file_batch =
+ RecordBatch::try_new(file_schema,
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))])
+ .unwrap();
+
+ let result = transformer.process_record_batch(file_batch).unwrap();
+
+ assert_eq!(result.num_columns(), 2);
+ assert_eq!(result.num_rows(), 3);
+
+ let id_col = result
+ .column(0)
+ .as_any()
+ .downcast_ref::<Int32Array>()
+ .unwrap();
+ assert_eq!(id_col.values(), &[1, 2, 3]);
+
+ // Partition column with null value should produce nulls
+ let data_col = result.column(1);
+ assert!(data_col.is_null(0));
+ assert!(data_col.is_null(1));
+ assert!(data_col.is_null(2));
+ }
}