This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 58bdb9fd9 fix: restore no-op logic in constants_map for NULL
identity-partitioned columns (#1922)
58bdb9fd9 is described below
commit 58bdb9fd94c9c5d847abffbdcb36e2273bb8bf14
Author: Matt Butrovich <[email protected]>
AuthorDate: Thu Dec 11 05:30:07 2025 -0500
fix: restore no-op logic in constants_map for NULL identity-partitioned
columns (#1922)
## Which issue does this PR close?
See
https://github.com/apache/iceberg-rust/pull/1824#discussion_r2584486989
and
https://github.com/apache/iceberg-rust/issues/1914#issuecomment-3634315005
## What changes are included in this PR?
This restores `record_batch_transformer.rs`'s `constants_map` function to
its pre-#1824 behavior, in which `NULL` partition values are not inserted
into the constants map but are simply skipped. This allows the column
projection rules for missing partition values to default to `NULL`.
## Are these changes tested?
New test, and running the entire Iceberg Java suite via DataFusion Comet
in https://github.com/apache/datafusion-comet/pull/2729.
---
.../iceberg/src/arrow/record_batch_transformer.rs | 81 +++++++++++++++++++---
1 file changed, 73 insertions(+), 8 deletions(-)
diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs
b/crates/iceberg/src/arrow/record_batch_transformer.rs
index c4782464c..439358435 100644
--- a/crates/iceberg/src/arrow/record_batch_transformer.rs
+++ b/crates/iceberg/src/arrow/record_batch_transformer.rs
@@ -83,14 +83,10 @@ fn constants_map(
// Handle both None (null) and Some(Literal::Primitive) cases
match &partition_data[pos] {
None => {
- // TODO
(https://github.com/apache/iceberg-rust/issues/1914): Add support for null
datum values.
- return Err(Error::new(
- ErrorKind::Unexpected,
- format!(
- "Partition field {} has null value for identity
transform",
- field.source_id
- ),
- ));
+ // Skip null partition values - they will be resolved as
null per Iceberg spec rule #4.
+ // When a partition value is null, we don't add it to the
constants map,
+ // allowing downstream column resolution to handle it
correctly.
+ continue;
}
Some(Literal::Primitive(value)) => {
// Create a Datum from the primitive type and value
@@ -1610,4 +1606,73 @@ mod test {
assert_eq!(get_string_value(result.column(4).as_ref(), 0), "");
assert_eq!(get_string_value(result.column(4).as_ref(), 1), "");
}
+
+ /// Test handling of null values in identity-partitioned columns.
+ ///
+ /// Reproduces TestPartitionValues.testNullPartitionValue() from
iceberg-java, which
+ /// writes records where the partition column has null values. Before the
fix in #1922,
+ /// this would error with "Partition field X has null value for identity
transform".
+ #[test]
+ fn null_identity_partition_value() {
+ use crate::spec::{Struct, Transform};
+
+ let schema = Arc::new(
+ Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ NestedField::optional(1, "id",
Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::optional(2, "data",
Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()
+ .unwrap(),
+ );
+
+ let partition_spec = Arc::new(
+ crate::spec::PartitionSpec::builder(schema.clone())
+ .with_spec_id(0)
+ .add_partition_field("data", "data", Transform::Identity)
+ .unwrap()
+ .build()
+ .unwrap(),
+ );
+
+ // Partition has null value for the data column
+ let partition_data = Struct::from_iter(vec![None]);
+
+ let file_schema = Arc::new(ArrowSchema::new(vec![simple_field(
+ "id",
+ DataType::Int32,
+ true,
+ "1",
+ )]));
+
+ let projected_field_ids = [1, 2];
+
+ let mut transformer = RecordBatchTransformerBuilder::new(schema,
&projected_field_ids)
+ .with_partition(partition_spec, partition_data)
+ .expect("Should handle null partition values")
+ .build();
+
+ let file_batch =
+ RecordBatch::try_new(file_schema,
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))])
+ .unwrap();
+
+ let result = transformer.process_record_batch(file_batch).unwrap();
+
+ assert_eq!(result.num_columns(), 2);
+ assert_eq!(result.num_rows(), 3);
+
+ let id_col = result
+ .column(0)
+ .as_any()
+ .downcast_ref::<Int32Array>()
+ .unwrap();
+ assert_eq!(id_col.values(), &[1, 2, 3]);
+
+ // Partition column with null value should produce nulls
+ let data_col = result.column(1);
+ assert!(data_col.is_null(0));
+ assert!(data_col.is_null(1));
+ assert!(data_col.is_null(2));
+ }
}