This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 043ca21c feat(reader): Date32 from days since epoch for Literal:try_from_json (#1803)
043ca21c is described below
commit 043ca21cd425096a4b25b66090887d03699df696
Author: Matt Butrovich <[email protected]>
AuthorDate: Fri Oct 31 06:58:02 2025 -0400
feat(reader): Date32 from days since epoch for Literal:try_from_json (#1803)
---
crates/iceberg/src/spec/values.rs | 51 +++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs
index d06e754d..4c763a39 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -1888,6 +1888,17 @@ impl Literal {
date::date_to_days(&NaiveDate::parse_from_str(&s, "%Y-%m-%d")?),
))))
}
+ (PrimitiveType::Date, JsonValue::Number(number)) => {
+ Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(
+ number
+ .as_i64()
+ .ok_or(Error::new(
+ crate::ErrorKind::DataInvalid,
+ "Failed to convert json number to date (days since epoch)",
+ ))?
+ .try_into()?,
+ ))))
+ }
(PrimitiveType::Time, JsonValue::String(s)) => {
Ok(Some(Literal::Primitive(PrimitiveLiteral::Long(
time::time_to_microseconds(&NaiveTime::parse_from_str(&s, "%H:%M:%S%.f")?),
@@ -3942,4 +3953,44 @@ mod tests {
assert_eq!(double_sorted, double_expected);
}
+
+ /// Test Date deserialization from JSON as number (days since epoch).
+ ///
+ /// This reproduces the scenario from Iceberg Java's TestAddFilesProcedure where:
+ /// - Date partition columns have initial_default values in manifests
+ /// - These values are serialized as days since epoch (e.g., 18628 for 2021-01-01)
+ /// - The JSON schema includes: {"type":"date","initial-default":18628}
+ ///
+ /// Prior to this fix, Date values in JSON were only parsed from String format ("2021-01-01"),
+ /// causing initial_default values to be lost during schema deserialization.
+ ///
+ /// This test ensures both formats are supported:
+ /// - String format: "2021-01-01" (used in table metadata)
+ /// - Number format: 18628 (used in initial-default values from add_files)
+ ///
+ /// See: Iceberg Java TestAddFilesProcedure.addDataPartitionedByDateToPartitioned()
+ #[test]
+ fn test_date_from_json_as_number() {
+ use serde_json::json;
+
+ // Test Date as number (days since epoch) - used in initial-default from add_files
+ let date_number = json!(18628); // 2021-01-01 is 18628 days since 1970-01-01
+ let result =
+ Literal::try_from_json(date_number, &Type::Primitive(PrimitiveType::Date)).unwrap();
+ assert_eq!(
+ result,
+ Some(Literal::Primitive(PrimitiveLiteral::Int(18628)))
+ );
+
+ // Test Date as string - traditional format
+ let date_string = json!("2021-01-01");
+ let result =
+ Literal::try_from_json(date_string, &Type::Primitive(PrimitiveType::Date)).unwrap();
+ assert_eq!(
+ result,
+ Some(Literal::Primitive(PrimitiveLiteral::Int(18628)))
+ );
+
+ // Both formats should produce the same Literal value
+ }
}