This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 7eae5248 Fix hour transform (#1146)
7eae5248 is described below
commit 7eae52489037f7061fea7a525dcdb1e11c2b1d70
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Mar 31 04:26:08 2025 +0200
Fix hour transform (#1146)
## Which issue does this PR close?
Rust's integer division rounds towards zero by default, whereas we want
to `floor` the value.
Checked against the reference implementation:

---
bindings/python/tests/test_transform.py | 2 +-
crates/iceberg/src/transform/temporal.rs | 81 ++++++++++++++------------------
2 files changed, 35 insertions(+), 48 deletions(-)
diff --git a/bindings/python/tests/test_transform.py
b/bindings/python/tests/test_transform.py
index 22397754..6156f0d6 100644
--- a/bindings/python/tests/test_transform.py
+++ b/bindings/python/tests/test_transform.py
@@ -98,7 +98,7 @@ def test_hour_transform():
]
)
result = transform.hour(arr)
- expected = pa.array([19, 264420, -17072905], type=pa.int32())
+ expected = pa.array([19, 264420, -17072906], type=pa.int32())
assert result == expected
diff --git a/crates/iceberg/src/transform/temporal.rs
b/crates/iceberg/src/transform/temporal.rs
index c88e5438..51be7c70 100644
--- a/crates/iceberg/src/transform/temporal.rs
+++ b/crates/iceberg/src/transform/temporal.rs
@@ -337,12 +337,12 @@ pub struct Hour;
impl Hour {
#[inline]
fn hour_timestamp_micro(v: i64) -> i32 {
- (v / MICROSECONDS_PER_HOUR) as i32
+ v.div_euclid(MICROSECONDS_PER_HOUR) as i32
}
#[inline]
fn hour_timestamp_nano(v: i64) -> i32 {
- (v / NANOSECONDS_PER_HOUR) as i32
+ v.div_euclid(NANOSECONDS_PER_HOUR) as i32
}
}
@@ -2390,18 +2390,8 @@ mod test {
transform: &BoxedTransformFunction,
expect: Datum,
) {
- let timestamp = Datum::timestamp_micros(
- NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- );
- let timestamp_tz = Datum::timestamptz_micros(
- NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- );
+ let timestamp = Datum::timestamp_from_str(time).unwrap();
+ let timestamp_tz = Datum::timestamptz_from_str(time.to_owned() + "
+00:00").unwrap();
let res = transform.transform_literal(&timestamp).unwrap().unwrap();
assert_eq!(res, expect);
let res = transform.transform_literal(&timestamp_tz).unwrap().unwrap();
@@ -2432,20 +2422,8 @@ mod test {
transform: &BoxedTransformFunction,
expect: Datum,
) {
- let timestamp_ns = Datum::timestamp_nanos(
- NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_nanos_opt()
- .unwrap(),
- );
- let timestamptz_ns = Datum::timestamptz_nanos(
- NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_nanos_opt()
- .unwrap(),
- );
+ let timestamp_ns = Datum::timestamp_from_str(time).unwrap();
+ let timestamptz_ns = Datum::timestamptz_from_str(time.to_owned() + "
+00:00").unwrap();
let res = transform.transform_literal(&timestamp_ns).unwrap().unwrap();
assert_eq!(res, expect);
let res = transform
@@ -2485,7 +2463,7 @@ mod test {
&year,
Datum::int(1970 - super::UNIX_EPOCH_YEAR),
);
- test_timestamp_and_tz_transform("1969-01-01 00:00:00.00", &year,
Datum::int(-1));
+ test_timestamp_and_tz_transform("1969-01-01T00:00:00.000000", &year,
Datum::int(-1));
// Test TimestampNanosecond
test_timestamp_ns_and_tz_transform_using_i64(
@@ -2493,7 +2471,7 @@ mod test {
&year,
Datum::int(1970 - super::UNIX_EPOCH_YEAR),
);
- test_timestamp_ns_and_tz_transform("1969-01-01 00:00:00.00", &year,
Datum::int(-1));
+ test_timestamp_ns_and_tz_transform("1969-01-01T00:00:00.000000",
&year, Datum::int(-1));
}
#[test]
@@ -2584,10 +2562,10 @@ mod test {
&month,
Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12),
);
- test_timestamp_and_tz_transform("1969-12-01 23:00:00.00", &month,
Datum::int(-1));
- test_timestamp_and_tz_transform("2017-12-01 00:00:00.00", &month,
Datum::int(575));
- test_timestamp_and_tz_transform("1970-01-01 00:00:00.00", &month,
Datum::int(0));
- test_timestamp_and_tz_transform("1969-12-31 00:00:00.00", &month,
Datum::int(-1));
+ test_timestamp_and_tz_transform("1969-12-01T23:00:00.000000", &month,
Datum::int(-1));
+ test_timestamp_and_tz_transform("2017-12-01T00:00:00.000000", &month,
Datum::int(575));
+ test_timestamp_and_tz_transform("1970-01-01T00:00:00.000000", &month,
Datum::int(0));
+ test_timestamp_and_tz_transform("1969-12-31T00:00:00.000000", &month,
Datum::int(-1));
// Test TimestampNanosecond
test_timestamp_ns_and_tz_transform_using_i64(
@@ -2595,10 +2573,10 @@ mod test {
&month,
Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12),
);
- test_timestamp_ns_and_tz_transform("1969-12-01 23:00:00.00", &month,
Datum::int(-1));
- test_timestamp_ns_and_tz_transform("2017-12-01 00:00:00.00", &month,
Datum::int(575));
- test_timestamp_ns_and_tz_transform("1970-01-01 00:00:00.00", &month,
Datum::int(0));
- test_timestamp_ns_and_tz_transform("1969-12-31 00:00:00.00", &month,
Datum::int(-1));
+ test_timestamp_ns_and_tz_transform("1969-12-01T23:00:00.000000",
&month, Datum::int(-1));
+ test_timestamp_ns_and_tz_transform("2017-12-01T00:00:00.000000",
&month, Datum::int(575));
+ test_timestamp_ns_and_tz_transform("1970-01-01T00:00:00.000000",
&month, Datum::int(0));
+ test_timestamp_ns_and_tz_transform("1969-12-31T00:00:00.000000",
&month, Datum::int(-1));
}
#[test]
@@ -2689,12 +2667,12 @@ mod test {
// Test TimestampMicrosecond
test_timestamp_and_tz_transform_using_i64(1512151975038194, &day,
Datum::date(17501));
test_timestamp_and_tz_transform_using_i64(-115200000000, &day,
Datum::date(-2));
- test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day,
Datum::date(17501));
+ test_timestamp_and_tz_transform("2017-12-01T10:30:42.123000", &day,
Datum::date(17501));
// Test TimestampNanosecond
test_timestamp_ns_and_tz_transform_using_i64(1512151975038194, &day,
Datum::date(17));
test_timestamp_ns_and_tz_transform_using_i64(-115200000000, &day,
Datum::date(-1));
- test_timestamp_ns_and_tz_transform("2017-12-01 10:30:42.123", &day,
Datum::date(17501));
+ test_timestamp_ns_and_tz_transform("2017-12-01T10:30:42.123000", &day,
Datum::date(17501));
}
#[test]
@@ -2760,14 +2738,23 @@ mod test {
fn test_transform_hours_literal() {
let hour = Box::new(super::Hour) as BoxedTransformFunction;
- // Test TimestampMicrosecond
- test_timestamp_and_tz_transform("2017-12-01 18:00:00.00", &hour,
Datum::int(420042));
- test_timestamp_and_tz_transform("1969-12-31 23:00:00.00", &hour,
Datum::int(-1));
- test_timestamp_and_tz_transform("0022-05-01 22:01:01.00", &hour,
Datum::int(-17072905));
+ test_timestamp_and_tz_transform("2017-12-01T18:00:00.000000", &hour,
Datum::int(420042));
+ test_timestamp_and_tz_transform("1970-01-01T22:01:01.000000", &hour,
Datum::int(22));
+ test_timestamp_and_tz_transform("1969-12-31T23:00:00.000000", &hour,
Datum::int(-1));
+ test_timestamp_and_tz_transform("1969-12-31T22:01:01.000000", &hour,
Datum::int(-2));
+ test_timestamp_and_tz_transform("0022-05-01T22:01:01.000000", &hour,
Datum::int(-17072906));
// Test TimestampNanosecond
- test_timestamp_ns_and_tz_transform("2017-12-01 18:00:00.00", &hour,
Datum::int(420042));
- test_timestamp_ns_and_tz_transform("1969-12-31 23:00:00.00", &hour,
Datum::int(-1));
- test_timestamp_ns_and_tz_transform("1900-05-01 22:01:01.00", &hour,
Datum::int(-610705));
+ test_timestamp_ns_and_tz_transform(
+ "2017-12-01T18:00:00.0000000000",
+ &hour,
+ Datum::int(420042),
+ );
+ test_timestamp_ns_and_tz_transform("1969-12-31T23:00:00.0000000000",
&hour, Datum::int(-1));
+ test_timestamp_ns_and_tz_transform(
+ "1900-05-01T22:01:01.0000000000",
+ &hour,
+ Datum::int(-610706),
+ );
}
}