This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new b36d1c6 feat(timestamp_ns): Implement timestamps with nanosecond
precision (#542)
b36d1c6 is described below
commit b36d1c606695aa06e0d359582eeb1a0080d90bf0
Author: Timothy Maloney <[email protected]>
AuthorDate: Wed Aug 21 06:11:18 2024 -0700
feat(timestamp_ns): Implement timestamps with nanosecond precision (#542)
* feat(timestamp_ns): first commit
* feat(timestamp_ns): Add mappings for timestamp_ns/timestamptz_ns
* feat(timestamp_ns): Remove unused dep
* feat(timestamp_ns): Fix unit test
* feat(timestamp_ns): Fix test_all_type_for_write()
* feat(timestamp_ns): fix test_transform_days_literal
* feat(timestamp_ns): fix math for timestamptz_nanos
* chore: formatting
* chore: formatting
* chore: Appease clippy
---------
Co-authored-by: Timothy Maloney <[email protected]>
---
crates/catalog/glue/src/schema.rs | 2 +
crates/catalog/hms/src/schema.rs | 2 +
crates/iceberg/src/arrow/schema.rs | 7 +
crates/iceberg/src/avro/schema.rs | 3 +
crates/iceberg/src/spec/datatypes.rs | 10 ++
crates/iceberg/src/spec/manifest.rs | 10 ++
crates/iceberg/src/spec/transform.rs | 11 +-
crates/iceberg/src/spec/values.rs | 71 +++++++-
crates/iceberg/src/transform/bucket.rs | 4 +-
crates/iceberg/src/transform/identity.rs | 4 +-
crates/iceberg/src/transform/temporal.rs | 190 ++++++++++++++++++++-
crates/iceberg/src/transform/truncate.rs | 4 +-
crates/iceberg/src/transform/void.rs | 4 +-
.../src/writer/file_writer/parquet_writer.rs | 61 +++++--
14 files changed, 353 insertions(+), 30 deletions(-)
diff --git a/crates/catalog/glue/src/schema.rs
b/crates/catalog/glue/src/schema.rs
index c349219..bb676e3 100644
--- a/crates/catalog/glue/src/schema.rs
+++ b/crates/catalog/glue/src/schema.rs
@@ -164,6 +164,8 @@ impl SchemaVisitor for GlueSchemaBuilder {
PrimitiveType::Double => "double".to_string(),
PrimitiveType::Date => "date".to_string(),
PrimitiveType::Timestamp => "timestamp".to_string(),
+ PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
+ PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid
=> {
"string".to_string()
}
diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs
index fa7819d..4012098 100644
--- a/crates/catalog/hms/src/schema.rs
+++ b/crates/catalog/hms/src/schema.rs
@@ -121,6 +121,8 @@ impl SchemaVisitor for HiveSchemaBuilder {
PrimitiveType::Double => "double".to_string(),
PrimitiveType::Date => "date".to_string(),
PrimitiveType::Timestamp => "timestamp".to_string(),
+ PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
+ PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid
=> {
"string".to_string()
}
diff --git a/crates/iceberg/src/arrow/schema.rs
b/crates/iceberg/src/arrow/schema.rs
index 43875bf..a412437 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -579,6 +579,13 @@ impl SchemaVisitor for ToArrowSchemaConverter {
// Timestampz always stored as UTC
DataType::Timestamp(TimeUnit::Microsecond,
Some("+00:00".into())),
)),
+ crate::spec::PrimitiveType::TimestampNs =>
Ok(ArrowSchemaOrFieldOrType::Type(
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ )),
+ crate::spec::PrimitiveType::TimestamptzNs =>
Ok(ArrowSchemaOrFieldOrType::Type(
+ // Store timestamptz_ns as UTC
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some("+00:00".into())),
+ )),
crate::spec::PrimitiveType::String => {
Ok(ArrowSchemaOrFieldOrType::Type(DataType::Utf8))
}
diff --git a/crates/iceberg/src/avro/schema.rs
b/crates/iceberg/src/avro/schema.rs
index 7f81427..cfcf38d 100644
--- a/crates/iceberg/src/avro/schema.rs
+++ b/crates/iceberg/src/avro/schema.rs
@@ -229,6 +229,8 @@ impl SchemaVisitor for SchemaToAvroSchema {
PrimitiveType::Time => AvroSchema::TimeMicros,
PrimitiveType::Timestamp => AvroSchema::TimestampMicros,
PrimitiveType::Timestamptz => AvroSchema::TimestampMicros,
+ PrimitiveType::TimestampNs => AvroSchema::TimestampNanos,
+ PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos,
PrimitiveType::String => AvroSchema::String,
PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES,
Some(UUID_LOGICAL_TYPE))?,
PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize,
None)?,
@@ -519,6 +521,7 @@ impl AvroSchemaVisitor for AvroSchemaToSchema {
AvroSchema::Date => Type::Primitive(PrimitiveType::Date),
AvroSchema::TimeMicros => Type::Primitive(PrimitiveType::Time),
AvroSchema::TimestampMicros =>
Type::Primitive(PrimitiveType::Timestamp),
+ AvroSchema::TimestampNanos =>
Type::Primitive(PrimitiveType::TimestampNs),
AvroSchema::Boolean => Type::Primitive(PrimitiveType::Boolean),
AvroSchema::Int => Type::Primitive(PrimitiveType::Int),
AvroSchema::Long => Type::Primitive(PrimitiveType::Long),
diff --git a/crates/iceberg/src/spec/datatypes.rs
b/crates/iceberg/src/spec/datatypes.rs
index 833f17f..d382459 100644
--- a/crates/iceberg/src/spec/datatypes.rs
+++ b/crates/iceberg/src/spec/datatypes.rs
@@ -225,6 +225,10 @@ pub enum PrimitiveType {
Timestamp,
/// Timestamp in microsecond precision, with timezone
Timestamptz,
+ /// Timestamp in nanosecond precision, without timezone
+ TimestampNs,
+ /// Timestamp in nanosecond precision with timezone
+ TimestamptzNs,
/// Arbitrary-length character sequences encoded in utf-8
String,
/// Universally Unique Identifiers, should use 16-byte fixed
@@ -250,6 +254,8 @@ impl PrimitiveType {
| (PrimitiveType::Time, PrimitiveLiteral::Long(_))
| (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
| (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
+ | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
+ | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
| (PrimitiveType::String, PrimitiveLiteral::String(_))
| (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
| (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
@@ -360,6 +366,8 @@ impl fmt::Display for PrimitiveType {
PrimitiveType::Time => write!(f, "time"),
PrimitiveType::Timestamp => write!(f, "timestamp"),
PrimitiveType::Timestamptz => write!(f, "timestamptz"),
+ PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
+ PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
PrimitiveType::String => write!(f, "string"),
PrimitiveType::Uuid => write!(f, "uuid"),
PrimitiveType::Fixed(size) => write!(f, "fixed({})", size),
@@ -1152,6 +1160,8 @@ mod tests {
(PrimitiveType::Time, PrimitiveLiteral::Long(1)),
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
(
PrimitiveType::Uuid,
PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),
diff --git a/crates/iceberg/src/spec/manifest.rs
b/crates/iceberg/src/spec/manifest.rs
index eb3d022..e1fe33c 100644
--- a/crates/iceberg/src/spec/manifest.rs
+++ b/crates/iceberg/src/spec/manifest.rs
@@ -1570,6 +1570,11 @@ mod tests {
"v_ts_ntz",
Type::Primitive(PrimitiveType::Timestamp),
)),
+ Arc::new(NestedField::optional(
+ 12,
+ "v_ts_ns_ntz",
+ Type::Primitive(PrimitiveType::TimestampNs
+ ))),
])
.build()
.unwrap(),
@@ -1678,6 +1683,11 @@ mod tests {
"v_ts_ntz",
Type::Primitive(PrimitiveType::Timestamp),
)),
+ Arc::new(NestedField::optional(
+ 12,
+ "v_ts_ns_ntz",
+ Type::Primitive(PrimitiveType::TimestampNs
+ )))
])
.build()
.unwrap(),
diff --git a/crates/iceberg/src/spec/transform.rs
b/crates/iceberg/src/spec/transform.rs
index 9148844..6b7d03f 100644
--- a/crates/iceberg/src/spec/transform.rs
+++ b/crates/iceberg/src/spec/transform.rs
@@ -159,6 +159,8 @@ impl Transform {
| PrimitiveType::Time
| PrimitiveType::Timestamp
| PrimitiveType::Timestamptz
+ | PrimitiveType::TimestampNs
+ | PrimitiveType::TimestamptzNs
| PrimitiveType::String
| PrimitiveType::Uuid
| PrimitiveType::Fixed(_)
@@ -200,6 +202,8 @@ impl Transform {
match p {
PrimitiveType::Timestamp
| PrimitiveType::Timestamptz
+ | PrimitiveType::TimestampNs
+ | PrimitiveType::TimestamptzNs
| PrimitiveType::Date =>
Ok(Type::Primitive(PrimitiveType::Date)),
_ => Err(Error::new(
ErrorKind::DataInvalid,
@@ -216,9 +220,10 @@ impl Transform {
Transform::Hour => {
if let Type::Primitive(p) = input_type {
match p {
- PrimitiveType::Timestamp | PrimitiveType::Timestamptz
=> {
- Ok(Type::Primitive(PrimitiveType::Int))
- }
+ PrimitiveType::Timestamp
+ | PrimitiveType::Timestamptz
+ | PrimitiveType::TimestampNs
+ | PrimitiveType::TimestamptzNs =>
Ok(Type::Primitive(PrimitiveType::Int)),
_ => Err(Error::new(
ErrorKind::DataInvalid,
format!("{input_type} is not a valid input type of
{self} transform",),
diff --git a/crates/iceberg/src/spec/values.rs
b/crates/iceberg/src/spec/values.rs
index 03fd1ec..3568d3d 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -38,6 +38,7 @@ use serde::ser::SerializeStruct;
use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf;
use serde_json::{Map as JsonMap, Number, Value as JsonValue};
+use timestamp::nanoseconds_to_datetime;
use uuid::Uuid;
use super::datatypes::{PrimitiveType, Type};
@@ -45,7 +46,7 @@ use crate::error::Result;
use crate::spec::values::date::{date_from_naive_date, days_to_date,
unix_epoch};
use crate::spec::values::time::microseconds_to_time;
use crate::spec::values::timestamp::microseconds_to_datetime;
-use crate::spec::values::timestamptz::microseconds_to_datetimetz;
+use crate::spec::values::timestamptz::{microseconds_to_datetimetz,
nanoseconds_to_datetimetz};
use crate::spec::MAX_DECIMAL_PRECISION;
use crate::{ensure_data_valid, Error, ErrorKind};
@@ -326,6 +327,12 @@ impl Display for Datum {
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => {
write!(f, "{}", microseconds_to_datetimetz(*val))
}
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => {
+ write!(f, "{}", nanoseconds_to_datetime(*val))
+ }
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
+ write!(f, "{}", nanoseconds_to_datetimetz(*val))
+ }
(_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val),
(PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => {
write!(f, "{}", Uuid::from_u128(*val))
@@ -401,6 +408,12 @@ impl Datum {
PrimitiveType::Timestamptz => {
PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
}
+ PrimitiveType::TimestampNs => {
+ PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
+ }
+ PrimitiveType::TimestamptzNs => {
+ PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
+ }
PrimitiveType::String => {
PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string())
}
@@ -734,6 +747,23 @@ impl Datum {
}
}
+ /// Creates a timestamp from unix epoch in nanoseconds.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamp_nanos(1000);
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001");
+ /// ```
+ pub fn timestamp_nanos(value: i64) -> Self {
+ Self {
+ r#type: PrimitiveType::TimestampNs,
+ literal: PrimitiveLiteral::Long(value),
+ }
+ }
+
/// Creates a timestamp from [`DateTime`].
///
/// Example:
@@ -792,6 +822,23 @@ impl Datum {
}
}
+ /// Creates a timestamp with timezone from unix epoch in nanoseconds.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamptz_nanos(1000);
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC");
+ /// ```
+ pub fn timestamptz_nanos(value: i64) -> Self {
+ Self {
+ r#type: PrimitiveType::TimestamptzNs,
+ literal: PrimitiveLiteral::Long(value),
+ }
+ }
+
/// Creates a timestamp with timezone from [`DateTime`].
/// Example:
///
@@ -1805,6 +1852,18 @@ impl Literal {
.format("%Y-%m-%dT%H:%M:%S%.f+00:00")
.to_string(),
)),
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) =>
Ok(JsonValue::String(
+ timestamp::nanoseconds_to_datetime(val)
+ .format("%Y-%m-%dT%H:%M:%S%.f")
+ .to_string(),
+ )),
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) =>
{
+ Ok(JsonValue::String(
+ timestamptz::nanoseconds_to_datetimetz(val)
+ .format("%Y-%m-%dT%H:%M:%S%.f+00:00")
+ .to_string(),
+ ))
+ }
(PrimitiveType::String, PrimitiveLiteral::String(val)) => {
Ok(JsonValue::String(val.clone()))
}
@@ -1958,6 +2017,10 @@ mod timestamp {
// This shouldn't fail until the year 262000
DateTime::from_timestamp_micros(micros).unwrap().naive_utc()
}
+
+ pub(crate) fn nanoseconds_to_datetime(nanos: i64) -> NaiveDateTime {
+ DateTime::from_timestamp_nanos(nanos).naive_utc()
+ }
}
mod timestamptz {
@@ -1972,6 +2035,12 @@ mod timestamptz {
DateTime::from_timestamp(secs, rem as u32 * 1_000).unwrap()
}
+
+ pub(crate) fn nanoseconds_to_datetimetz(nanos: i64) -> DateTime<Utc> {
+ let (secs, rem) = (nanos / 1_000_000_000, nanos % 1_000_000_000);
+
+ DateTime::from_timestamp(secs, rem as u32).unwrap()
+ }
}
mod _serde {
diff --git a/crates/iceberg/src/transform/bucket.rs
b/crates/iceberg/src/transform/bucket.rs
index 83cbbd8..ce39826 100644
--- a/crates/iceberg/src/transform/bucket.rs
+++ b/crates/iceberg/src/transform/bucket.rs
@@ -256,7 +256,7 @@ mod test {
use crate::expr::PredicateOperator;
use crate::spec::PrimitiveType::{
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time,
Timestamp,
- Timestamptz, Uuid,
+ TimestampNs, Timestamptz, TimestamptzNs, Uuid,
};
use crate::spec::Type::{Primitive, Struct};
use crate::spec::{Datum, NestedField, PrimitiveType, StructType,
Transform, Type};
@@ -297,6 +297,8 @@ mod test {
(Primitive(Time), Some(Primitive(Int))),
(Primitive(Timestamp), Some(Primitive(Int))),
(Primitive(Timestamptz), Some(Primitive(Int))),
+ (Primitive(TimestampNs), Some(Primitive(Int))),
+ (Primitive(TimestamptzNs), Some(Primitive(Int))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
diff --git a/crates/iceberg/src/transform/identity.rs
b/crates/iceberg/src/transform/identity.rs
index e23ccff..68e5a0b 100644
--- a/crates/iceberg/src/transform/identity.rs
+++ b/crates/iceberg/src/transform/identity.rs
@@ -38,7 +38,7 @@ impl TransformFunction for Identity {
mod test {
use crate::spec::PrimitiveType::{
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time,
Timestamp,
- Timestamptz, Uuid,
+ TimestampNs, Timestamptz, TimestamptzNs, Uuid,
};
use crate::spec::Type::{Primitive, Struct};
use crate::spec::{NestedField, StructType, Transform};
@@ -81,6 +81,8 @@ mod test {
(Primitive(Time), Some(Primitive(Time))),
(Primitive(Timestamp), Some(Primitive(Timestamp))),
(Primitive(Timestamptz), Some(Primitive(Timestamptz))),
+ (Primitive(TimestampNs), Some(Primitive(TimestampNs))),
+ (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
diff --git a/crates/iceberg/src/transform/temporal.rs
b/crates/iceberg/src/transform/temporal.rs
index 72506a7..f326cfe 100644
--- a/crates/iceberg/src/transform/temporal.rs
+++ b/crates/iceberg/src/transform/temporal.rs
@@ -20,7 +20,9 @@ use std::sync::Arc;
use arrow_arith::arity::binary;
use arrow_arith::temporal::{date_part, DatePart};
use arrow_array::types::Date32Type;
-use arrow_array::{Array, ArrayRef, Date32Array, Int32Array,
TimestampMicrosecondArray};
+use arrow_array::{
+ Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray,
TimestampNanosecondArray,
+};
use arrow_schema::{DataType, TimeUnit};
use chrono::{DateTime, Datelike, Duration};
@@ -34,6 +36,8 @@ const HOUR_PER_SECOND: f64 = 1.0_f64 / 3600.0_f64;
const UNIX_EPOCH_YEAR: i32 = 1970;
/// One second in micros.
const MICROS_PER_SECOND: i64 = 1_000_000;
+/// One second in nanos.
+const NANOS_PER_SECOND: i64 = 1_000_000_000;
/// Extract a date or timestamp year, as years from 1970
#[derive(Debug)]
@@ -41,7 +45,7 @@ pub struct Year;
impl Year {
#[inline]
- fn timestamp_to_year(timestamp: i64) -> Result<i32> {
+ fn timestamp_to_year_micros(timestamp: i64) -> Result<i32> {
Ok(DateTime::from_timestamp_micros(timestamp)
.ok_or_else(|| {
Error::new(
@@ -52,6 +56,11 @@ impl Year {
.year()
- UNIX_EPOCH_YEAR)
}
+
+ #[inline]
+ fn timestamp_to_year_nanos(timestamp: i64) -> Result<i32> {
+ Ok(DateTime::from_timestamp_nanos(timestamp).year() - UNIX_EPOCH_YEAR)
+ }
}
impl TransformFunction for Year {
@@ -72,8 +81,18 @@ impl TransformFunction for Year {
(PrimitiveType::Date, PrimitiveLiteral::Int(v)) => {
Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR
}
- (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) =>
Self::timestamp_to_year(*v)?,
- (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) =>
Self::timestamp_to_year(*v)?,
+ (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_year_micros(*v)?
+ }
+ (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_year_micros(*v)?
+ }
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_year_nanos(*v)?
+ }
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_year_nanos(*v)?
+ }
_ => {
return Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
@@ -94,7 +113,7 @@ pub struct Month;
impl Month {
#[inline]
- fn timestamp_to_month(timestamp: i64) -> Result<i32> {
+ fn timestamp_to_month_micros(timestamp: i64) -> Result<i32> {
// date: aaaa-aa-aa
// unix epoch date: 1970-01-01
// if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970)
@@ -114,6 +133,22 @@ impl Month {
Ok(-delta)
}
}
+
+ #[inline]
+ fn timestamp_to_month_nanos(timestamp: i64) -> Result<i32> {
+ // date: aaaa-aa-aa
+ // unix epoch date: 1970-01-01
+ // if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970)
+ // if date < unix epoch date, delta month = (12 - (aa - 1)) + 12 *
(1970-aaaa-1)
+ let date = DateTime::from_timestamp_nanos(timestamp);
+ let unix_epoch_date = DateTime::from_timestamp_nanos(0);
+ if date > unix_epoch_date {
+ Ok((date.month0() as i32) + 12 * (date.year() - UNIX_EPOCH_YEAR))
+ } else {
+ let delta = (12 - date.month0() as i32) + 12 * (UNIX_EPOCH_YEAR -
date.year() - 1);
+ Ok(-delta)
+ }
+ }
}
impl TransformFunction for Month {
@@ -144,9 +179,17 @@ impl TransformFunction for Month {
(Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12
+ Date32Type::to_naive_date(*v).month0() as i32
}
- (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) =>
Self::timestamp_to_month(*v)?,
+ (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_month_micros(*v)?
+ }
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => {
- Self::timestamp_to_month(*v)?
+ Self::timestamp_to_month_micros(*v)?
+ }
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_month_nanos(*v)?
+ }
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => {
+ Self::timestamp_to_month_nanos(*v)?
}
_ => {
return Err(crate::Error::new(
@@ -196,6 +239,35 @@ impl Day {
Ok(days)
}
+
+ fn day_timestamp_nano(v: i64) -> Result<i32> {
+ let secs = v / NANOS_PER_SECOND;
+
+ let (nanos, offset) = if v >= 0 {
+ let nanos = (v.rem_euclid(NANOS_PER_SECOND)) as u32;
+ let offset = 0i64;
+ (nanos, offset)
+ } else {
+ let v = v + 1;
+ let nanos = (v.rem_euclid(NANOS_PER_SECOND)) as u32;
+ let offset = 1i64;
+ (nanos, offset)
+ };
+
+ let delta = Duration::new(secs, nanos).ok_or_else(|| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Failed to create 'TimeDelta' from seconds {} and nanos
{}",
+ secs, nanos
+ ),
+ )
+ })?;
+
+ let days = (delta.num_days() - offset) as i32;
+
+ Ok(days)
+ }
}
impl TransformFunction for Day {
@@ -206,6 +278,11 @@ impl TransformFunction for Day {
.downcast_ref::<TimestampMicrosecondArray>()
.unwrap()
.try_unary(|v| -> Result<i32> { Self::day_timestamp_micro(v)
})?,
+ DataType::Timestamp(TimeUnit::Nanosecond, _) => input
+ .as_any()
+ .downcast_ref::<TimestampNanosecondArray>()
+ .unwrap()
+ .try_unary(|v| -> Result<i32> { Self::day_timestamp_nano(v)
})?,
DataType::Date32 => input
.as_any()
.downcast_ref::<Date32Array>()
@@ -231,6 +308,12 @@ impl TransformFunction for Day {
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => {
Self::day_timestamp_micro(*v)?
}
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => {
+ Self::day_timestamp_nano(*v)?
+ }
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => {
+ Self::day_timestamp_nano(*v)?
+ }
_ => {
return Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
@@ -254,6 +337,11 @@ impl Hour {
fn hour_timestamp_micro(v: i64) -> i32 {
(v as f64 / 1000.0 / 1000.0 * HOUR_PER_SECOND) as i32
}
+
+ #[inline]
+ fn hour_timestamp_nano(v: i64) -> i32 {
+ (v as f64 / 1_000_000.0 / 1000.0 * HOUR_PER_SECOND) as i32
+ }
}
impl TransformFunction for Hour {
@@ -283,6 +371,12 @@ impl TransformFunction for Hour {
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => {
Self::hour_timestamp_micro(*v)
}
+ (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => {
+ Self::hour_timestamp_nano(*v)
+ }
+ (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => {
+ Self::hour_timestamp_nano(*v)
+ }
_ => {
return Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
@@ -307,7 +401,7 @@ mod test {
use crate::expr::PredicateOperator;
use crate::spec::PrimitiveType::{
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time,
Timestamp,
- Timestamptz, Uuid,
+ TimestampNs, Timestamptz, TimestamptzNs, Uuid,
};
use crate::spec::Type::{Primitive, Struct};
use crate::spec::{Datum, NestedField, PrimitiveType, StructType,
Transform, Type};
@@ -350,6 +444,8 @@ mod test {
(Primitive(Time), None),
(Primitive(Timestamp), Some(Primitive(Date))),
(Primitive(Timestamptz), Some(Primitive(Date))),
+ (Primitive(TimestampNs), Some(Primitive(Date))),
+ (Primitive(TimestamptzNs), Some(Primitive(Date))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
@@ -400,6 +496,8 @@ mod test {
(Primitive(Time), None),
(Primitive(Timestamp), Some(Primitive(Date))),
(Primitive(Timestamptz), Some(Primitive(Date))),
+ (Primitive(TimestampNs), Some(Primitive(Date))),
+ (Primitive(TimestamptzNs), Some(Primitive(Date))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
@@ -450,6 +548,8 @@ mod test {
(Primitive(Time), None),
(Primitive(Timestamp), Some(Primitive(Date))),
(Primitive(Timestamptz), Some(Primitive(Date))),
+ (Primitive(TimestampNs), Some(Primitive(Date))),
+ (Primitive(TimestamptzNs), Some(Primitive(Date))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
@@ -500,6 +600,8 @@ mod test {
(Primitive(Time), None),
(Primitive(Timestamp), Some(Primitive(Int))),
(Primitive(Timestamptz), Some(Primitive(Int))),
+ (Primitive(TimestampNs), Some(Primitive(Int))),
+ (Primitive(TimestamptzNs), Some(Primitive(Int))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
@@ -2323,6 +2425,50 @@ mod test {
assert_eq!(res, expect);
}
+ fn test_timestamp_ns_and_tz_transform(
+ time: &str,
+ transform: &BoxedTransformFunction,
+ expect: Datum,
+ ) {
+ let timestamp_ns = Datum::timestamp_nanos(
+ NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
+ .unwrap()
+ .and_utc()
+ .timestamp_nanos_opt()
+ .unwrap(),
+ );
+ let timestamptz_ns = Datum::timestamptz_nanos(
+ NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
+ .unwrap()
+ .and_utc()
+ .timestamp_nanos_opt()
+ .unwrap(),
+ );
+ let res = transform.transform_literal(&timestamp_ns).unwrap().unwrap();
+ assert_eq!(res, expect);
+ let res = transform
+ .transform_literal(&timestamptz_ns)
+ .unwrap()
+ .unwrap();
+ assert_eq!(res, expect);
+ }
+
+ fn test_timestamp_ns_and_tz_transform_using_i64(
+ time: i64,
+ transform: &BoxedTransformFunction,
+ expect: Datum,
+ ) {
+ let timestamp_ns = Datum::timestamp_nanos(time);
+ let timestamptz_ns = Datum::timestamptz_nanos(time);
+ let res = transform.transform_literal(&timestamp_ns).unwrap().unwrap();
+ assert_eq!(res, expect);
+ let res = transform
+ .transform_literal(&timestamptz_ns)
+ .unwrap()
+ .unwrap();
+ assert_eq!(res, expect);
+ }
+
#[test]
fn test_transform_year_literal() {
let year = Box::new(super::Year) as BoxedTransformFunction;
@@ -2338,6 +2484,14 @@ mod test {
Datum::int(1970 - super::UNIX_EPOCH_YEAR),
);
test_timestamp_and_tz_transform("1969-01-01 00:00:00.00", &year,
Datum::int(-1));
+
+ // Test TimestampNanosecond
+ test_timestamp_ns_and_tz_transform_using_i64(
+ 186280000000,
+ &year,
+ Datum::int(1970 - super::UNIX_EPOCH_YEAR),
+ );
+ test_timestamp_ns_and_tz_transform("1969-01-01 00:00:00.00", &year,
Datum::int(-1));
}
#[test]
@@ -2432,6 +2586,17 @@ mod test {
test_timestamp_and_tz_transform("2017-12-01 00:00:00.00", &month,
Datum::int(575));
test_timestamp_and_tz_transform("1970-01-01 00:00:00.00", &month,
Datum::int(0));
test_timestamp_and_tz_transform("1969-12-31 00:00:00.00", &month,
Datum::int(-1));
+
+ // Test TimestampNanosecond
+ test_timestamp_ns_and_tz_transform_using_i64(
+ 186280000000,
+ &month,
+ Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12),
+ );
+ test_timestamp_ns_and_tz_transform("1969-12-01 23:00:00.00", &month,
Datum::int(-1));
+ test_timestamp_ns_and_tz_transform("2017-12-01 00:00:00.00", &month,
Datum::int(575));
+ test_timestamp_ns_and_tz_transform("1970-01-01 00:00:00.00", &month,
Datum::int(0));
+ test_timestamp_ns_and_tz_transform("1969-12-31 00:00:00.00", &month,
Datum::int(-1));
}
#[test]
@@ -2523,6 +2688,11 @@ mod test {
test_timestamp_and_tz_transform_using_i64(1512151975038194, &day,
Datum::int(17501));
test_timestamp_and_tz_transform_using_i64(-115200000000, &day,
Datum::int(-2));
test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day,
Datum::int(17501));
+
+ // Test TimestampNanosecond
+ test_timestamp_ns_and_tz_transform_using_i64(1512151975038194, &day,
Datum::int(17));
+ test_timestamp_ns_and_tz_transform_using_i64(-115200000000, &day,
Datum::int(-1));
+ test_timestamp_ns_and_tz_transform("2017-12-01 10:30:42.123", &day,
Datum::int(17501));
}
#[test]
@@ -2591,5 +2761,9 @@ mod test {
// Test TimestampMicrosecond
test_timestamp_and_tz_transform("2017-12-01 18:00:00.00", &hour,
Datum::int(420042));
test_timestamp_and_tz_transform("1969-12-31 23:00:00.00", &hour,
Datum::int(-1));
+
+ // Test TimestampNanosecond
+ test_timestamp_ns_and_tz_transform("2017-12-01 18:00:00.00", &hour,
Datum::int(420042));
+ test_timestamp_ns_and_tz_transform("1969-12-31 23:00:00.00", &hour,
Datum::int(-1));
}
}
diff --git a/crates/iceberg/src/transform/truncate.rs
b/crates/iceberg/src/transform/truncate.rs
index cba5409..83f769e 100644
--- a/crates/iceberg/src/transform/truncate.rs
+++ b/crates/iceberg/src/transform/truncate.rs
@@ -174,7 +174,7 @@ mod test {
use crate::expr::PredicateOperator;
use crate::spec::PrimitiveType::{
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time,
Timestamp,
- Timestamptz, Uuid,
+ TimestampNs, Timestamptz, TimestamptzNs, Uuid,
};
use crate::spec::Type::{Primitive, Struct};
use crate::spec::{Datum, NestedField, PrimitiveType, StructType,
Transform, Type};
@@ -219,6 +219,8 @@ mod test {
(Primitive(Time), None),
(Primitive(Timestamp), None),
(Primitive(Timestamptz), None),
+ (Primitive(TimestampNs), None),
+ (Primitive(TimestamptzNs), None),
(
Struct(StructType::new(vec![NestedField::optional(
1,
diff --git a/crates/iceberg/src/transform/void.rs
b/crates/iceberg/src/transform/void.rs
index 92d76b1..5d429a5 100644
--- a/crates/iceberg/src/transform/void.rs
+++ b/crates/iceberg/src/transform/void.rs
@@ -37,7 +37,7 @@ impl TransformFunction for Void {
mod test {
use crate::spec::PrimitiveType::{
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time,
Timestamp,
- Timestamptz, Uuid,
+ TimestampNs, Timestamptz, TimestamptzNs, Uuid,
};
use crate::spec::Type::{Primitive, Struct};
use crate::spec::{NestedField, StructType, Transform};
@@ -81,6 +81,8 @@ mod test {
(Primitive(Time), Some(Primitive(Time))),
(Primitive(Timestamp), Some(Primitive(Timestamp))),
(Primitive(Timestamptz), Some(Primitive(Timestamptz))),
+ (Primitive(TimestampNs), Some(Primitive(TimestampNs))),
+ (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))),
(
Struct(StructType::new(vec![NestedField::optional(
1,
diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs
b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
index d41714b..11ba04f 100644
--- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs
+++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
@@ -337,6 +337,14 @@ impl MinMaxColAggregator {
let convert_func = |v: i64|
Result::<Datum>::Ok(Datum::timestamptz_micros(v));
self.update_state::<Int64Type>(field_id, &stat, convert_func)
}
+ (PrimitiveType::TimestampNs, Statistics::Int64(stat)) => {
+ let convert_func = |v: i64|
Result::<Datum>::Ok(Datum::timestamp_nanos(v));
+ self.update_state::<Int64Type>(field_id, &stat, convert_func)
+ }
+ (PrimitiveType::TimestamptzNs, Statistics::Int64(stat)) => {
+ let convert_func = |v: i64|
Result::<Datum>::Ok(Datum::timestamptz_nanos(v));
+ self.update_state::<Int64Type>(field_id, &stat, convert_func)
+ }
(
PrimitiveType::Decimal {
precision: _,
@@ -595,7 +603,7 @@ mod tests {
use anyhow::Result;
use arrow_array::types::Int64Type;
use arrow_array::{
- ArrayRef, BooleanArray, Int32Array, Int64Array, ListArray,
RecordBatch, StructArray,
+ Array, ArrayRef, BooleanArray, Int32Array, Int64Array, ListArray,
RecordBatch, StructArray,
};
use arrow_schema::{DataType, SchemaRef as ArrowSchemaRef};
use arrow_select::concat::concat_batches;
@@ -632,6 +640,18 @@ mod tests {
.into(),
NestedField::optional(
11,
+ "timestamp_ns",
+ Type::Primitive(PrimitiveType::TimestampNs),
+ )
+ .into(),
+ NestedField::optional(
+ 12,
+ "timestamptz_ns",
+ Type::Primitive(PrimitiveType::TimestamptzNs),
+ )
+ .into(),
+ NestedField::optional(
+ 13,
"decimal",
Type::Primitive(PrimitiveType::Decimal {
precision: 10,
@@ -639,8 +659,8 @@ mod tests {
}),
)
.into(),
- NestedField::optional(12, "uuid",
Type::Primitive(PrimitiveType::Uuid)).into(),
- NestedField::optional(13, "fixed",
Type::Primitive(PrimitiveType::Fixed(10)))
+ NestedField::optional(14, "uuid",
Type::Primitive(PrimitiveType::Uuid)).into(),
+ NestedField::optional(15, "fixed",
Type::Primitive(PrimitiveType::Fixed(10)))
.into(),
])
.build()
@@ -1091,12 +1111,22 @@ mod tests {
arrow_array::TimestampMicrosecondArray::from(vec![Some(0),
Some(1), None, Some(3)])
.with_timezone_utc(),
) as ArrayRef;
- let col11 = Arc::new(
+ let col11 = Arc::new(arrow_array::TimestampNanosecondArray::from(vec![
+ Some(0),
+ Some(1),
+ None,
+ Some(3),
+ ])) as ArrayRef;
+ let col12 = Arc::new(
+ arrow_array::TimestampNanosecondArray::from(vec![Some(0), Some(1),
None, Some(3)])
+ .with_timezone_utc(),
+ ) as ArrayRef;
+ let col13 = Arc::new(
arrow_array::Decimal128Array::from(vec![Some(1), Some(2), None,
Some(100)])
.with_precision_and_scale(10, 5)
.unwrap(),
) as ArrayRef;
- let col12 = Arc::new(
+ let col14 = Arc::new(
arrow_array::FixedSizeBinaryArray::try_from_sparse_iter_with_size(
vec![
Some(Uuid::from_u128(0).as_bytes().to_vec()),
@@ -1109,7 +1139,7 @@ mod tests {
)
.unwrap(),
) as ArrayRef;
- let col13 = Arc::new(
+ let col15 = Arc::new(
arrow_array::FixedSizeBinaryArray::try_from_sparse_iter_with_size(
vec![
Some(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
@@ -1124,6 +1154,7 @@ mod tests {
) as ArrayRef;
let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![
col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10,
col11, col12, col13,
+ col14, col15,
])
.unwrap();
@@ -1171,8 +1202,10 @@ mod tests {
(8, Datum::time_micros(0).unwrap()),
(9, Datum::timestamp_micros(0)),
(10, Datum::timestamptz_micros(0)),
+ (11, Datum::timestamp_nanos(0)),
+ (12, Datum::timestamptz_nanos(0)),
(
- 11,
+ 13,
Datum::new(
PrimitiveType::Decimal {
precision: 10,
@@ -1181,10 +1214,8 @@ mod tests {
PrimitiveLiteral::Int128(1)
)
),
- (12, Datum::uuid(Uuid::from_u128(0))),
- (13, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])),
- (12, Datum::uuid(Uuid::from_u128(0))),
- (13, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])),
+ (14, Datum::uuid(Uuid::from_u128(0))),
+ (15, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])),
])
);
assert_eq!(
@@ -1201,8 +1232,10 @@ mod tests {
(8, Datum::time_micros(3).unwrap()),
(9, Datum::timestamp_micros(3)),
(10, Datum::timestamptz_micros(3)),
+ (11, Datum::timestamp_nanos(3)),
+ (12, Datum::timestamptz_nanos(3)),
(
- 11,
+ 13,
Datum::new(
PrimitiveType::Decimal {
precision: 10,
@@ -1211,9 +1244,9 @@ mod tests {
PrimitiveLiteral::Int128(100)
)
),
- (12, Datum::uuid(Uuid::from_u128(3))),
+ (14, Datum::uuid(Uuid::from_u128(3))),
(
- 13,
+ 15,
Datum::fixed(vec![21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
),
])