This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 811fd1d feat: Add expression builder and display. (#169)
811fd1d is described below
commit 811fd1d04230883e7288675116da32917f6591b6
Author: Renjie Liu <[email protected]>
AuthorDate: Fri Mar 1 19:58:05 2024 +0800
feat: Add expression builder and display. (#169)
* feat: Add expression builder and display.
* Fix comments
* Fix doc test
* Fix name of op
* Fix comments
* Fix timestamp
---
crates/iceberg/src/expr/mod.rs | 132 ++++++-
crates/iceberg/src/expr/predicate.rs | 205 ++++++++++-
crates/iceberg/src/expr/term.rs | 74 +++-
crates/iceberg/src/spec/values.rs | 654 ++++++++++++++++++++++++++++++++++-
4 files changed, 1021 insertions(+), 44 deletions(-)
diff --git a/crates/iceberg/src/expr/mod.rs b/crates/iceberg/src/expr/mod.rs
index aef1444..ef3d2a6 100644
--- a/crates/iceberg/src/expr/mod.rs
+++ b/crates/iceberg/src/expr/mod.rs
@@ -18,25 +18,129 @@
//! This module contains expressions.
mod term;
+
+use std::fmt::{Display, Formatter};
+
pub use term::*;
mod predicate;
pub use predicate::*;
/// Predicate operators used in expressions.
+///
+/// The discriminant of this enum is used for determining the type of the
operator, see
+/// [`PredicateOperator::is_unary`], [`PredicateOperator::is_binary`],
[`PredicateOperator::is_set`]
#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy)]
+#[repr(u16)]
pub enum PredicateOperator {
- IsNull,
- NotNull,
- IsNan,
- NotNan,
- LessThan,
- LessThanOrEq,
- GreaterThan,
- GreaterThanOrEq,
- Eq,
- NotEq,
- In,
- NotIn,
- StartsWith,
- NotStartsWith,
+ // Unary operators
+ IsNull = 101,
+ NotNull = 102,
+ IsNan = 103,
+ NotNan = 104,
+
+ // Binary operators
+ LessThan = 201,
+ LessThanOrEq = 202,
+ GreaterThan = 203,
+ GreaterThanOrEq = 204,
+ Eq = 205,
+ NotEq = 206,
+ StartsWith = 207,
+ NotStartsWith = 208,
+
+ // Set operators
+ In = 301,
+ NotIn = 302,
+}
+
+impl Display for PredicateOperator {
+    /// Writes the SQL-like spelling of the operator, e.g. `IS NULL`, `<=` or `NOT IN`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let symbol = match self {
+            // Unary operators
+            Self::IsNull => "IS NULL",
+            Self::NotNull => "IS NOT NULL",
+            Self::IsNan => "IS NAN",
+            Self::NotNan => "IS NOT NAN",
+            // Binary operators
+            Self::LessThan => "<",
+            Self::LessThanOrEq => "<=",
+            Self::GreaterThan => ">",
+            Self::GreaterThanOrEq => ">=",
+            Self::Eq => "=",
+            Self::NotEq => "!=",
+            Self::StartsWith => "STARTS WITH",
+            Self::NotStartsWith => "NOT STARTS WITH",
+            // Set operators
+            Self::In => "IN",
+            Self::NotIn => "NOT IN",
+        };
+        f.write_str(symbol)
+    }
+}
+
+impl PredicateOperator {
+    /// Returns `true` when this operator takes a single operand.
+    ///
+    /// Unary operators are the ones whose discriminant is below that of
+    /// [`PredicateOperator::LessThan`], the first binary operator.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::IsNull.is_unary());
+    /// ```
+    pub fn is_unary(self) -> bool {
+        let code = self as u16;
+        let first_binary = Self::LessThan as u16;
+        code < first_binary
+    }
+
+    /// Returns `true` when this operator compares a term against one literal.
+    ///
+    /// Binary operators have a discriminant strictly between the last unary operator
+    /// ([`PredicateOperator::NotNan`]) and the first set operator ([`PredicateOperator::In`]).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::LessThan.is_binary());
+    /// ```
+    pub fn is_binary(self) -> bool {
+        let code = self as u16;
+        let last_unary = Self::NotNan as u16;
+        let first_set = Self::In as u16;
+        code > last_unary && code < first_set
+    }
+
+    /// Returns `true` when this operator compares a term against a set of literals.
+    ///
+    /// Set operators are the ones whose discriminant is above that of
+    /// [`PredicateOperator::NotStartsWith`], the last binary operator.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::In.is_set());
+    /// ```
+    pub fn is_set(self) -> bool {
+        let code = self as u16;
+        let last_binary = Self::NotStartsWith as u16;
+        code > last_binary
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::expr::PredicateOperator;
+
+    /// Every single-operand operator must be classified as unary.
+    #[test]
+    fn test_unary() {
+        let unary_ops = [
+            PredicateOperator::IsNull,
+            PredicateOperator::NotNull,
+            PredicateOperator::IsNan,
+            PredicateOperator::NotNan,
+        ];
+        for op in unary_ops {
+            assert!(op.is_unary());
+        }
+    }
+
+    /// Every term-versus-literal operator must be classified as binary.
+    #[test]
+    fn test_binary() {
+        let binary_ops = [
+            PredicateOperator::LessThan,
+            PredicateOperator::LessThanOrEq,
+            PredicateOperator::GreaterThan,
+            PredicateOperator::GreaterThanOrEq,
+            PredicateOperator::Eq,
+            PredicateOperator::NotEq,
+            PredicateOperator::StartsWith,
+            PredicateOperator::NotStartsWith,
+        ];
+        for op in binary_ops {
+            assert!(op.is_binary());
+        }
+    }
+
+    /// Every term-versus-literal-set operator must be classified as a set operator.
+    #[test]
+    fn test_set() {
+        let set_ops = [PredicateOperator::In, PredicateOperator::NotIn];
+        for op in set_ops {
+            assert!(op.is_set());
+        }
+    }
}
diff --git a/crates/iceberg/src/expr/predicate.rs
b/crates/iceberg/src/expr/predicate.rs
index 9d6bf86..c9c047e 100644
--- a/crates/iceberg/src/expr/predicate.rs
+++ b/crates/iceberg/src/expr/predicate.rs
@@ -19,15 +19,40 @@
//! Predicate expressions are used to filter data, and evaluates to a boolean
value. For example,
//! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is
greater than `10`,
-use crate::expr::{BoundReference, PredicateOperator, UnboundReference};
-use crate::spec::Literal;
+use crate::expr::{BoundReference, PredicateOperator, Reference};
+use crate::spec::Datum;
use std::collections::HashSet;
+use std::fmt::{Debug, Display, Formatter};
+use std::ops::Not;
/// Logical expression, such as `AND`, `OR`, `NOT`.
pub struct LogicalExpression<T, const N: usize> {
inputs: [Box<T>; N],
}
+impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("LogicalExpression")
+ .field("inputs", &self.inputs)
+ .finish()
+ }
+}
+
+impl<T, const N: usize> LogicalExpression<T, N> {
+    /// Builds a logical expression from its already boxed operands.
+    fn new(inputs: [Box<T>; N]) -> Self {
+        Self { inputs }
+    }
+
+    /// Return inputs of this logical expression.
+    ///
+    /// The operands are returned as borrowed references, in the same order in which they
+    /// were passed to the constructor.
+    pub fn inputs(&self) -> [&T; N] {
+        // Build the array element by element rather than seeding it with `inputs[0]`:
+        // seeding indexes out of bounds when `N == 0` and writes every slot twice.
+        std::array::from_fn(|i| self.inputs[i].as_ref())
+    }
+}
+
+
/// Unary predicate, for example, `a IS NULL`.
pub struct UnaryExpression<T> {
/// Operator of this predicate, must be single operand operator.
@@ -36,6 +61,28 @@ pub struct UnaryExpression<T> {
term: T,
}
+impl<T: Debug> Debug for UnaryExpression<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("UnaryExpression")
+ .field("op", &self.op)
+ .field("term", &self.term)
+ .finish()
+ }
+}
+
+impl<T: Display> Display for UnaryExpression<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{} {}", self.term, self.op)
+ }
+}
+
+impl<T> UnaryExpression<T> {
+ pub(crate) fn new(op: PredicateOperator, term: T) -> Self {
+ debug_assert!(op.is_unary());
+ Self { op, term }
+ }
+}
+
/// Binary predicate, for example, `a > 10`.
pub struct BinaryExpression<T> {
/// Operator of this predicate, must be binary operator, such as `=`, `>`,
`<`, etc.
@@ -43,7 +90,30 @@ pub struct BinaryExpression<T> {
/// Term of this predicate, for example, `a` in `a > 10`.
term: T,
/// Literal of this predicate, for example, `10` in `a > 10`.
- literal: Literal,
+ literal: Datum,
+}
+
+impl<T: Debug> Debug for BinaryExpression<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("BinaryExpression")
+ .field("op", &self.op)
+ .field("term", &self.term)
+ .field("literal", &self.literal)
+ .finish()
+ }
+}
+
+impl<T> BinaryExpression<T> {
+ pub(crate) fn new(op: PredicateOperator, term: T, literal: Datum) -> Self {
+ debug_assert!(op.is_binary());
+ Self { op, term, literal }
+ }
+}
+
+impl<T: Display> Display for BinaryExpression<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{} {} {}", self.term, self.op, self.literal)
+ }
}
/// Set predicates, for example, `a in (1, 2, 3)`.
@@ -53,26 +123,139 @@ pub struct SetExpression<T> {
/// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
term: T,
/// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2,
3)`.
- literals: HashSet<Literal>,
+ literals: HashSet<Datum>,
+}
+
+impl<T: Debug> Debug for SetExpression<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("SetExpression")
+ .field("op", &self.op)
+ .field("term", &self.term)
+ .field("literal", &self.literals)
+ .finish()
+ }
}
/// Unbound predicate expression before binding to a schema.
-pub enum UnboundPredicate {
+#[derive(Debug)]
+pub enum Predicate {
/// And predicate, for example, `a > 10 AND b < 20`.
- And(LogicalExpression<UnboundPredicate, 2>),
+ And(LogicalExpression<Predicate, 2>),
/// Or predicate, for example, `a > 10 OR b < 20`.
- Or(LogicalExpression<UnboundPredicate, 2>),
+ Or(LogicalExpression<Predicate, 2>),
/// Not predicate, for example, `NOT (a > 10)`.
- Not(LogicalExpression<UnboundPredicate, 1>),
+ Not(LogicalExpression<Predicate, 1>),
/// Unary expression, for example, `a IS NULL`.
- Unary(UnaryExpression<UnboundReference>),
+ Unary(UnaryExpression<Reference>),
/// Binary expression, for example, `a > 10`.
- Binary(BinaryExpression<UnboundReference>),
+ Binary(BinaryExpression<Reference>),
/// Set predicates, for example, `a in (1, 2, 3)`.
- Set(SetExpression<UnboundReference>),
+ Set(SetExpression<Reference>),
+}
+
+impl Display for Predicate {
+    /// Renders the predicate in a SQL-like form.
+    ///
+    /// * `And` / `Or` wrap both operands in parentheses: `(a < 10) AND (b < 20)`.
+    /// * `Not` wraps its operand: `NOT (a < 10)`.
+    /// * `Unary` prints the term followed by the operator: `a IS NULL`.
+    /// * `Binary` prints term, operator and literal: `a < 10`.
+    /// * `Set` prints term, operator and the comma separated literals: `a IN (1, 2)`.
+    ///   The literals come from a `HashSet`, so their order is unspecified.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Predicate::And(expr) => {
+                let [lhs, rhs] = expr.inputs();
+                write!(f, "({}) AND ({})", lhs, rhs)
+            }
+            Predicate::Or(expr) => {
+                let [lhs, rhs] = expr.inputs();
+                write!(f, "({}) OR ({})", lhs, rhs)
+            }
+            Predicate::Not(expr) => {
+                let [inner] = expr.inputs();
+                write!(f, "NOT ({})", inner)
+            }
+            Predicate::Unary(expr) => {
+                // The operator is part of the predicate; printing only the term would
+                // render `a IS NULL` as just `a`.
+                write!(f, "{} {}", expr.term, expr.op)
+            }
+            Predicate::Binary(expr) => {
+                write!(f, "{} {} {}", expr.term, expr.op, expr.literal)
+            }
+            Predicate::Set(expr) => {
+                write!(f, "{} {} (", expr.term, expr.op)?;
+                for (idx, literal) in expr.literals.iter().enumerate() {
+                    if idx > 0 {
+                        f.write_str(", ")?;
+                    }
+                    // Use the literal's `Display` form, consistent with the `Binary`
+                    // arm, instead of dumping its `Debug` struct representation.
+                    write!(f, "{}", literal)?;
+                }
+                f.write_str(")")
+            }
+        }
+    }
+}
+
+impl Predicate {
+ /// Combines two predicates with `AND`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// use std::ops::Bound::Unbounded;
+ /// use iceberg::expr::BoundPredicate::Unary;
+ /// use iceberg::expr::Reference;
+ /// use iceberg::spec::Datum;
+ /// let expr1 = Reference::new("a").less_than(Datum::long(10));
+ ///
+ /// let expr2 = Reference::new("b").less_than(Datum::long(20));
+ ///
+ /// let expr = expr1.and(expr2);
+ ///
+ /// assert_eq!(&format!("{expr}"), "(a < 10) AND (b < 20)");
+ /// ```
+ pub fn and(self, other: Predicate) -> Predicate {
+ Predicate::And(LogicalExpression::new([Box::new(self),
Box::new(other)]))
+ }
+
+ /// Combines two predicates with `OR`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// use std::ops::Bound::Unbounded;
+ /// use iceberg::expr::BoundPredicate::Unary;
+ /// use iceberg::expr::Reference;
+ /// use iceberg::spec::Datum;
+ /// let expr1 = Reference::new("a").less_than(Datum::long(10));
+ ///
+ /// let expr2 = Reference::new("b").less_than(Datum::long(20));
+ ///
+ /// let expr = expr1.or(expr2);
+ ///
+ /// assert_eq!(&format!("{expr}"), "(a < 10) OR (b < 20)");
+ /// ```
+ pub fn or(self, other: Predicate) -> Predicate {
+ Predicate::Or(LogicalExpression::new([Box::new(self),
Box::new(other)]))
+ }
+}
+
+impl Not for Predicate {
+ type Output = Predicate;
+
+ /// Create a predicate which is the logical negation of this predicate. For
example: `NOT (a > 10)`
+ /// # Example
+ ///
+ ///```rust
+ ///use std::ops::Bound::Unbounded;
+ ///use iceberg::expr::BoundPredicate::Unary;
+ ///use iceberg::expr::Reference;
+ ///use iceberg::spec::Datum;
+ ///let expr1 = Reference::new("a").less_than(Datum::long(10));
+ ///
+ ///let expr = !expr1;
+ ///
+ ///assert_eq!(&format!("{expr}"), "NOT (a < 10)");
+ ///```
+ fn not(self) -> Self::Output {
+ Predicate::Not(LogicalExpression::new([Box::new(self)]))
+ }
}
/// Bound predicate expression after binding to a schema.
+#[derive(Debug)]
pub enum BoundPredicate {
/// An expression always evaluates to true.
AlwaysTrue,
diff --git a/crates/iceberg/src/expr/term.rs b/crates/iceberg/src/expr/term.rs
index 5a81ecd..a4338a3 100644
--- a/crates/iceberg/src/expr/term.rs
+++ b/crates/iceberg/src/expr/term.rs
@@ -17,21 +17,89 @@
//! Term definition.
-use crate::spec::NestedFieldRef;
+use crate::expr::{BinaryExpression, Predicate, PredicateOperator};
+use crate::spec::{Datum, NestedField, NestedFieldRef};
+use std::fmt::{Display, Formatter};
/// Unbound term before binding to a schema.
-pub type UnboundTerm = UnboundReference;
+pub type Term = Reference;
/// A named reference in an unbound expression.
/// For example, `a` in `a > 10`.
-pub struct UnboundReference {
+#[derive(Debug, Clone)]
+pub struct Reference {
name: String,
}
+impl Reference {
+ /// Create a new unbound reference.
+ pub fn new(name: impl Into<String>) -> Self {
+ Self { name: name.into() }
+ }
+
+ /// Return the name of this reference.
+ pub fn name(&self) -> &str {
+ &self.name
+ }
+}
+
+impl Reference {
+ /// Creates a less-than expression. For example, `a < 10`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ ///
+ /// use iceberg::expr::Reference;
+ /// use iceberg::spec::Datum;
+ /// let expr = Reference::new("a").less_than(Datum::long(10));
+ ///
+ /// assert_eq!(&format!("{expr}"), "a < 10");
+ /// ```
+ pub fn less_than(self, datum: Datum) -> Predicate {
+ Predicate::Binary(BinaryExpression::new(
+ PredicateOperator::LessThan,
+ self,
+ datum,
+ ))
+ }
+}
+
+impl Display for Reference {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.name)
+ }
+}
+
/// A named reference in a bound expression after binding to a schema.
+#[derive(Debug, Clone)]
pub struct BoundReference {
+ // This may be different from the [`name`] field in [`NestedField`] since this
contains the full path.
+ // For example, if the field is `a.b.c`, then `field.name` is `c`, but
`column_name` is `a.b.c`.
+ column_name: String,
field: NestedFieldRef,
}
+impl BoundReference {
+ /// Creates a new bound reference.
+ pub fn new(name: impl Into<String>, field: NestedFieldRef) -> Self {
+ Self {
+ column_name: name.into(),
+ field,
+ }
+ }
+
+ /// Return the field of this reference.
+ pub fn field(&self) -> &NestedField {
+ &self.field
+ }
+}
+
+impl Display for BoundReference {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.column_name)
+ }
+}
+
/// Bound term after binding to a schema.
pub type BoundTerm = BoundReference;
diff --git a/crates/iceberg/src/spec/values.rs
b/crates/iceberg/src/spec/values.rs
index a8a748d..688bce4 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -19,6 +19,7 @@
* Value in iceberg
*/
+use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::{any::Any, collections::BTreeMap};
@@ -31,12 +32,20 @@ use serde_bytes::ByteBuf;
use serde_json::{Map as JsonMap, Number, Value as JsonValue};
use uuid::Uuid;
-use crate::{Error, ErrorKind};
+use crate::{ensure_data_valid, Error, ErrorKind};
use super::datatypes::{PrimitiveType, Type};
+use crate::spec::values::date::{date_from_naive_date, days_to_date,
unix_epoch};
+use crate::spec::values::time::microseconds_to_time;
+use crate::spec::values::timestamp::microseconds_to_datetime;
+use crate::spec::values::timestamptz::microseconds_to_datetimetz;
+use crate::spec::MAX_DECIMAL_PRECISION;
pub use _serde::RawLiteral;
+/// Maximum value for [`PrimitiveType::Time`] type in microseconds, e.g. 23
hours 59 minutes 59 seconds 999999 microseconds.
+const MAX_TIME_VALUE: i64 = 24 * 60 * 60 * 1_000_000i64 - 1;
+
/// Values present in iceberg type
#[derive(Clone, Debug, PartialEq, Hash, Eq, PartialOrd, Ord)]
pub enum PrimitiveLiteral {
@@ -70,6 +79,587 @@ pub enum PrimitiveLiteral {
Decimal(i128),
}
+/// Literal associated with its type. The value and type pair is checked at
construction, so the type and value are
+/// guaranteed to be correct when used.
+///
+/// By default, we decouple the type and value of a literal, so we can
avoid the cost of storing extra type info
+/// for each literal. But associating a type with a literal can be useful in some
cases, for example, in unbound expressions.
+#[derive(Debug)]
+pub struct Datum {
+ r#type: PrimitiveType,
+ literal: PrimitiveLiteral,
+}
+
+impl Display for Datum {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ match (&self.r#type, &self.literal) {
+ (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Int(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Long(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Float(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Double(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Date(val)) => write!(f, "{}",
days_to_date(*val)),
+ (_, PrimitiveLiteral::Time(val)) => write!(f, "{}",
microseconds_to_time(*val)),
+ (_, PrimitiveLiteral::Timestamp(val)) => {
+ write!(f, "{}", microseconds_to_datetime(*val))
+ }
+ (_, PrimitiveLiteral::TimestampTZ(val)) => {
+ write!(f, "{}", microseconds_to_datetimetz(*val))
+ }
+ (_, PrimitiveLiteral::String(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::UUID(val)) => write!(f, "{}", val),
+ (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f),
+ (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
+ (
+ PrimitiveType::Decimal {
+ precision: _,
+ scale,
+ },
+ PrimitiveLiteral::Decimal(val),
+ ) => {
+ write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale))
+ }
+ (_, _) => {
+ unreachable!()
+ }
+ }
+ }
+}
+
+/// Writes `bytes` to `f` as contiguous two-digit upper-case hexadecimal pairs,
+/// for example `[1, 100]` is written as `0164`. An empty slice writes nothing.
+fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
+    // Stream each byte straight into the formatter; this avoids allocating a temporary
+    // `String` per byte plus an intermediate buffer, while producing the same text.
+    for byte in bytes {
+        write!(f, "{:02X}", byte)?;
+    }
+    Ok(())
+}
+
+impl From<Datum> for Literal {
+ fn from(value: Datum) -> Self {
+ Literal::Primitive(value.literal)
+ }
+}
+
+impl Datum {
+ /// Creates a boolean value.
+ ///
+ /// Example:
+ /// ```rust
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::bool(true);
+ ///
+ /// assert_eq!(format!("{}", t), "true".to_string());
+ /// assert_eq!(Literal::from(t),
Literal::Primitive(PrimitiveLiteral::Boolean(true)));
+ /// ```
+ pub fn bool<T: Into<bool>>(t: T) -> Self {
+ Self {
+ r#type: PrimitiveType::Boolean,
+ literal: PrimitiveLiteral::Boolean(t.into()),
+ }
+ }
+
+ /// Creates a boolean value from string.
+ /// See [Parse bool from
str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool)
for reference.
+ ///
+ /// Example:
+ /// ```rust
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::bool_from_str("false").unwrap();
+ ///
+ /// assert_eq!(&format!("{}", t), "false");
+ /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Boolean(false)),
t.into());
+ /// ```
+ pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let v = s.as_ref().parse::<bool>().map_err(|e| {
+ Error::new(ErrorKind::DataInvalid, "Can't parse string to
bool.").with_source(e)
+ })?;
+ Ok(Self::bool(v))
+ }
+
+ /// Creates a 32bit integer.
+ ///
+ /// Example:
+ /// ```rust
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::int(23i8);
+ ///
+ /// assert_eq!(&format!("{}", t), "23");
+ /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
+ /// ```
+ pub fn int<T: Into<i32>>(t: T) -> Self {
+ Self {
+ r#type: PrimitiveType::Int,
+ literal: PrimitiveLiteral::Int(t.into()),
+ }
+ }
+
+ /// Creates a 64bit integer.
+ ///
+ /// Example:
+ /// ```rust
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::long(24i8);
+ ///
+ /// assert_eq!(&format!("{t}"), "24");
+ /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
+ /// ```
+ pub fn long<T: Into<i64>>(t: T) -> Self {
+ Self {
+ r#type: PrimitiveType::Long,
+ literal: PrimitiveLiteral::Long(t.into()),
+ }
+ }
+
+ /// Creates a 32bit floating point number.
+ ///
+ /// Example:
+ /// ```rust
+ /// use ordered_float::OrderedFloat;
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::float( 32.1f32 );
+ ///
+ /// assert_eq!(&format!("{t}"), "32.1");
+ ///
assert_eq!(Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))),
t.into());
+ /// ```
+ pub fn float<T: Into<f32>>(t: T) -> Self {
+ Self {
+ r#type: PrimitiveType::Float,
+ literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
+ }
+ }
+
+ /// Creates a 64bit floating point number.
+ ///
+ /// Example:
+ /// ```rust
+ /// use ordered_float::OrderedFloat;
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::double( 32.1f64 );
+ ///
+ /// assert_eq!(&format!("{t}"), "32.1");
+ ///
assert_eq!(Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))),
t.into());
+ /// ```
+ pub fn double<T: Into<f64>>(t: T) -> Self {
+ Self {
+ r#type: PrimitiveType::Double,
+ literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
+ }
+ }
+
+ /// Creates date literal from number of days from unix epoch directly.
+ ///
+ /// Example:
+ /// ```rust
+ ///
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// // 2 days after 1970-01-01
+ /// let t = Datum::date(2);
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-03");
+ /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Date(2)), t.into());
+ /// ```
+ pub fn date(days: i32) -> Self {
+ Self {
+ r#type: PrimitiveType::Date,
+ literal: PrimitiveLiteral::Date(days),
+ }
+ }
+
+ /// Creates a date in `%Y-%m-%d` format, assume in utc timezone.
+ ///
+ /// See [`NaiveDate::from_str`].
+ ///
+ /// Example
+ /// ```rust
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t = Datum::date_from_str("1970-01-05").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-05");
+ /// assert_eq!(Literal::date(4), t.into());
+ /// ```
+ pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Can't parse date from string: {}", s.as_ref()),
+ )
+ .with_source(e)
+ })?;
+
+ Ok(Self::date(date_from_naive_date(t)))
+ }
+
+ /// Create a date from calendar date (year, month and day).
+ ///
+ /// See [`NaiveDate::from_ymd_opt`].
+ ///
+ /// Example:
+ ///
+ ///```rust
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-05");
+ /// assert_eq!(Literal::date(4), t.into());
+ /// ```
+ pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
+ let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Can't create date from year: {year}, month: {month},
day: {day}"),
+ )
+ })?;
+
+ Ok(Self::date(date_from_naive_date(t)))
+ }
+
+ /// Creates time in microseconds directly.
+ ///
+ /// It returns an error when the value is negative or too large to fit in 24
hours.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::{Literal, Datum};
+ /// let micro_secs = {
+ /// 1 * 3600 * 1_000_000 + // 1 hour
+ /// 2 * 60 * 1_000_000 + // 2 minutes
+ /// 1 * 1_000_000 + // 1 second
+ /// 888999 // microseconds
+ /// };
+ ///
+ /// let t = Datum::time_micros(micro_secs).unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+ /// assert_eq!(Literal::time(micro_secs), t.into());
+ ///
+ /// let negative_value = -100;
+ /// assert!(Datum::time_micros(negative_value).is_err());
+ ///
+ /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in
24 hours.
+ /// assert!(Datum::time_micros(too_large_value).is_err());
+ /// ```
+ pub fn time_micros(value: i64) -> Result<Self> {
+ ensure_data_valid!(
+ (0..=MAX_TIME_VALUE).contains(&value),
+ "Invalid value for Time type: {}",
+ value
+ );
+
+ Ok(Self {
+ r#type: PrimitiveType::Time,
+ literal: PrimitiveLiteral::Time(value),
+ })
+ }
+
+ /// Creates time literal from [`chrono::NaiveTime`].
+ fn time_from_naive_time(t: NaiveTime) -> Self {
+ let duration = t - unix_epoch().time();
+ // It's safe to unwrap here since less than 24 hours will never
overflow.
+ let micro_secs = duration.num_microseconds().unwrap();
+
+ Self {
+ r#type: PrimitiveType::Time,
+ literal: PrimitiveLiteral::Time(micro_secs),
+ }
+ }
+
+ /// Creates time in microseconds in `%H:%M:%S%.f` format.
+ ///
+ /// See [`NaiveTime::from_str`] for details.
+ ///
+ /// Example:
+ /// ```rust
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+ /// ```
+ pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Can't parse time from string: {}", s.as_ref()),
+ )
+ .with_source(e)
+ })?;
+
+ Ok(Self::time_from_naive_time(t))
+ }
+
+ /// Creates time literal from hour, minute, second, and microseconds.
+ ///
+ /// See [`NaiveTime::from_hms_micro_opt`].
+ ///
+ /// Example:
+ /// ```rust
+ ///
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "22:15:33.000111");
+ /// ```
+ pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) ->
Result<Self> {
+ let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
+ .ok_or_else(|| Error::new(
+ ErrorKind::DataInvalid,
+ format!("Can't create time from hour: {hour}, min: {min},
second: {sec}, microsecond: {micro}"),
+ ))?;
+ Ok(Self::time_from_naive_time(t))
+ }
+
+ /// Creates a timestamp from unix epoch in microseconds.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ ///
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamp_micros(1000);
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
+ /// ```
+ pub fn timestamp_micros(value: i64) -> Self {
+ Self {
+ r#type: PrimitiveType::Timestamp,
+ literal: PrimitiveLiteral::Timestamp(value),
+ }
+ }
+
+ /// Creates a timestamp from [`NaiveDateTime`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ ///
+ /// use chrono::{NaiveDate, NaiveDateTime, TimeZone, Utc};
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamp_from_datetime(
+ /// NaiveDate::from_ymd_opt(1992, 3, 1)
+ /// .unwrap()
+ /// .and_hms_micro_opt(1, 2, 3, 88)
+ /// .unwrap());
+ ///
+ /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
+ /// ```
+ pub fn timestamp_from_datetime(dt: NaiveDateTime) -> Self {
+ Self::timestamp_micros(dt.timestamp_micros())
+ }
+
+ /// Parse a timestamp in [`%Y-%m-%dT%H:%M:%S%.f`] format.
+ ///
+ /// See [`NaiveDateTime::from_str`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime,
NaiveTime};
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t =
Datum::timestamp_from_str("1992-03-01T01:02:03.000088").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
+ /// ```
+ pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let dt = s.as_ref().parse::<NaiveDateTime>().map_err(|e| {
+ Error::new(ErrorKind::DataInvalid, "Can't parse
timestamp.").with_source(e)
+ })?;
+
+ Ok(Self::timestamp_from_datetime(dt))
+ }
+
+ /// Creates a timestamp with timezone from unix epoch in microseconds.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ ///
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamptz_micros(1000);
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001 UTC");
+ /// ```
+ pub fn timestamptz_micros(value: i64) -> Self {
+ Self {
+ r#type: PrimitiveType::Timestamptz,
+ literal: PrimitiveLiteral::TimestampTZ(value),
+ }
+ }
+
+ /// Creates a timestamp with timezone from [`DateTime`].
+ /// Example:
+ ///
+ /// ```rust
+ ///
+ /// use chrono::{TimeZone, Utc};
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::timestamptz_from_datetime(Utc.timestamp_opt(1000,
0).unwrap());
+ ///
+ /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40 UTC");
+ /// ```
+ pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
+ Self::timestamptz_micros(dt.with_timezone(&Utc).timestamp_micros())
+ }
+
+ /// Parse timestamp with timezone in RFC3339 format.
+ ///
+ /// See [`DateTime::from_str`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime,
NaiveTime};
+ /// use iceberg::spec::{Literal, Datum};
+ /// let t =
Datum::timestamptz_from_str("1992-03-01T01:02:03.000088+08:00").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "1992-02-29 17:02:03.000088 UTC");
+ /// ```
+ pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
+ Error::new(ErrorKind::DataInvalid, "Can't parse
datetime.").with_source(e)
+ })?;
+
+ Ok(Self::timestamptz_from_datetime(dt))
+ }
+
+ /// Creates a string literal.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::string("ss");
+ ///
+ /// assert_eq!(&format!("{t}"), "ss");
+ /// ```
+ pub fn string<S: ToString>(s: S) -> Self {
+ Self {
+ r#type: PrimitiveType::String,
+ literal: PrimitiveLiteral::String(s.to_string()),
+ }
+ }
+
+ /// Creates uuid literal.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use uuid::uuid;
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::uuid(uuid!("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"));
+ ///
+ /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
+ /// ```
+ pub fn uuid(uuid: Uuid) -> Self {
+ Self {
+ r#type: PrimitiveType::Uuid,
+ literal: PrimitiveLiteral::UUID(uuid),
+ }
+ }
+
+ /// Creates uuid from str. See [`Uuid::parse_str`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::{Datum};
+ /// let t =
Datum::uuid_from_str("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
+ /// ```
+ pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let uuid = Uuid::parse_str(s.as_ref()).map_err(|e| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Can't parse uuid from string: {}", s.as_ref()),
+ )
+ .with_source(e)
+ })?;
+ Ok(Self::uuid(uuid))
+ }
+
+ /// Creates a fixed literal from bytes.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+ /// let t = Datum::fixed(vec![1u8, 2u8]);
+ ///
+ /// assert_eq!(&format!("{t}"), "0102");
+ /// ```
+ pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
+ let value: Vec<u8> = input.into_iter().collect();
+ Self {
+ r#type: PrimitiveType::Fixed(value.len() as u64),
+ literal: PrimitiveLiteral::Fixed(value),
+ }
+ }
+
+ /// Creates a binary literal from bytes.
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::binary(vec![1u8, 100u8]);
+ ///
+ /// assert_eq!(&format!("{t}"), "0164");
+ /// ```
+ pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
+ Self {
+ r#type: PrimitiveType::Binary,
+ literal: PrimitiveLiteral::Binary(input.into_iter().collect()),
+ }
+ }
+
+ /// Creates decimal literal from string. See [`Decimal::from_str_exact`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use itertools::assert_equal;
+ /// use rust_decimal::Decimal;
+ /// use iceberg::spec::Datum;
+ /// let t = Datum::decimal_from_str("123.45").unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "123.45");
+ /// ```
+ pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+ let decimal = Decimal::from_str_exact(s.as_ref()).map_err(|e| {
+ Error::new(ErrorKind::DataInvalid, "Can't parse
decimal.").with_source(e)
+ })?;
+
+ Self::decimal(decimal)
+ }
+
+ /// Try to create a decimal literal from [`Decimal`].
+ ///
+ /// Example:
+ ///
+ /// ```rust
+ /// use rust_decimal::Decimal;
+ /// use iceberg::spec::Datum;
+ ///
+ /// let t = Datum::decimal(Decimal::new(123, 2)).unwrap();
+ ///
+ /// assert_eq!(&format!("{t}"), "1.23");
+ /// ```
+ pub fn decimal(value: impl Into<Decimal>) -> Result<Self> {
+ let decimal = value.into();
+ let scale = decimal.scale();
+
+ let r#type = Type::decimal(MAX_DECIMAL_PRECISION, scale)?;
+ if let Type::Primitive(p) = r#type {
+ Ok(Self {
+ r#type: p,
+ literal: PrimitiveLiteral::Decimal(decimal.mantissa()),
+ })
+ } else {
+ unreachable!("Decimal type must be primitive.")
+ }
+ }
+}
+
/// Values present in iceberg type
#[derive(Clone, Debug, PartialEq, Hash, Eq, PartialOrd, Ord)]
pub enum Literal {
@@ -174,22 +764,11 @@ impl Literal {
Self::Primitive(PrimitiveLiteral::Double(OrderedFloat(t.into())))
}
- /// Returns unix epoch.
- pub fn unix_epoch() -> DateTime<Utc> {
- Utc.timestamp_nanos(0)
- }
-
/// Creates date literal from number of days from unix epoch directly.
pub fn date(days: i32) -> Self {
Self::Primitive(PrimitiveLiteral::Date(days))
}
- /// Creates date literal from `NaiveDate`, assuming it's utc timezone.
- fn date_from_naive_date(date: NaiveDate) -> Self {
- let days = (date - Self::unix_epoch().date_naive()).num_days();
- Self::date(days as i32)
- }
-
/// Creates a date in `%Y-%m-%d` format, assume in utc timezone.
///
/// See [`NaiveDate::from_str`].
@@ -210,7 +789,7 @@ impl Literal {
.with_source(e)
})?;
- Ok(Self::date_from_naive_date(t))
+ Ok(Self::date(date_from_naive_date(t)))
}
/// Create a date from calendar date (year, month and day).
@@ -233,7 +812,7 @@ impl Literal {
)
})?;
- Ok(Self::date_from_naive_date(t))
+ Ok(Self::date(date_from_naive_date(t)))
}
/// Creates time in microseconds directly
@@ -243,7 +822,7 @@ impl Literal {
/// Creates time literal from [`chrono::NaiveTime`].
fn time_from_naive_time(t: NaiveTime) -> Self {
- let duration = t - Self::unix_epoch().time();
+ let duration = t - unix_epoch().time();
// It's safe to unwrap here since less than 24 hours will never
overflow.
let micro_secs = duration.num_microseconds().unwrap();
@@ -951,7 +1530,7 @@ impl Literal {
}
mod date {
- use chrono::{NaiveDate, NaiveDateTime};
+ use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc};
pub(crate) fn date_to_days(date: &NaiveDate) -> i32 {
date.signed_duration_since(
@@ -967,6 +1546,16 @@ mod date {
.unwrap()
.date()
}
+
+ /// Returns unix epoch.
+ pub(crate) fn unix_epoch() -> DateTime<Utc> {
+ Utc.timestamp_nanos(0)
+ }
+
+ /// Returns the number of days between the unix epoch and `date`, assuming it's utc timezone.
+ pub(crate) fn date_from_naive_date(date: NaiveDate) -> i32 {
+ (date - unix_epoch().date_naive()).num_days() as i32
+ }
}
mod time {
@@ -2234,4 +2823,37 @@ mod tests {
// rust avro can't support to convert any byte-like type to fixed in avro
now.
// - uuid ser/de
// - fixed ser/de
+
+ #[test]
+ fn test_parse_timestamp() {
+ let value =
Datum::timestamp_from_str("2021-08-01T01:09:00.0899").unwrap();
+ assert_eq!(&format!("{value}"), "2021-08-01 01:09:00.089900");
+
+ let value = Datum::timestamp_from_str("2021-08-01T01:09:00.0899+0800");
+ assert!(value.is_err(), "Parse timestamp with timezone should fail!");
+
+ let value = Datum::timestamp_from_str("dfa");
+ assert!(
+ value.is_err(),
+ "Parse timestamp with invalid input should fail!"
+ );
+ }
+
+ #[test]
+ fn test_parse_timestamptz() {
+ let value =
Datum::timestamptz_from_str("2021-08-01T09:09:00.0899+0800").unwrap();
+ assert_eq!(&format!("{value}"), "2021-08-01 01:09:00.089900 UTC");
+
+ let value = Datum::timestamptz_from_str("2021-08-01T01:09:00.0899");
+ assert!(
+ value.is_err(),
+ "Parse timestamptz without timezone should fail!"
+ );
+
+ let value = Datum::timestamptz_from_str("dfa");
+ assert!(
+ value.is_err(),
+ "Parse timestamptz with invalid input should fail!"
+ );
+ }
}