liurenjie1024 commented on code in PR #309:
URL: https://github.com/apache/iceberg-rust/pull/309#discussion_r1547649993


##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose we have the row filter `a = 10` and a partition spec
+    /// `bucket(a, 37) as bs`. If a row matches `a = 10`, then its partition
+    /// value `bs` should equal `bucket(10, 37)`, so we project `a = 10` to
+    /// `bs = bucket(10, 37)`.
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {
+                Transform::Identity
+                | Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => Some(Predicate::Unary(UnaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                ))),
+                _ => None,
+            },
+            BoundPredicate::Binary(expr) => match self {
+                Transform::Identity => Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                ))),
+                Transform::Bucket(_) => {
+                    if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Binary(BinaryExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        func.transform_literal_result(expr.literal())?,
+                    )))
+                }
+                Transform::Truncate(width) => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        Some(*width),
+                    )?
+                }
+                Transform::Year | Transform::Month | Transform::Day | Transform::Hour => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        None,
+                    )?
+                }
+                _ => None,
+            },
+            BoundPredicate::Set(expr) => match self {
+                Transform::Identity => Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                ))),
+                Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => {
+                    if expr.op() != PredicateOperator::In
+                        || expr.literals().iter().any(|d| !self.can_transform(d))
+                    {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Set(SetExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        self.transform_set(expr.literals(), &func)?,
+                    )))
+                }
+                _ => None,
+            },
+            _ => None,
+        };
+
+        Ok(projection)
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Transform each literal value of `FnvHashSet<Datum>`
+    fn transform_set(
+        &self,
+        literals: &FnvHashSet<Datum>,
+        func: &BoxedTransformFunction,
+    ) -> Result<FnvHashSet<Datum>> {
+        let mut new_set = FnvHashSet::default();
+
+        for lit in literals {
+            let datum = func.transform_literal_result(lit)?;
+
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(new_set)
+    }
+
+    /// Apply transform on `Datum` with adjusted boundaries.
+    /// Returns Predicate with projection and possibly
+    /// rewritten `PredicateOperator`
+    fn transform_projected_boundary(
+        &self,
+        name: String,
+        datum: &Datum,
+        op: &PredicateOperator,
+        func: &BoxedTransformFunction,
+        width: Option<u32>,
+    ) -> Result<Option<Predicate>> {
+        if let Some(boundary) = self.projected_boundary(op, datum)? {
+            let transformed = func.transform_literal_result(&boundary)?;
+            let adjusted = self.adjust_projection(op, datum, &transformed);
+            let op = self.projected_operator(op, datum, width);
+
+            if let Some(op) = op {
+                let predicate = match adjusted {
+                    None => Predicate::Binary(BinaryExpression::new(
+                        op,
+                        Reference::new(name),
+                        transformed,
+                    )),
+                    Some(AdjustedProjection::Single(d)) => {
+                        Predicate::Binary(BinaryExpression::new(op, Reference::new(name), d))
+                    }
+                    Some(AdjustedProjection::Set(d)) => Predicate::Set(SetExpression::new(
+                        PredicateOperator::In,
+                        Reference::new(name),
+                        d,
+                    )),
+                };
+                return Ok(Some(predicate));
+            }
+        };
+
+        Ok(None)
+    }
+
+    /// Create a new `Datum` with adjusted projection boundary.
+    /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral`
+    /// can not be projected
+    fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result<Option<Datum>> {
+        let literal = datum.literal();
+
+        let projected_boundary = match op {
+            PredicateOperator::LessThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)),
+                PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::GreaterThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)),
+                PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::Eq
+            | PredicateOperator::LessThanOrEq
+            | PredicateOperator::GreaterThanOrEq
+            | PredicateOperator::StartsWith
+            | PredicateOperator::NotStartsWith => Some(datum.to_owned()),
+            _ => None,
+        };
+
+        Ok(projected_boundary)
+    }
+
+    /// Create a new `PredicateOperator`, rewritten for projection
+    fn projected_operator(

Review Comment:
   Reasonable to me.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to