wgtmac commented on code in PR #399:
URL: https://github.com/apache/iceberg-cpp/pull/399#discussion_r2606548014


##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);

Review Comment:
   ```suggestion
       return And::Make(left_result, right_result);
   ```



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);

Review Comment:
   ```suggestion
       return Or::Make(left_result, right_result);
   ```



##########
src/iceberg/expression/projections.h:
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/expression/projections.h
+/// Utils to project expressions on rows to expressions on partitions.
+
+#include <memory>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief A class that projects expressions for a table's data rows into 
expressions on
+/// the table's partition values, for a table's partition spec.
+class ICEBERG_EXPORT ProjectionEvaluator {
+ public:
+  ~ProjectionEvaluator();
+
+  /// \brief Project the given row expression to a partition expression.
+  ///
+  /// \param expr an expression on data rows
+  /// \return an expression on partition data (depends on the projection)
+  Result<std::shared_ptr<Expression>> Project(const 
std::shared_ptr<Expression>& expr);
+
+ private:
+  friend class Projections;
+
+  /// \brief Create a ProjectionEvaluator.
+  ///
+  /// \param visitor The projection visitor to use
+  explicit ProjectionEvaluator(std::unique_ptr<class ProjectionVisitor> 
visitor);
+
+  std::unique_ptr<class ProjectionVisitor> visitor_;

Review Comment:
   ```suggestion
     std::unique_ptr<ProjectionVisitor> visitor_;
   ```



##########
src/iceberg/expression/projections.h:
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/expression/projections.h
+/// Utils to project expressions on rows to expressions on partitions.
+
+#include <memory>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief A class that projects expressions for a table's data rows into 
expressions on
+/// the table's partition values, for a table's partition spec.
+class ICEBERG_EXPORT ProjectionEvaluator {
+ public:
+  ~ProjectionEvaluator();
+
+  /// \brief Project the given row expression to a partition expression.
+  ///
+  /// \param expr an expression on data rows
+  /// \return an expression on partition data (depends on the projection)
+  Result<std::shared_ptr<Expression>> Project(const 
std::shared_ptr<Expression>& expr);
+
+ private:
+  friend class Projections;
+
+  /// \brief Create a ProjectionEvaluator.
+  ///
+  /// \param visitor The projection visitor to use
+  explicit ProjectionEvaluator(std::unique_ptr<class ProjectionVisitor> 
visitor);
+
+  std::unique_ptr<class ProjectionVisitor> visitor_;
+};
+
+/// \brief Utils to project expressions on rows to expressions on partitions.
+///
+/// There are two types of projections: inclusive and strict.
+///
+/// An inclusive projection guarantees that if an expression matches a row, 
the projected
+/// expression will match the row's partition.
+///
+/// A strict projection guarantees that if a partition matches a projected 
expression,
+/// then all rows in that partition will match the original expression.
+class ICEBERG_EXPORT Projections {
+ public:

Review Comment:
   ```suggestion
   struct ICEBERG_EXPORT Projections {
   ```



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<UnboundPredicate>& pred) override {
+    ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, 
case_sensitive_));
+    if (bound_pred->is_bound_predicate()) {
+      auto bound_predicate = 
std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
+      ICEBERG_DCHECK(
+          bound_predicate != nullptr,
+          "Expected bound_predicate to be non-null after is_bound_predicate() 
check");
+      return Predicate(bound_predicate);

Review Comment:
   ```suggestion
         return Predicate(internal::checked_pointer_cast<BoundPredicate>(bound_pred));
   ```



##########
src/iceberg/expression/projections.h:
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/expression/projections.h
+/// Utils to project expressions on rows to expressions on partitions.
+
+#include <memory>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief A class that projects expressions for a table's data rows into 
expressions on
+/// the table's partition values, for a table's partition spec.
+class ICEBERG_EXPORT ProjectionEvaluator {
+ public:
+  ~ProjectionEvaluator();
+
+  /// \brief Project the given row expression to a partition expression.
+  ///
+  /// \param expr an expression on data rows
+  /// \return an expression on partition data (depends on the projection)
+  Result<std::shared_ptr<Expression>> Project(const 
std::shared_ptr<Expression>& expr);
+
+ private:
+  friend class Projections;
+
+  /// \brief Create a ProjectionEvaluator.
+  ///
+  /// \param visitor The projection visitor to use
+  explicit ProjectionEvaluator(std::unique_ptr<class ProjectionVisitor> 
visitor);
+
+  std::unique_ptr<class ProjectionVisitor> visitor_;
+};
+
+/// \brief Utils to project expressions on rows to expressions on partitions.
+///
+/// There are two types of projections: inclusive and strict.
+///
+/// An inclusive projection guarantees that if an expression matches a row, 
the projected
+/// expression will match the row's partition.
+///
+/// A strict projection guarantees that if a partition matches a projected 
expression,
+/// then all rows in that partition will match the original expression.
+class ICEBERG_EXPORT Projections {
+ public:
+  /// \brief Creates an inclusive ProjectionEvaluator for the partition spec.
+  ///
+  /// An evaluator is used to project expressions for a table's data rows into 
expressions
+  /// on the table's partition values. The evaluator returned by this function 
is
+  /// inclusive and will build expressions with the following guarantee: if 
the original
+  /// expression matches a row, then the projected expression will match that 
row's
+  /// partition.
+  ///
+  /// Each predicate in the expression is projected using Transform::Project.
+  ///
+  /// \param spec a partition spec
+  /// \param case_sensitive whether the Projection should consider case 
sensitivity on
+  /// column names or not. Defaults to true (case sensitive).
+  /// \return an inclusive projection evaluator for the partition spec
+  static std::unique_ptr<ProjectionEvaluator> Inclusive(
+      const std::shared_ptr<PartitionSpec>& spec, const 
std::shared_ptr<Schema>& schema,
+      bool case_sensitive = true);
+
+  /// \brief Creates a strict ProjectionEvaluator for the partition spec.
+  ///
+  /// An evaluator is used to project expressions for a table's data rows into 
expressions
+  /// on the table's partition values. The evaluator returned by this function 
is strict
+  /// and will build expressions with the following guarantee: if the 
projected expression
+  /// matches a partition, then the original expression will match all rows in 
that
+  /// partition.
+  ///
+  /// Each predicate in the expression is projected using 
Transform::ProjectStrict.
+  ///
+  /// \param spec a partition spec
+  /// \param case_sensitive whether the Projection should consider case 
sensitivity on
+  /// column names or not. Defaults to true (case sensitive).
+  /// \return a strict projection evaluator for the partition spec
+  static std::unique_ptr<ProjectionEvaluator> Strict(
+      const std::shared_ptr<PartitionSpec>& spec, const 
std::shared_ptr<Schema>& schema,
+      bool case_sensitive = true);
+
+ private:
+  Projections() = default;

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<UnboundPredicate>& pred) override {
+    ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, 
case_sensitive_));
+    if (bound_pred->is_bound_predicate()) {
+      auto bound_predicate = 
std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
+      ICEBERG_DCHECK(
+          bound_predicate != nullptr,
+          "Expected bound_predicate to be non-null after is_bound_predicate() 
check");
+      return Predicate(bound_predicate);
+    }
+    return bound_pred;
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    return InvalidExpression("Bound predicates are not supported in 
projections");
+  }
+
+ protected:
+  const std::shared_ptr<PartitionSpec>& spec_;
+  const std::shared_ptr<Schema>& schema_;
+  bool case_sensitive_;
+
+  /// \brief Get partition fields that match the predicate's term.
+  std::vector<const PartitionField*> GetFieldsByPredicate(
+      const std::shared_ptr<BoundPredicate>& pred) const {
+    int32_t source_id;
+    switch (pred->term()->kind()) {
+      case Term::Kind::kReference: {
+        const auto& ref = pred->term()->reference();
+        source_id = ref->field().field_id();
+        break;
+      }
+      case Term::Kind::kTransform: {
+        const auto& transform =
+            internal::checked_pointer_cast<BoundTransform>(pred->term());
+        source_id = transform->reference()->field().field_id();
+        break;
+      }
+      default:
+        std::unreachable();
+    }
+
+    std::vector<const PartitionField*> result;
+    for (const auto& field : spec_->fields()) {
+      if (field.source_id() == source_id) {
+        result.push_back(&field);
+      }
+    }
+    return result;
+  }
+};
+
+ProjectionEvaluator::ProjectionEvaluator(std::unique_ptr<ProjectionVisitor> 
visitor)
+    : visitor_(std::move(visitor)) {}
+
+ProjectionEvaluator::~ProjectionEvaluator() = default;
+
+/// \brief Inclusive projection visitor.
+///
+/// Uses AND to combine projections from multiple partition fields.
+class InclusiveProjectionVisitor : public ProjectionVisitor {
+ public:
+  ~InclusiveProjectionVisitor() override = default;
+
+  InclusiveProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                             const std::shared_ptr<Schema>& schema, bool 
case_sensitive)
+      : ProjectionVisitor(spec, schema, case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
+    // Find partition fields that match the predicate's term
+    auto partition_fields = GetFieldsByPredicate(pred);
+    if (partition_fields.empty()) {
+      // The predicate has no partition column
+      return AlwaysTrue();
+    }
+
+    // Project the predicate for each partition field and combine with AND
+    //
+    // consider (d = 2019-01-01) with bucket(7, d) and bucket(5, d)
+    // projections: b1 = bucket(7, '2019-01-01') = 5, b2 = bucket(5, 
'2019-01-01') = 0
+    // any value where b1 != 5 or any value where b2 != 0 cannot be the 
'2019-01-01'
+    //
+    // similarly, if partitioning by day(ts) and hour(ts), the more restrictive
+    // projection should be used. ts = 2019-01-01T01:00:00 produces 
day=2019-01-01 and
+    // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 
2019-01-01-02.
+    std::shared_ptr<Expression> result = True::Instance();
+    for (const auto* part_field : partition_fields) {
+      ICEBERG_ASSIGN_OR_RAISE(auto projected,
+                              
part_field->transform()->Project(part_field->name(), pred));
+      if (projected != nullptr) {
+        result =
+            Expressions::And(result, 
std::shared_ptr<Expression>(projected.release()));

Review Comment:
   ```suggestion
           ICEBERG_ASSIGN_OR_RAISE(result, And::Make(std::move(result), std::move(projected)));
   ```



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<UnboundPredicate>& pred) override {
+    ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, 
case_sensitive_));
+    if (bound_pred->is_bound_predicate()) {
+      auto bound_predicate = 
std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
+      ICEBERG_DCHECK(
+          bound_predicate != nullptr,
+          "Expected bound_predicate to be non-null after is_bound_predicate() 
check");
+      return Predicate(bound_predicate);
+    }
+    return bound_pred;
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    return InvalidExpression("Bound predicates are not supported in 
projections");
+  }
+
+ protected:
+  const std::shared_ptr<PartitionSpec>& spec_;
+  const std::shared_ptr<Schema>& schema_;
+  bool case_sensitive_;
+
+  /// \brief Get partition fields that match the predicate's term.
+  std::vector<const PartitionField*> GetFieldsByPredicate(
+      const std::shared_ptr<BoundPredicate>& pred) const {
+    int32_t source_id;
+    switch (pred->term()->kind()) {
+      case Term::Kind::kReference: {
+        const auto& ref = pred->term()->reference();
+        source_id = ref->field().field_id();
+        break;
+      }
+      case Term::Kind::kTransform: {
+        const auto& transform =
+            internal::checked_pointer_cast<BoundTransform>(pred->term());
+        source_id = transform->reference()->field().field_id();
+        break;
+      }
+      default:
+        std::unreachable();
+    }
+
+    std::vector<const PartitionField*> result;
+    for (const auto& field : spec_->fields()) {
+      if (field.source_id() == source_id) {
+        result.push_back(&field);
+      }
+    }
+    return result;
+  }
+};
+
+ProjectionEvaluator::ProjectionEvaluator(std::unique_ptr<ProjectionVisitor> 
visitor)
+    : visitor_(std::move(visitor)) {}
+
+ProjectionEvaluator::~ProjectionEvaluator() = default;
+
+/// \brief Inclusive projection visitor.
+///
+/// Uses AND to combine projections from multiple partition fields.
+class InclusiveProjectionVisitor : public ProjectionVisitor {
+ public:
+  ~InclusiveProjectionVisitor() override = default;
+
+  InclusiveProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                             const std::shared_ptr<Schema>& schema, bool 
case_sensitive)
+      : ProjectionVisitor(spec, schema, case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
+    // Find partition fields that match the predicate's term
+    auto partition_fields = GetFieldsByPredicate(pred);
+    if (partition_fields.empty()) {
+      // The predicate has no partition column
+      return AlwaysTrue();
+    }
+
+    // Project the predicate for each partition field and combine with AND
+    //
+    // consider (d = 2019-01-01) with bucket(7, d) and bucket(5, d)
+    // projections: b1 = bucket(7, '2019-01-01') = 5, b2 = bucket(5, 
'2019-01-01') = 0
+    // any value where b1 != 5 or any value where b2 != 0 cannot be the 
'2019-01-01'
+    //
+    // similarly, if partitioning by day(ts) and hour(ts), the more restrictive
+    // projection should be used. ts = 2019-01-01T01:00:00 produces 
day=2019-01-01 and
+    // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 
2019-01-01-02.
+    std::shared_ptr<Expression> result = True::Instance();
+    for (const auto* part_field : partition_fields) {
+      ICEBERG_ASSIGN_OR_RAISE(auto projected,
+                              
part_field->transform()->Project(part_field->name(), pred));
+      if (projected != nullptr) {
+        result =
+            Expressions::And(result, 
std::shared_ptr<Expression>(projected.release()));
+      }
+    }
+
+    return result;
+  }
+
+ protected:

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<UnboundPredicate>& pred) override {
+    ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, 
case_sensitive_));
+    if (bound_pred->is_bound_predicate()) {
+      auto bound_predicate = 
std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
+      ICEBERG_DCHECK(
+          bound_predicate != nullptr,
+          "Expected bound_predicate to be non-null after is_bound_predicate() 
check");
+      return Predicate(bound_predicate);
+    }
+    return bound_pred;
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    return InvalidExpression("Bound predicates are not supported in 
projections");
+  }
+
+ protected:
+  const std::shared_ptr<PartitionSpec>& spec_;
+  const std::shared_ptr<Schema>& schema_;
+  bool case_sensitive_;
+
+  /// \brief Get partition fields that match the predicate's term.
+  std::vector<const PartitionField*> GetFieldsByPredicate(
+      const std::shared_ptr<BoundPredicate>& pred) const {
+    int32_t source_id;
+    switch (pred->term()->kind()) {
+      case Term::Kind::kReference: {
+        const auto& ref = pred->term()->reference();
+        source_id = ref->field().field_id();
+        break;
+      }
+      case Term::Kind::kTransform: {
+        const auto& transform =
+            internal::checked_pointer_cast<BoundTransform>(pred->term());
+        source_id = transform->reference()->field().field_id();
+        break;
+      }
+      default:
+        std::unreachable();
+    }
+
+    std::vector<const PartitionField*> result;
+    for (const auto& field : spec_->fields()) {
+      if (field.source_id() == source_id) {
+        result.push_back(&field);
+      }
+    }
+    return result;
+  }
+};
+
+ProjectionEvaluator::ProjectionEvaluator(std::unique_ptr<ProjectionVisitor> 
visitor)
+    : visitor_(std::move(visitor)) {}
+
+ProjectionEvaluator::~ProjectionEvaluator() = default;
+
+/// \brief Inclusive projection visitor.
+///
+/// Uses AND to combine projections from multiple partition fields.
+class InclusiveProjectionVisitor : public ProjectionVisitor {
+ public:
+  ~InclusiveProjectionVisitor() override = default;
+
+  InclusiveProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                             const std::shared_ptr<Schema>& schema, bool 
case_sensitive)
+      : ProjectionVisitor(spec, schema, case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
+    // Find partition fields that match the predicate's term
+    auto partition_fields = GetFieldsByPredicate(pred);
+    if (partition_fields.empty()) {
+      // The predicate has no partition column
+      return AlwaysTrue();
+    }
+
+    // Project the predicate for each partition field and combine with AND
+    //
+    // consider (d = 2019-01-01) with bucket(7, d) and bucket(5, d)
+    // projections: b1 = bucket(7, '2019-01-01') = 5, b2 = bucket(5, 
'2019-01-01') = 0
+    // any value where b1 != 5 or any value where b2 != 0 cannot be the 
'2019-01-01'
+    //
+    // similarly, if partitioning by day(ts) and hour(ts), the more restrictive
+    // projection should be used. ts = 2019-01-01T01:00:00 produces 
day=2019-01-01 and
+    // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 
2019-01-01-02.
+    std::shared_ptr<Expression> result = True::Instance();
+    for (const auto* part_field : partition_fields) {
+      ICEBERG_ASSIGN_OR_RAISE(auto projected,
+                              
part_field->transform()->Project(part_field->name(), pred));
+      if (projected != nullptr) {
+        result =
+            Expressions::And(result, 
std::shared_ptr<Expression>(projected.release()));
+      }
+    }
+
+    return result;
+  }
+
+ protected:
+};
+
+/// \brief Strict projection evaluator.
+///
+/// Uses OR to combine projections from multiple partition fields.
+class StrictProjectionVisitor : public ProjectionVisitor {
+ public:
+  ~StrictProjectionVisitor() override = default;
+
+  StrictProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                          const std::shared_ptr<Schema>& schema, bool 
case_sensitive)
+      : ProjectionVisitor(spec, schema, case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
+    // Find partition fields that match the predicate's term
+    auto partition_fields = GetFieldsByPredicate(pred);
+    if (partition_fields.empty()) {
+      // The predicate has no matching partition columns
+      return AlwaysFalse();
+    }
+
+    // Project the predicate for each partition field and combine with OR
+    //
+    // consider (ts > 2019-01-01T01:00:00) with day(ts) and hour(ts)
+    // projections: d >= 2019-01-02 and h >= 2019-01-01-02 (note the inclusive 
bounds).
+    // any timestamp where either projection predicate is true must match the 
original
+    // predicate. For example, ts = 2019-01-01T03:00:00 matches the hour 
projection but
+    // not the day, but does match the original predicate.
+    std::shared_ptr<Expression> result = False::Instance();
+    for (const auto* part_field : partition_fields) {
+      ICEBERG_ASSIGN_OR_RAISE(auto projected, 
part_field->transform()->ProjectStrict(
+                                                  part_field->name(), pred));
+      if (projected != nullptr) {
+        result =

Review Comment:
   ditto



##########
src/iceberg/expression/projections.cc:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/projections.h"
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/expressions.h"
+#include "iceberg/expression/predicate.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/partition_field.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Implementation detail - not exported
+class ProjectionVisitor : public 
ExpressionVisitor<std::shared_ptr<Expression>> {
+ public:
+  ~ProjectionVisitor() override = default;
+
+  ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
+                    const std::shared_ptr<Schema>& schema, bool case_sensitive)
+      : spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
+
+  Result<std::shared_ptr<Expression>> AlwaysTrue() override { return 
True::Instance(); }
+
+  Result<std::shared_ptr<Expression>> AlwaysFalse() override { return 
False::Instance(); }
+
+  Result<std::shared_ptr<Expression>> Not(
+      const std::shared_ptr<Expression>& child_result) override {
+    return InvalidExpression("Project called on expression with a not");
+  }
+
+  Result<std::shared_ptr<Expression>> And(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::And(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Or(
+      const std::shared_ptr<Expression>& left_result,
+      const std::shared_ptr<Expression>& right_result) override {
+    return Expressions::Or(left_result, right_result);
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<UnboundPredicate>& pred) override {
+    ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, 
case_sensitive_));
+    if (bound_pred->is_bound_predicate()) {
+      auto bound_predicate = 
std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
+      ICEBERG_DCHECK(
+          bound_predicate != nullptr,
+          "Expected bound_predicate to be non-null after is_bound_predicate() 
check");
+      return Predicate(bound_predicate);
+    }
+    return bound_pred;
+  }
+
+  Result<std::shared_ptr<Expression>> Predicate(
+      const std::shared_ptr<BoundPredicate>& pred) override {
+    return InvalidExpression("Bound predicates are not supported in 
projections");
+  }
+
+ protected:
+  const std::shared_ptr<PartitionSpec>& spec_;
+  const std::shared_ptr<Schema>& schema_;
+  bool case_sensitive_;
+
+  /// \brief Get partition fields that match the predicate's term.
+  std::vector<const PartitionField*> GetFieldsByPredicate(

Review Comment:
   Let's port `PartitionSpec.getFieldsBySourceId` from Java since it is reused 
in multiple places.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to