This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 320a985  feat: add eval support to bound term (#320)
320a985 is described below

commit 320a985e6ede52ce05e513bbc198069333fea193
Author: Gang Wu <[email protected]>
AuthorDate: Wed Nov 19 16:11:24 2025 +0800

    feat: add eval support to bound term (#320)
    
    - add struct like accessor
    - support schema to find accessor by field id
    - bound term can evaluate struct-like
---
 src/iceberg/CMakeLists.txt           |   1 +
 src/iceberg/expression/term.cc       |  30 +++--
 src/iceberg/expression/term.h        |   6 +-
 src/iceberg/meson.build              |   1 +
 src/iceberg/row/struct_like.cc       | 129 ++++++++++++++++++
 src/iceberg/row/struct_like.h        |  29 ++++-
 src/iceberg/schema.cc                |  66 ++++++++++
 src/iceberg/schema.h                 |  10 ++
 src/iceberg/test/CMakeLists.txt      |   6 +
 src/iceberg/test/eval_expr_test.cc   | 245 +++++++++++++++++++++++++++++++++++
 src/iceberg/test/struct_like_test.cc |  51 ++++++++
 src/iceberg/type_fwd.h               |   3 +-
 12 files changed, 564 insertions(+), 13 deletions(-)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index c048d29..22c2221 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -43,6 +43,7 @@ set(ICEBERG_SOURCES
     partition_spec.cc
     row/arrow_array_wrapper.cc
     row/manifest_wrapper.cc
+    row/struct_like.cc
     schema.cc
     schema_field.cc
     schema_internal.cc
diff --git a/src/iceberg/expression/term.cc b/src/iceberg/expression/term.cc
index ba6e55e..34dfb91 100644
--- a/src/iceberg/expression/term.cc
+++ b/src/iceberg/expression/term.cc
@@ -21,8 +21,8 @@
 
 #include <format>
 
-#include "iceberg/exception.h"
 #include "iceberg/result.h"
+#include "iceberg/row/struct_like.h"
 #include "iceberg/schema.h"
 #include "iceberg/transform.h"
 #include "iceberg/util/checked_cast.h"
@@ -64,7 +64,11 @@ Result<std::shared_ptr<BoundReference>> 
NamedReference::Bind(const Schema& schem
     return InvalidExpression("Cannot find field '{}' in struct: {}", 
field_name_,
                              schema.ToString());
   }
-  return BoundReference::Make(field_opt.value().get());
+
+  int32_t field_id = field_opt.value().get().field_id();
+  ICEBERG_ASSIGN_OR_RAISE(auto accessor, schema.GetAccessorById(field_id));
+
+  return BoundReference::Make(field_opt.value().get(), std::move(accessor));
 }
 
 std::string NamedReference::ToString() const {
@@ -72,17 +76,25 @@ std::string NamedReference::ToString() const {
 }
 
 // BoundReference implementation
-Result<std::unique_ptr<BoundReference>> BoundReference::Make(SchemaField 
field) {
+Result<std::unique_ptr<BoundReference>> BoundReference::Make(
+    SchemaField field, std::unique_ptr<StructLikeAccessor> accessor) {
   if (auto status = field.Validate(); !status.has_value()) [[unlikely]] {
     return InvalidExpression("Cannot create BoundReference with invalid field: 
{}",
                              status.error().message);
   }
-  return std::unique_ptr<BoundReference>(new BoundReference(std::move(field)));
+  if (!accessor) [[unlikely]] {
+    return InvalidExpression("Cannot create BoundReference without accessor");
+  }
+  return std::unique_ptr<BoundReference>(
+      new BoundReference(std::move(field), std::move(accessor)));
 }
 
-BoundReference::BoundReference(SchemaField field) : field_(std::move(field)) {
+BoundReference::BoundReference(SchemaField field,
+                               std::unique_ptr<StructLikeAccessor> accessor)
+    : field_(std::move(field)), accessor_(std::move(accessor)) {
   ICEBERG_DCHECK(field_.Validate().has_value(),
                  "Cannot create BoundReference with invalid field");
+  ICEBERG_DCHECK(accessor_ != nullptr, "Cannot create BoundReference without 
accessor");
 }
 
 BoundReference::~BoundReference() = default;
@@ -92,7 +104,7 @@ std::string BoundReference::ToString() const {
 }
 
 Result<Literal> BoundReference::Evaluate(const StructLike& data) const {
-  return NotImplemented("BoundReference::Evaluate(StructLike) not 
implemented");
+  return accessor_->GetLiteral(data);
 }
 
 bool BoundReference::Equals(const BoundTerm& other) const {
@@ -167,14 +179,14 @@ std::string BoundTransform::ToString() const {
 }
 
 Result<Literal> BoundTransform::Evaluate(const StructLike& data) const {
-  throw IcebergError("BoundTransform::Evaluate(StructLike) not implemented");
+  ICEBERG_ASSIGN_OR_RAISE(auto literal, ref_->Evaluate(data));
+  return transform_func_->Transform(literal);
 }
 
 bool BoundTransform::MayProduceNull() const {
   // transforms must produce null for null input values
   // transforms may produce null for non-null inputs when not order-preserving
-  // FIXME: add Transform::is_order_preserving()
-  return ref_->MayProduceNull();  // || !transform_->is_order_preserving();
+  return ref_->MayProduceNull() || !transform_->PreservesOrder();
 }
 
 std::shared_ptr<Type> BoundTransform::type() const {
diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h
index 6259b82..e2a378f 100644
--- a/src/iceberg/expression/term.h
+++ b/src/iceberg/expression/term.h
@@ -163,7 +163,8 @@ class ICEBERG_EXPORT BoundReference
   /// \brief Create a bound reference.
   ///
   /// \param field The schema field
-  static Result<std::unique_ptr<BoundReference>> Make(SchemaField field);
+  static Result<std::unique_ptr<BoundReference>> Make(
+      SchemaField field, std::unique_ptr<StructLikeAccessor> accessor);
 
   ~BoundReference() override;
 
@@ -186,9 +187,10 @@ class ICEBERG_EXPORT BoundReference
   Kind kind() const override { return Kind::kReference; }
 
  private:
-  explicit BoundReference(SchemaField field);
+  BoundReference(SchemaField field, std::unique_ptr<StructLikeAccessor> 
accessor);
 
   SchemaField field_;
+  std::unique_ptr<StructLikeAccessor> accessor_;
 };
 
 /// \brief An unbound transform expression.
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 3a4f888..ae5f8ba 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -65,6 +65,7 @@ iceberg_sources = files(
     'partition_spec.cc',
     'row/arrow_array_wrapper.cc',
     'row/manifest_wrapper.cc',
+    'row/struct_like.cc',
     'schema.cc',
     'schema_field.cc',
     'schema_internal.cc',
diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc
new file mode 100644
index 0000000..b0fb67f
--- /dev/null
+++ b/src/iceberg/row/struct_like.cc
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/row/struct_like.h"
+
+#include <utility>
+
+#include "iceberg/result.h"
+#include "iceberg/util/checked_cast.h"
+#include "iceberg/util/formatter_internal.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Construct an accessor for the value located at `position_path`.
+///
+/// Each element of `position_path` is a field position handed to
+/// StructLike::GetField, starting from the struct passed to Get(). Every element
+/// except the last must resolve to a nested struct, otherwise Get() returns
+/// InvalidSchema. Paths of length 1 and 2 get dedicated closures; longer paths
+/// are walked in a loop.
+///
+/// \param type The type reported by type() and consulted by GetLiteral().
+/// \param position_path Positions from the root struct to the target field. The
+/// positions are copied into the accessor, so the caller's storage does not have
+/// to outlive it. An empty path yields an accessor whose Get() always fails.
+StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
+                                       std::span<const size_t> position_path)
+    : type_(std::move(type)) {
+  if (position_path.size() == 1) {
+    accessor_ = [pos = position_path[0]](const StructLike& struct_like) -> Result<Scalar> {
+      return struct_like.GetField(pos);
+    };
+  } else if (position_path.size() == 2) {
+    accessor_ = [pos0 = position_path[0], pos1 = position_path[1]](
+                    const StructLike& struct_like) -> Result<Scalar> {
+      ICEBERG_ASSIGN_OR_RAISE(auto first_level_field, struct_like.GetField(pos0));
+      if (!std::holds_alternative<std::shared_ptr<StructLike>>(first_level_field)) {
+        return InvalidSchema("Encountered non-struct in the position path [{},{}]", pos0,
+                             pos1);
+      }
+      return std::get<std::shared_ptr<StructLike>>(first_level_field)->GetField(pos1);
+    };
+  } else if (!position_path.empty()) {
+    // std::span is a non-owning view. The closure is stored in accessor_ and may
+    // outlive the storage the caller passed in, so keep a private copy of the
+    // positions instead of capturing the span itself.
+    accessor_ = [path = std::vector<size_t>(position_path.begin(), position_path.end())](
+                    const StructLike& struct_like) -> Result<Scalar> {
+      // Holds every intermediate struct so it stays alive while its children are read.
+      std::vector<std::shared_ptr<StructLike>> parents;
+      parents.reserve(path.size() - 1);
+      const StructLike* current = &struct_like;
+      for (size_t i = 0; i + 1 < path.size(); ++i) {
+        ICEBERG_ASSIGN_OR_RAISE(auto field, current->GetField(path[i]));
+        if (!std::holds_alternative<std::shared_ptr<StructLike>>(field)) {
+          return InvalidSchema("Encountered non-struct in the position path [{}]", path);
+        }
+        parents.push_back(std::get<std::shared_ptr<StructLike>>(std::move(field)));
+        current = parents.back().get();
+      }
+      return current->GetField(path.back());
+    };
+  } else {
+    accessor_ = [](const StructLike&) -> Result<Scalar> {
+      return Invalid("Cannot read StructLike with empty position path");
+    };
+  }
+}
+
+namespace {
+
+/// \brief Read alternative `T` out of `scalar`.
+///
+/// \param scalar The value returned by the struct-like object.
+/// \param type The type the accessor is bound to; only used for the error message.
+/// \return The stored value, or InvalidSchema if `scalar` holds a different
+/// alternative. std::get would throw std::bad_variant_access in that case, and
+/// the exception would escape the Result-based API.
+template <typename T>
+Result<T> ScalarAs(const Scalar& scalar, const Type& type) {
+  if (const T* value = std::get_if<T>(&scalar); value != nullptr) {
+    return *value;
+  }
+  return InvalidSchema("Scalar value does not match the expected type {}",
+                       type.ToString());
+}
+
+}  // namespace
+
+/// \brief Read the bound field and convert it to a Literal of the bound type.
+///
+/// \param struct_like The struct to read from.
+/// \return A typed null literal when the field holds std::monostate, otherwise a
+/// literal built from the scalar. Errors: NotSupported for non-primitive types
+/// and for UUID (not implemented yet), InvalidSchema when the scalar alternative
+/// does not match the bound type, or whatever error Get() reports.
+Result<Literal> StructLikeAccessor::GetLiteral(const StructLike& struct_like) const {
+  if (!type_->is_primitive()) {
+    return NotSupported("Cannot get literal value for non-primitive type {}",
+                        type_->ToString());
+  }
+
+  ICEBERG_ASSIGN_OR_RAISE(auto scalar, Get(struct_like));
+
+  if (std::holds_alternative<std::monostate>(scalar)) {
+    return Literal::Null(internal::checked_pointer_cast<PrimitiveType>(type_));
+  }
+
+  switch (type_->type_id()) {
+    case TypeId::kBoolean: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<bool>(scalar, *type_));
+      return Literal::Boolean(value);
+    }
+    case TypeId::kInt: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int32_t>(scalar, *type_));
+      return Literal::Int(value);
+    }
+    case TypeId::kLong: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int64_t>(scalar, *type_));
+      return Literal::Long(value);
+    }
+    case TypeId::kFloat: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<float>(scalar, *type_));
+      return Literal::Float(value);
+    }
+    case TypeId::kDouble: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<double>(scalar, *type_));
+      return Literal::Double(value);
+    }
+    case TypeId::kString: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<std::string_view>(scalar, *type_));
+      return Literal::String(std::string(value));
+    }
+    case TypeId::kBinary: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<std::string_view>(scalar, *type_));
+      return Literal::Binary(std::vector<uint8_t>(value.cbegin(), value.cend()));
+    }
+    case TypeId::kDecimal: {
+      const auto& decimal_type = internal::checked_cast<const DecimalType&>(*type_);
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<Decimal>(scalar, *type_));
+      return Literal::Decimal(value.value(), decimal_type.precision(),
+                              decimal_type.scale());
+    }
+    case TypeId::kDate: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int32_t>(scalar, *type_));
+      return Literal::Date(value);
+    }
+    case TypeId::kTime: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int64_t>(scalar, *type_));
+      return Literal::Time(value);
+    }
+    case TypeId::kTimestamp: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int64_t>(scalar, *type_));
+      return Literal::Timestamp(value);
+    }
+    case TypeId::kTimestampTz: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<int64_t>(scalar, *type_));
+      return Literal::TimestampTz(value);
+    }
+    case TypeId::kFixed: {
+      ICEBERG_ASSIGN_OR_RAISE(auto value, ScalarAs<std::string_view>(scalar, *type_));
+      return Literal::Fixed(std::vector<uint8_t>(value.cbegin(), value.cend()));
+    }
+    case TypeId::kUuid:
+      // TODO(gangwu): Implement UUID type
+    default:
+      return NotSupported("Cannot convert scalar to literal of type {}",
+                          type_->ToString());
+  }
+
+  std::unreachable();
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/row/struct_like.h b/src/iceberg/row/struct_like.h
index 3093f75..4999da6 100644
--- a/src/iceberg/row/struct_like.h
+++ b/src/iceberg/row/struct_like.h
@@ -26,11 +26,13 @@
 /// ManifestEntry.  Note that they do not carry type information and should be
 /// used in conjunction with the schema to get the type information.
 
+#include <functional>
 #include <memory>
+#include <span>
 #include <string_view>
 #include <variant>
-#include <vector>
 
+#include "iceberg/expression/literal.h"
 #include "iceberg/result.h"
 #include "iceberg/type_fwd.h"
 #include "iceberg/util/decimal.h"
@@ -96,4 +98,29 @@ class ICEBERG_EXPORT MapLike {
   virtual size_t size() const = 0;
 };
 
+/// \brief Reads one (possibly nested) field out of a struct-like object.
+///
+/// An accessor is bound to a position path and to the type of the value found at
+/// the end of that path.
+class ICEBERG_EXPORT StructLikeAccessor {
+ public:
+  /// \brief Bind an accessor to a type and a position path.
+  ///
+  /// \param type Type of the value located at the end of `position_path`.
+  /// \param position_path Field positions to follow, outermost struct first.
+  explicit StructLikeAccessor(std::shared_ptr<Type> type,
+                              std::span<const size_t> position_path);
+
+  /// \brief Get the type of the value that this accessor is bound to.
+  const Type& type() const { return *type_; }
+
+  /// \brief Get the scalar value at the bound position path.
+  Result<Scalar> Get(const StructLike& struct_like) const { return accessor_(struct_like); }
+
+  /// \brief Get the literal value at the bound position path.
+  ///
+  /// \return The literal value, or an error if the bound type is not a primitive
+  /// type.
+  Result<Literal> GetLiteral(const StructLike& struct_like) const;
+
+ private:
+  std::shared_ptr<Type> type_;
+  std::function<Result<Scalar>(const StructLike&)> accessor_;
+};
+
 }  // namespace iceberg
diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc
index bfb47b3..8719f22 100644
--- a/src/iceberg/schema.cc
+++ b/src/iceberg/schema.cc
@@ -22,9 +22,12 @@
 #include <format>
 #include <functional>
 
+#include "iceberg/result.h"
+#include "iceberg/row/struct_like.h"
 #include "iceberg/schema_internal.h"
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
+#include "iceberg/util/formatter_internal.h"
 #include "iceberg/util/macros.h"
 #include "iceberg/util/visit_type.h"
 
@@ -69,6 +72,48 @@ class NameToIdVisitor {
   std::function<std::string(std::string_view)> quoting_func_;
 };
 
+/// \brief Type visitor that records, for every primitive field reachable through
+/// nested structs, the list of field positions leading to it.
+///
+/// Fields nested inside lists or maps are not visited.
+class PositionPathVisitor {
+ public:
+  /// \brief Register the path of the primitive field currently being visited.
+  ///
+  /// \return InvalidSchema if no field id has been set yet or if the id was
+  /// already registered.
+  Status Visit(const PrimitiveType& type) {
+    if (current_field_id_ == kUnassignedFieldId) {
+      return InvalidSchema("Current field id is not assigned, type: {}", type.ToString());
+    }
+
+    const auto [existing, inserted] =
+        position_path_.try_emplace(current_field_id_, current_path_);
+    if (!inserted) {
+      return InvalidSchema("Duplicate field id found: {}, prev path: {}, curr path: {}",
+                           current_field_id_, existing->second, current_path_);
+    }
+
+    return {};
+  }
+
+  /// \brief Descend into each child field, extending the path by its position.
+  Status Visit(const StructType& type) {
+    const auto& children = type.fields();
+    for (size_t pos = 0; pos < children.size(); ++pos) {
+      const auto& child = children[pos];
+      current_field_id_ = child.field_id();
+      current_path_.push_back(pos);
+      ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*child.type(), this));
+      current_path_.pop_back();
+    }
+    return {};
+  }
+
+  // Non-struct types are not supported yet, but it is not an error.
+  Status Visit(const ListType&) { return {}; }
+  Status Visit(const MapType&) { return {}; }
+
+  /// \brief Hand over the collected field-id to position-path mapping.
+  std::unordered_map<int32_t, std::vector<size_t>> Finish() {
+    return std::move(position_path_);
+  }
+
+ private:
+  constexpr static int32_t kUnassignedFieldId = -1;
+  int32_t current_field_id_ = kUnassignedFieldId;
+  std::vector<size_t> current_path_;
+  std::unordered_map<int32_t, std::vector<size_t>> position_path_;
+};
+
 Schema::Schema(std::vector<SchemaField> fields, std::optional<int32_t> 
schema_id)
     : StructType(std::move(fields)), schema_id_(schema_id) {}
 
@@ -144,6 +189,27 @@ Result<std::optional<std::reference_wrapper<const 
SchemaField>>> Schema::FindFie
   return it->second;
 }
 
+/// \brief Build the mapping from field id to position path for `self`.
+///
+/// \return The mapping, or the error reported while visiting the schema.
+Result<std::unordered_map<int32_t, std::vector<size_t>>> Schema::InitIdToPositionPath(
+    const Schema& self) {
+  PositionPathVisitor path_visitor;
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(self, &path_visitor));
+  return path_visitor.Finish();
+}
+
+/// \brief Create an accessor for the field with the given id.
+///
+/// \return The accessor, or NotFound when the id has no recorded position path
+/// or no matching field.
+Result<std::unique_ptr<StructLikeAccessor>> Schema::GetAccessorById(
+    int32_t field_id) const {
+  ICEBERG_ASSIGN_OR_RAISE(auto paths_ref, id_to_position_path_.Get(*this));
+  const auto& paths = paths_ref.get();
+
+  const auto path_it = paths.find(field_id);
+  if (path_it == paths.cend()) {
+    return NotFound("Cannot get accessor for field id: {}", field_id);
+  }
+
+  ICEBERG_ASSIGN_OR_RAISE(auto field, FindFieldById(field_id));
+  if (!field.has_value()) {
+    return NotFound("Cannot get accessor for field id: {}", field_id);
+  }
+
+  return std::make_unique<StructLikeAccessor>(field.value().get().type(), path_it->second);
+}
+
 IdToFieldVisitor::IdToFieldVisitor(
     std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
     : id_to_field_(id_to_field) {}
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
index 32914be..94a8764 100644
--- a/src/iceberg/schema.h
+++ b/src/iceberg/schema.h
@@ -75,6 +75,12 @@ class ICEBERG_EXPORT Schema : public StructType {
   Result<std::optional<std::reference_wrapper<const SchemaField>>> 
FindFieldById(
       int32_t field_id) const;
 
+  /// \brief Get the accessor to access the field by field id.
+  ///
+  /// \param field_id The id of the field to get the accessor for.
+  /// \return The accessor to access the field, or NotFound if the field is 
not found.
+  Result<std::unique_ptr<StructLikeAccessor>> GetAccessorById(int32_t 
field_id) const;
+
   /// \brief Creates a projected schema from selected field names.
   ///
   /// \param names Selected field names and nested names are dot-concatenated.
@@ -106,6 +112,8 @@ class ICEBERG_EXPORT Schema : public StructType {
   InitNameToIdMap(const Schema&);
   static Result<std::unordered_map<std::string, int32_t, StringHash, 
std::equal_to<>>>
   InitLowerCaseNameToIdMap(const Schema&);
+  static Result<std::unordered_map<int32_t, std::vector<size_t>>> 
InitIdToPositionPath(
+      const Schema&);
 
   const std::optional<int32_t> schema_id_;
   /// Mapping from field id to field.
@@ -114,6 +122,8 @@ class ICEBERG_EXPORT Schema : public StructType {
   Lazy<InitNameToIdMap> name_to_id_;
   /// Mapping from lowercased field name to field id
   Lazy<InitLowerCaseNameToIdMap> lowercase_name_to_id_;
+  /// Mapping from field id to (nested) position path to access the field.
+  Lazy<InitIdToPositionPath> id_to_position_path_;
 };
 
 }  // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index f1eb77e..d82fe17 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -141,6 +141,12 @@ if(ICEBERG_BUILD_BUNDLE)
                    test_common.cc
                    in_memory_catalog_test.cc)
 
+  add_iceberg_test(eval_expr_test
+                   USE_BUNDLE
+                   SOURCES
+                   eval_expr_test.cc
+                   test_common.cc)
+
   add_iceberg_test(parquet_test
                    USE_BUNDLE
                    SOURCES
diff --git a/src/iceberg/test/eval_expr_test.cc b/src/iceberg/test/eval_expr_test.cc
new file mode 100644
index 0000000..880f1ff
--- /dev/null
+++ b/src/iceberg/test/eval_expr_test.cc
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow/c/bridge.h>
+#include <arrow/json/from_string.h>
+#include <arrow/type.h>
+#include <arrow/type_fwd.h>
+#include <gtest/gtest.h>
+
+#include "iceberg/arrow_c_data.h"
+#include "iceberg/arrow_c_data_guard_internal.h"
+#include "iceberg/expression/expression.h"
+#include "iceberg/expression/literal.h"
+#include "iceberg/expression/term.h"
+#include "iceberg/row/arrow_array_wrapper.h"
+#include "iceberg/schema.h"
+#include "iceberg/schema_internal.h"
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+#include "iceberg/type.h"
+
+namespace iceberg {
+
+/// \brief Fixture providing a four-column schema and a matching two-row Arrow
+/// struct array exported through the Arrow C data interface.
+class BoundExpressionTest : public ::testing::Test {
+ protected:
+  /// Builds the schema, the Arrow array and their C data interface exports.
+  void SetUp() override {
+    schema_ = std::make_unique<Schema>(std::vector<SchemaField>{
+        SchemaField::MakeOptional(1, "id", int32()),
+        SchemaField::MakeOptional(2, "name", string()),
+        SchemaField::MakeRequired(3, "timestamp_field", timestamp()),
+        SchemaField::MakeRequired(4, "string_field", string())});
+
+    arrow_data_type_ = ::arrow::struct_(
+        {::arrow::field("id", ::arrow::int32()), ::arrow::field("name", ::arrow::utf8()),
+         ::arrow::field("timestamp_field", ::arrow::timestamp(::arrow::TimeUnit::MICRO)),
+         ::arrow::field("string_field", ::arrow::utf8())});
+
+    arrow_array_ = ::arrow::json::ArrayFromJSONString(arrow_data_type_, R"([
+          {"id": 1, "name": "Alice", "timestamp_field": 1609459200000000, "string_field": "hello_world"},
+          {"id": 2, "name": null, "timestamp_field": 1609459200000000, "string_field": "hello_world"}
+        ])")
+                       .ValueOrDie();
+
+    ASSERT_TRUE(::arrow::ExportType(*arrow_data_type_, &arrow_c_schema_).ok());
+    ASSERT_TRUE(::arrow::ExportArray(*arrow_array_, &arrow_c_array_).ok());
+  }
+
+  /// Releases whichever C structures were successfully exported.
+  void TearDown() override {
+    if (arrow_c_schema_.release != nullptr) {
+      ArrowSchemaRelease(&arrow_c_schema_);
+    }
+    if (arrow_c_array_.release != nullptr) {
+      ArrowArrayRelease(&arrow_c_array_);
+    }
+  }
+
+  std::unique_ptr<Schema> schema_;
+  std::shared_ptr<::arrow::DataType> arrow_data_type_;
+  std::shared_ptr<::arrow::Array> arrow_array_;
+  // Zero-initialized so that `release` is nullptr until an export succeeds:
+  // TearDown reads `release` even when SetUp stopped before exporting.
+  ArrowSchema arrow_c_schema_{};
+  ArrowArray arrow_c_array_{};
+};
+
+// Bound references to "id" and "name" must return the per-row values, including a
+// null literal for the null name in the second row.
+TEST_F(BoundExpressionTest, EvaluateBoundReference) {
+  ICEBERG_UNWRAP_OR_FAIL(auto id_ref, NamedReference::Make("id"));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_id, id_ref->Bind(*schema_, /*case_sensitive=*/true));
+
+  ICEBERG_UNWRAP_OR_FAIL(auto name_ref, NamedReference::Make("name"));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_name,
+                         name_ref->Bind(*schema_, /*case_sensitive=*/true));
+
+  struct TestCase {
+    size_t row_index;
+    Literal expected_id;
+    Literal expected_name;
+  };
+
+  const std::vector<TestCase> cases{
+      {.row_index = 0,
+       .expected_id = Literal::Int(1),
+       .expected_name = Literal::String("Alice")},
+      {.row_index = 1,
+       .expected_id = Literal::Int(2),
+       .expected_name = Literal::Null(string())},
+  };
+
+  for (const auto& tc : cases) {
+    ICEBERG_UNWRAP_OR_FAIL(
+        auto row, ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, tc.row_index));
+
+    ICEBERG_UNWRAP_OR_FAIL(auto actual_id, bound_id->Evaluate(*row));
+    EXPECT_EQ(actual_id, tc.expected_id);
+
+    ICEBERG_UNWRAP_OR_FAIL(auto actual_name, bound_name->Evaluate(*row));
+    if (tc.expected_name.IsNull()) {
+      EXPECT_TRUE(actual_name.IsNull());
+    } else {
+      EXPECT_EQ(actual_name, tc.expected_name);
+    }
+  }
+}
+
+// An identity transform over "name" must pass the value through unchanged,
+// including null.
+TEST_F(BoundExpressionTest, IdentityTransform) {
+  ICEBERG_UNWRAP_OR_FAIL(auto name_ref, NamedReference::Make("name"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_identity,
+      UnboundTransform::Make(std::move(name_ref), Transform::Identity()));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_identity,
+                         unbound_identity->Bind(*schema_, /*case_sensitive=*/true));
+
+  struct TestCase {
+    size_t row_index;
+    Literal expected_name;
+  };
+
+  const std::vector<TestCase> cases{
+      {.row_index = 0, .expected_name = Literal::String("Alice")},
+      {.row_index = 1, .expected_name = Literal::Null(string())},
+  };
+
+  for (const auto& tc : cases) {
+    ICEBERG_UNWRAP_OR_FAIL(
+        auto row, ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, tc.row_index));
+    ICEBERG_UNWRAP_OR_FAIL(auto actual, bound_identity->Evaluate(*row));
+    if (tc.expected_name.IsNull()) {
+      EXPECT_TRUE(actual.IsNull());
+    } else {
+      EXPECT_EQ(actual, tc.expected_name);
+    }
+  }
+}
+
+// Evaluates a bound year transform over "timestamp_field" on the first row.
+TEST_F(BoundExpressionTest, YearTransform) {
+  // Create and bind year transform
+  ICEBERG_UNWRAP_OR_FAIL(auto timestamp_ref, NamedReference::Make("timestamp_field"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_transform,
+      UnboundTransform::Make(std::move(timestamp_ref), Transform::Year()));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_transform,
+                         unbound_transform->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Test data: 2021-01-01 00:00:00 UTC = 1609459200000000 microseconds
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
+                         ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, 0));
+
+  // Evaluate (2021)
+  ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like));
+  // Stop here on a null result: std::get below would throw on a valueless literal.
+  ASSERT_FALSE(result.IsNull());
+  // NOTE(review): the month and day tests expect offsets from 1970 while this
+  // expects the calendar year; confirm against the Iceberg spec's year transform.
+  EXPECT_EQ(std::get<int32_t>(result.value()), 2021);  // Year value
+}
+
+// Evaluates a bound month transform over "timestamp_field" on the first row.
+TEST_F(BoundExpressionTest, MonthTransform) {
+  // Create and bind month transform
+  ICEBERG_UNWRAP_OR_FAIL(auto timestamp_ref, NamedReference::Make("timestamp_field"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_transform,
+      UnboundTransform::Make(std::move(timestamp_ref), Transform::Month()));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_transform,
+                         unbound_transform->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Test data: 2021-01-01
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
+                         ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, 0));
+
+  // Evaluate (2021-01)
+  ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like));
+  // Stop here on a null result: std::get below would throw on a valueless literal.
+  ASSERT_FALSE(result.IsNull());
+  EXPECT_EQ(std::get<int32_t>(result.value()), 612);  // Months since 1970-01
+}
+
+// Evaluates a bound day transform over "timestamp_field" on the first row.
+TEST_F(BoundExpressionTest, DayTransform) {
+  // Create and bind day transform
+  ICEBERG_UNWRAP_OR_FAIL(auto timestamp_ref, NamedReference::Make("timestamp_field"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_transform,
+      UnboundTransform::Make(std::move(timestamp_ref), Transform::Day()));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_transform,
+                         unbound_transform->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Test data: 2021-01-01
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
+                         ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, 0));
+
+  // Evaluate
+  ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like));
+  // Stop here on a null result: std::get below would throw on a valueless literal.
+  ASSERT_FALSE(result.IsNull());
+  EXPECT_EQ(std::get<int32_t>(result.value()), 18628);  // Days since 1970-01-01
+}
+
+// Evaluates a bound bucket[4] transform over "string_field" on the first row.
+TEST_F(BoundExpressionTest, BucketTransform) {
+  // Create and bind bucket[4] transform
+  ICEBERG_UNWRAP_OR_FAIL(auto string_ref, NamedReference::Make("string_field"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_transform,
+      UnboundTransform::Make(std::move(string_ref), Transform::Bucket(4)));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_transform,
+                         unbound_transform->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Test data: "hello_world"
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
+                         ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, 0));
+
+  // Evaluate - verify result is in range [0, 3]
+  ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like));
+  // Stop here on a null result: std::get below would throw on a valueless literal.
+  ASSERT_FALSE(result.IsNull());
+  auto bucket_value = std::get<int32_t>(result.value());
+  EXPECT_GE(bucket_value, 0);
+  EXPECT_LT(bucket_value, 4);
+}
+
+// Evaluates a bound truncate[5] transform over "string_field" on the first row.
+TEST_F(BoundExpressionTest, TruncateTransform) {
+  // Create and bind truncate[5] transform
+  ICEBERG_UNWRAP_OR_FAIL(auto string_ref, NamedReference::Make("string_field"));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto unbound_transform,
+      UnboundTransform::Make(std::move(string_ref), Transform::Truncate(5)));
+  ICEBERG_UNWRAP_OR_FAIL(auto bound_transform,
+                         unbound_transform->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Test data: "hello_world"
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
+                         ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array_, 0));
+
+  // Evaluate - "hello_world" truncated to 5 chars = "hello"
+  ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like));
+  // Stop here on a null result: std::get below would throw on a valueless literal.
+  ASSERT_FALSE(result.IsNull());
+  EXPECT_EQ(std::get<std::string>(result.value()), "hello");
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/test/struct_like_test.cc b/src/iceberg/test/struct_like_test.cc
index b18ab8c..3683ed2 100644
--- a/src/iceberg/test/struct_like_test.cc
+++ b/src/iceberg/test/struct_like_test.cc
@@ -20,6 +20,7 @@
 #include <arrow/c/bridge.h>
 #include <arrow/json/from_string.h>
 #include <arrow/type.h>
+#include <arrow/type_fwd.h>
 #include <arrow/util/decimal.h>
 
 #include "iceberg/arrow_c_data_guard_internal.h"
@@ -27,8 +28,10 @@
 #include "iceberg/manifest_reader_internal.h"
 #include "iceberg/row/arrow_array_wrapper.h"
 #include "iceberg/row/manifest_wrapper.h"
+#include "iceberg/schema.h"
 #include "iceberg/schema_internal.h"
 #include "iceberg/test/matchers.h"
+#include "iceberg/type.h"
 
 namespace iceberg {
 
@@ -386,4 +389,52 @@ TEST(ArrowArrayStructLike, PrimitiveMap) {
   }
 }
 
+// Accessors obtained from the schema must reach fields nested one, two and three
+// levels deep in an Arrow-backed struct.
+TEST(ArrowArrayStructLike, Accessor) {
+  Schema schema{std::vector<SchemaField>{
+      SchemaField::MakeOptional(1, "c1", int32()),
+      SchemaField::MakeOptional(
+          2, "c2",
+          struct_({
+              SchemaField::MakeOptional(3, "c3", int32()),
+              SchemaField::MakeOptional(4, "c4",
+                                        struct_({
+                                            SchemaField::MakeOptional(5, "c5", int32()),
+                                        })),
+          })),
+  }};
+
+  auto arrow_schema = ::arrow::struct_({
+      ::arrow::field("c1", ::arrow::int32()),
+      ::arrow::field("c2",
+                     ::arrow::struct_({
+                         ::arrow::field("c3", ::arrow::int32()),
+                         ::arrow::field("c4", ::arrow::struct_({
+                                                  ::arrow::field("c5", ::arrow::int32()),
+                                              })),
+                     })),
+  });
+
+  auto arrow_array =
+      ::arrow::json::ArrayFromJSONString(
+          arrow_schema, R"([ {"c1": 1, "c2": {"c3": 3, "c4": {"c5": 5}}} ])")
+          .ValueOrDie();
+
+  // Zero-initialize the C structs so `release` is nullptr until an export
+  // succeeds; the guards may run before that if an ASSERT below fails.
+  ArrowSchema c_schema{};
+  ArrowArray c_array{};
+  internal::ArrowSchemaGuard schema_guard(&c_schema);
+  internal::ArrowArrayGuard array_guard(&c_array);
+  ASSERT_TRUE(::arrow::ExportType(*arrow_schema, &c_schema).ok());
+  ASSERT_TRUE(::arrow::ExportArray(*arrow_array, &c_array).ok());
+
+  ICEBERG_UNWRAP_OR_FAIL(auto struct_like, ArrowArrayStructLike::Make(c_schema, c_array));
+
+  // Test nested accessors from 1 to 3 levels deep. Each leaf stores its own
+  // field id as its value, so the id doubles as the expected result.
+  for (int32_t field_id : {1, 3, 5}) {
+    ICEBERG_UNWRAP_OR_FAIL(auto accessor, schema.GetAccessorById(field_id));
+    ICEBERG_UNWRAP_OR_FAIL(auto scalar, accessor->Get(*struct_like));
+    ASSERT_TRUE(std::holds_alternative<int32_t>(scalar));
+    EXPECT_EQ(std::get<int32_t>(scalar), field_id);
+  }
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 5485d83..79b43f5 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -144,9 +144,10 @@ struct WriterOptions;
 class Reader;
 class Writer;
 
-class StructLike;
 class ArrayLike;
 class MapLike;
+class StructLike;
+class StructLikeAccessor;
 
 class TableUpdate;
 class TableRequirement;

Reply via email to