This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new c0f1dce9 fix: dangling span capture in StructLikeAccessor (#423)
c0f1dce9 is described below
commit c0f1dce98ffc7d00c3cb22ac012b417859a9e506
Author: Feiyang Li <[email protected]>
AuthorDate: Fri Dec 19 17:01:49 2025 +0800
fix: dangling span capture in StructLikeAccessor (#423)
---
src/iceberg/expression/manifest_evaluator.cc | 8 +++-----
src/iceberg/expression/residual_evaluator.cc | 21 ++++++++++-----------
src/iceberg/row/struct_like.cc | 10 +++++-----
src/iceberg/schema.h | 11 +++++++----
src/iceberg/table_scan.cc | 4 ++--
src/iceberg/test/manifest_writer_versions_test.cc | 7 +++----
src/iceberg/type.cc | 21 +++++++++++++++++++++
src/iceberg/type.h | 2 ++
8 files changed, 53 insertions(+), 31 deletions(-)
diff --git a/src/iceberg/expression/manifest_evaluator.cc
b/src/iceberg/expression/manifest_evaluator.cc
index 845ecb6c..9c4e708f 100644
--- a/src/iceberg/expression/manifest_evaluator.cc
+++ b/src/iceberg/expression/manifest_evaluator.cc
@@ -363,12 +363,10 @@ Result<std::unique_ptr<ManifestEvaluator>>
ManifestEvaluator::MakePartitionFilte
std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>&
spec,
const Schema& schema, bool case_sensitive) {
ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec->PartitionType(schema));
- auto field_span = partition_type->fields();
- std::vector<SchemaField> fields(field_span.begin(), field_span.end());
- auto partition_schema = std::make_shared<Schema>(fields);
ICEBERG_ASSIGN_OR_RAISE(auto rewrite_expr,
RewriteNot::Visit(std::move(expr)));
- ICEBERG_ASSIGN_OR_RAISE(auto partition_expr,
- Binder::Bind(*partition_schema, rewrite_expr,
case_sensitive));
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto partition_expr,
+ Binder::Bind(*partition_type->ToSchema(), rewrite_expr, case_sensitive));
return std::unique_ptr<ManifestEvaluator>(
new ManifestEvaluator(std::move(partition_expr)));
}
diff --git a/src/iceberg/expression/residual_evaluator.cc
b/src/iceberg/expression/residual_evaluator.cc
index e818199e..cf8f9812 100644
--- a/src/iceberg/expression/residual_evaluator.cc
+++ b/src/iceberg/expression/residual_evaluator.cc
@@ -27,6 +27,7 @@
#include "iceberg/schema.h"
#include "iceberg/schema_internal.h"
#include "iceberg/transform.h"
+#include "iceberg/type.h"
#include "iceberg/util/macros.h"
namespace iceberg {
@@ -42,8 +43,7 @@ class ResidualVisitor : public
BoundVisitor<std::shared_ptr<Expression>> {
const StructLike& partition_data,
bool case_sensitive) {
ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec.PartitionType(schema));
- auto partition_schema = FromStructType(std::move(*partition_type),
std::nullopt);
- return ResidualVisitor(spec, schema, std::move(partition_schema),
partition_data,
+ return ResidualVisitor(spec, schema, std::move(partition_type),
partition_data,
case_sensitive);
}
@@ -202,17 +202,17 @@ class ResidualVisitor : public
BoundVisitor<std::shared_ptr<Expression>> {
private:
ResidualVisitor(const PartitionSpec& spec, const Schema& schema,
- std::unique_ptr<Schema> partition_schema,
+ std::unique_ptr<StructType> partition_type,
const StructLike& partition_data, bool case_sensitive)
: spec_(spec),
schema_(schema),
- partition_schema_(std::move(partition_schema)),
+ partition_type_(std::move(partition_type)),
partition_data_(partition_data),
case_sensitive_(case_sensitive) {}
const PartitionSpec& spec_;
const Schema& schema_;
- std::unique_ptr<Schema> partition_schema_;
+ std::unique_ptr<StructType> partition_type_;
const StructLike& partition_data_;
bool case_sensitive_;
};
@@ -235,6 +235,7 @@ Result<std::shared_ptr<Expression>>
ResidualVisitor::Predicate(
// Not associated with a partition field, can't be evaluated
return pred;
}
+ auto schema = partition_type_->ToSchema();
for (const auto& part : parts) {
// Check the strict projection
@@ -243,9 +244,8 @@ Result<std::shared_ptr<Expression>>
ResidualVisitor::Predicate(
std::shared_ptr<Expression> strict_result = nullptr;
if (strict_projection != nullptr) {
- ICEBERG_ASSIGN_OR_RAISE(
- auto bound_strict,
- strict_projection->Bind(*partition_schema_, case_sensitive_));
+ ICEBERG_ASSIGN_OR_RAISE(auto bound_strict,
+ strict_projection->Bind(*schema,
case_sensitive_));
if (bound_strict->is_bound_predicate()) {
ICEBERG_ASSIGN_OR_RAISE(
strict_result, BoundVisitor::Predicate(
@@ -268,9 +268,8 @@ Result<std::shared_ptr<Expression>>
ResidualVisitor::Predicate(
std::shared_ptr<Expression> inclusive_result = nullptr;
if (inclusive_projection != nullptr) {
- ICEBERG_ASSIGN_OR_RAISE(
- auto bound_inclusive,
- inclusive_projection->Bind(*partition_schema_, case_sensitive_));
+ ICEBERG_ASSIGN_OR_RAISE(auto bound_inclusive,
+ inclusive_projection->Bind(*schema,
case_sensitive_));
if (bound_inclusive->is_bound_predicate()) {
ICEBERG_ASSIGN_OR_RAISE(
diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc
index 24e61644..5b814204 100644
--- a/src/iceberg/row/struct_like.cc
+++ b/src/iceberg/row/struct_like.cc
@@ -87,20 +87,20 @@
StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
return
std::get<std::shared_ptr<StructLike>>(first_level_field)->GetField(pos1);
};
} else if (!position_path.empty()) {
- accessor_ = [position_path](const StructLike& struct_like) ->
Result<Scalar> {
+ accessor_ = [this](const StructLike& struct_like) -> Result<Scalar> {
std::vector<std::shared_ptr<StructLike>> backups;
const StructLike* current_struct_like = &struct_like;
- for (size_t i = 0; i < position_path.size() - 1; ++i) {
+ for (size_t i = 0; i < position_path_.size() - 1; ++i) {
ICEBERG_ASSIGN_OR_RAISE(auto field,
-
current_struct_like->GetField(position_path[i]));
+
current_struct_like->GetField(position_path_[i]));
if (!std::holds_alternative<std::shared_ptr<StructLike>>(field)) {
return InvalidSchema("Encountered non-struct in the position path
[{}]",
- position_path);
+ position_path_);
}
backups.push_back(std::get<std::shared_ptr<StructLike>>(field));
current_struct_like = backups.back().get();
}
- return current_struct_like->GetField(position_path.back());
+ return current_struct_like->GetField(position_path_.back());
};
} else {
accessor_ = [](const StructLike&) -> Result<Scalar> {
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
index 94a8764d..f6c459d8 100644
--- a/src/iceberg/schema.h
+++ b/src/iceberg/schema.h
@@ -59,19 +59,22 @@ class ICEBERG_EXPORT Schema : public StructType {
std::string ToString() const override;
- /// \brief Find the SchemaField by field name.
+ /// \brief Recursively find the SchemaField by field name.
///
/// Short names for maps and lists are included for any name that does not
conflict with
/// a canonical name. For example, a list, 'l', of structs with field 'x'
will produce
- /// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm',
if its
+ /// short name 'l.x' in addition to canonical name 'l.element.x'. A map 'm',
if its
/// value include a structs with field 'x' wil produce short name 'm.x' in
addition to
- /// canonical name 'm.value.x'
+ /// canonical name 'm.value.x'.
/// FIXME: Currently only handles ASCII lowercase conversion; extend to
support
/// non-ASCII characters (e.g., using std::towlower or ICU)
Result<std::optional<std::reference_wrapper<const SchemaField>>>
FindFieldByName(
std::string_view name, bool case_sensitive = true) const;
- /// \brief Find the SchemaField by field id.
+ /// \brief Recursively find the SchemaField by field id.
+ ///
+ /// \param field_id The id of the field to get the accessor for.
+ /// \return The field with the given id, or std::nullopt if not found.
Result<std::optional<std::reference_wrapper<const SchemaField>>>
FindFieldById(
int32_t field_id) const;
diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc
index 56c6ee2c..1c4a73ff 100644
--- a/src/iceberg/table_scan.cc
+++ b/src/iceberg/table_scan.cc
@@ -273,13 +273,13 @@ Result<std::vector<std::shared_ptr<FileScanTask>>>
DataTableScan::PlanFiles() co
// Get the table schema and partition type
ICEBERG_ASSIGN_OR_RAISE(auto current_schema,
context_.table_metadata->Schema());
- ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructType> partition_schema,
+ ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructType> partition_type,
partition_spec->PartitionType(*current_schema));
for (const auto& manifest_file : manifest_files) {
ICEBERG_ASSIGN_OR_RAISE(
auto manifest_reader,
- ManifestReader::Make(manifest_file, file_io_, partition_schema));
+ ManifestReader::Make(manifest_file, file_io_, partition_type));
ICEBERG_ASSIGN_OR_RAISE(auto manifests, manifest_reader->Entries());
// TODO(gty404): filter manifests using partition spec and filter
expression
diff --git a/src/iceberg/test/manifest_writer_versions_test.cc
b/src/iceberg/test/manifest_writer_versions_test.cc
index 0bbdbcc5..36f9445f 100644
--- a/src/iceberg/test/manifest_writer_versions_test.cc
+++ b/src/iceberg/test/manifest_writer_versions_test.cc
@@ -241,10 +241,9 @@ class ManifestWriterVersionsTest : public ::testing::Test {
}
std::vector<ManifestEntry> ReadManifest(const ManifestFile& manifest_file) {
- auto partition_schema_result = spec_->PartitionType(*schema_);
- EXPECT_THAT(partition_schema_result, IsOk());
- std::shared_ptr<StructType> partition_type =
- std::move(partition_schema_result.value());
+ auto partition_type_result = spec_->PartitionType(*schema_);
+ EXPECT_THAT(partition_type_result, IsOk());
+ std::shared_ptr<StructType> partition_type =
std::move(partition_type_result.value());
auto reader_result = ManifestReader::Make(manifest_file, file_io_,
partition_type);
EXPECT_THAT(reader_result, IsOk());
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index 104ddad7..1cd5fb3e 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -25,6 +25,7 @@
#include <utility>
#include "iceberg/exception.h"
+#include "iceberg/schema.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
@@ -48,6 +49,7 @@ std::string StructType::ToString() const {
repr += ">";
return repr;
}
+
std::span<const SchemaField> StructType::fields() const { return fields_; }
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldById(
int32_t field_id) const {
@@ -56,6 +58,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldById(
if (it == field_by_id.get().end()) return std::nullopt;
return it->second;
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByIndex(
int32_t index) const {
if (index < 0 || static_cast<size_t>(index) >= fields_.size()) {
@@ -63,6 +66,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByInd
}
return fields_[index];
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
@@ -81,6 +85,11 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByNam
}
return std::nullopt;
}
+
+std::unique_ptr<Schema> StructType::ToSchema() const {
+ return std::make_unique<Schema>(fields_);
+}
+
bool StructType::Equals(const Type& other) const {
if (other.type_id() != TypeId::kStruct) {
return false;
@@ -88,6 +97,7 @@ bool StructType::Equals(const Type& other) const {
const auto& struct_ = static_cast<const StructType&>(other);
return fields_ == struct_.fields_;
}
+
Result<std::unordered_map<int32_t, StructType::SchemaFieldConstRef>>
StructType::InitFieldById(const StructType& self) {
std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id;
@@ -100,6 +110,7 @@ StructType::InitFieldById(const StructType& self) {
}
return field_by_id;
}
+
Result<std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>>
StructType::InitFieldByName(const StructType& self) {
std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>
field_by_name;
@@ -113,6 +124,7 @@ StructType::InitFieldByName(const StructType& self) {
}
return field_by_name;
}
+
Result<std::unordered_map<std::string, StructType::SchemaFieldConstRef>>
StructType::InitFieldByLowerCaseName(const StructType& self) {
std::unordered_map<std::string, SchemaFieldConstRef> field_by_lowercase_name;
@@ -146,6 +158,7 @@ std::string ListType::ToString() const {
repr += ">";
return repr;
}
+
std::span<const SchemaField> ListType::fields() const { return {&element_, 1};
}
Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldById(
int32_t field_id) const {
@@ -154,6 +167,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
ListType::GetFieldById(
}
return std::nullopt;
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>>
ListType::GetFieldByIndex(
int index) const {
if (index == 0) {
@@ -161,6 +175,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
ListType::GetFieldByIndex
}
return InvalidArgument("Invalid index {} to get field from list", index);
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>>
ListType::GetFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
@@ -174,6 +189,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
ListType::GetFieldByName(
}
return std::nullopt;
}
+
bool ListType::Equals(const Type& other) const {
if (other.type_id() != TypeId::kList) {
return false;
@@ -195,6 +211,7 @@ MapType::MapType(SchemaField key, SchemaField value)
const SchemaField& MapType::key() const { return fields_[0]; }
const SchemaField& MapType::value() const { return fields_[1]; }
TypeId MapType::type_id() const { return kTypeId; }
+
std::string MapType::ToString() const {
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
// parsed as {<>} or something; split up the format string to avoid that
@@ -204,6 +221,7 @@ std::string MapType::ToString() const {
repr += ">";
return repr;
}
+
std::span<const SchemaField> MapType::fields() const { return fields_; }
Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldById(
int32_t field_id) const {
@@ -214,6 +232,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
MapType::GetFieldById(
}
return std::nullopt;
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>>
MapType::GetFieldByIndex(
int32_t index) const {
if (index == 0) {
@@ -223,6 +242,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
MapType::GetFieldByIndex(
}
return InvalidArgument("Invalid index {} to get field from map", index);
}
+
Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
@@ -241,6 +261,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
MapType::GetFieldByName(
}
return std::nullopt;
}
+
bool MapType::Equals(const Type& other) const {
if (other.type_id() != TypeId::kMap) {
return false;
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 49866c44..7e17b78d 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -123,6 +123,8 @@ class ICEBERG_EXPORT StructType : public NestedType {
std::string_view name, bool case_sensitive) const override;
using NestedType::GetFieldByName;
+ std::unique_ptr<Schema> ToSchema() const;
+
protected:
bool Equals(const Type& other) const override;