mapleFU commented on code in PR #39065:
URL: https://github.com/apache/arrow/pull/39065#discussion_r1416873982
##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -893,20 +902,29 @@ Result<std::vector<compute::Expression>>
ParquetFileFragment::TestRowGroups(
return std::vector<compute::Expression>{};
}
+ const SchemaField* schema_field = nullptr;
for (const FieldRef& ref : FieldsInExpression(predicate)) {
ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOneOrNone(*physical_schema_));
-
if (match.empty()) continue;
- if (statistics_expressions_complete_[match[0]]) continue;
- statistics_expressions_complete_[match[0]] = true;
+ schema_field = &manifest_->schema_fields[match[0]];
+
+ for (size_t i = 1; i < match.indices().size(); ++i) {
+ if (schema_field->field->type()->id() != Type::STRUCT) {
+ return Status::Invalid("nested paths only supported for structs");
+ }
Review Comment:
So does this prevent users from passing a filter on Map/List?
##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -415,6 +418,12 @@ std::optional<compute::Expression>
ParquetFileFragment::EvaluateStatisticsAsExpr
return std::nullopt;
}
+std::optional<compute::Expression>
ParquetFileFragment::EvaluateStatisticsAsExpression(
+ const Field& field, const parquet::Statistics& statistics) {
+ const auto field_name = field.name();
+ return EvaluateStatisticsAsExpression(field, FieldRef(field_name),
statistics);
Review Comment:
style:
```suggestion
return EvaluateStatisticsAsExpression(field,
FieldRef(std::move(field_name)), statistics);
```
##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -893,20 +902,29 @@ Result<std::vector<compute::Expression>>
ParquetFileFragment::TestRowGroups(
return std::vector<compute::Expression>{};
}
+ const SchemaField* schema_field = nullptr;
Review Comment:
Why move the declaration of `schema_field` outside the loop?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]