lidavidm commented on a change in pull request #12664:
URL: https://github.com/apache/arrow/pull/12664#discussion_r830209919
##########
File path: cpp/src/arrow/dataset/file_parquet.cc
##########
@@ -160,18 +160,30 @@ Status ResolveOneFieldRef(
const std::unordered_map<std::string, const SchemaField*>& field_lookup,
const std::unordered_set<std::string>& duplicate_fields,
std::vector<int>* columns_selection) {
- if (const std::string* name = field_ref.name()) {
+ auto resolve_field_ref = [&](const std::string* name) -> Status {
auto it = field_lookup.find(*name);
if (it != field_lookup.end()) {
AddColumnIndices(*it->second, columns_selection);
} else if (duplicate_fields.find(*name) != duplicate_fields.end()) {
- // We shouldn't generally get here because SetProjection will reject
such references
+ // We shouldn't generally get here because SetProjection will reject such
+ // references
return Status::Invalid("Ambiguous reference to column '", *name,
"' which occurs more than once");
}
+ return Status::OK();
+ };
+
+ if (const std::string* name = field_ref.name()) {
+ RETURN_NOT_OK(resolve_field_ref(name));
// "Virtual" column: field is not in file but is in the ScanOptions.
// Ignore it here, as projection will pad the batch with a null column.
Review comment:
This comment should go above L173 above not here
##########
File path: cpp/src/arrow/dataset/file_parquet.cc
##########
@@ -160,18 +160,30 @@ Status ResolveOneFieldRef(
const std::unordered_map<std::string, const SchemaField*>& field_lookup,
const std::unordered_set<std::string>& duplicate_fields,
std::vector<int>* columns_selection) {
- if (const std::string* name = field_ref.name()) {
+ auto resolve_field_ref = [&](const std::string* name) -> Status {
auto it = field_lookup.find(*name);
if (it != field_lookup.end()) {
AddColumnIndices(*it->second, columns_selection);
} else if (duplicate_fields.find(*name) != duplicate_fields.end()) {
- // We shouldn't generally get here because SetProjection will reject
such references
+ // We shouldn't generally get here because SetProjection will reject such
+ // references
return Status::Invalid("Ambiguous reference to column '", *name,
"' which occurs more than once");
}
+ return Status::OK();
+ };
+
+ if (const std::string* name = field_ref.name()) {
+ RETURN_NOT_OK(resolve_field_ref(name));
// "Virtual" column: field is not in file but is in the ScanOptions.
// Ignore it here, as projection will pad the batch with a null column.
return Status::OK();
+ } else if (const FieldPath* path = field_ref.field_path()) {
+ int index = path->indices()[0];
Review comment:
Should we explicitly reject nested field paths here for now?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]