bkietz commented on a change in pull request #7704: URL: https://github.com/apache/arrow/pull/7704#discussion_r453238293
##########
File path: cpp/src/parquet/arrow/schema.h
##########

@@ -163,24 +165,28 @@ struct PARQUET_EXPORT SchemaManifest {
     return it->second;
   }
 
-  bool GetFieldIndices(const std::vector<int>& column_indices, std::vector<int>* out) {
+  ::arrow::Result<std::vector<int>> GetFieldIndices(
+      const std::vector<int>& column_indices) {
     // Coalesce a list of schema field indices which are the roots of the
     // columns referred to by a list of column indices
     const schema::GroupNode* group = descr->group_node();
     std::unordered_set<int> already_added;
-    out->clear();
-    for (auto& column_idx : column_indices) {
+
+    std::vector<int> out;
+    for (int column_idx : column_indices) {
+      if (column_idx < 0 || column_idx >= descr->num_columns()) {
+        return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
+      }
       auto field_node = descr->GetColumnRoot(column_idx);
       auto field_idx = group->FieldIndex(*field_node);
       if (field_idx < 0) {

Review comment:
https://github.com/apache/arrow/pull/7704#issuecomment-657119271

In this case the passed data is `column_indices`, which *are* checked. The field index is looked up from the group using a node pulled from that same group, so it seems redundant to check it again. I intended the assertion as a test for that intuition. If there is a way for `field_idx == -1` to occur then I agree that we should raise an exception rather than abort, but otherwise shouldn't the redundant check be removed?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org