Dan Hecht created IMPALA-5861:
---------------------------------
Summary: HdfsParquetScanner::GetNextInternal() IsZeroSlotTableScan() case double counts
Key: IMPALA-5861
URL: https://issues.apache.org/jira/browse/IMPALA-5861
Project: IMPALA
Issue Type: Bug
Components: Backend
Affects Versions: Impala 2.10.0
Reporter: Dan Hecht
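It appears that this code is double counting into {{rows_read_counter()}}:
{{row_group_rows_read_}} is already a running total that accumulates
{{num_to_commit}} on every call, yet the whole total (rather than just
{{num_to_commit}}) is added to the counter each time: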
{code:title=HdfsParquetScanner::GetNextInternal()}
} else if (scan_node_->IsZeroSlotTableScan()) {
// There are no materialized slots and we are not optimizing count(*), e.g.
// "select 1 from alltypes". We can serve this query from just the file metadata.
// We don't need to read the column data.
if (row_group_rows_read_ == file_metadata_.num_rows) {
eos_ = true;
return Status::OK();
}
assemble_rows_timer_.Start();
DCHECK_LE(row_group_rows_read_, file_metadata_.num_rows);
int64_t rows_remaining = file_metadata_.num_rows - row_group_rows_read_;
int max_tuples = min<int64_t>(row_batch->capacity(), rows_remaining);
TupleRow* current_row = row_batch->GetRow(row_batch->AddRow());
int num_to_commit = WriteTemplateTuples(current_row, max_tuples);
Status status = CommitRows(row_batch, num_to_commit);
assemble_rows_timer_.Stop();
RETURN_IF_ERROR(status);
row_group_rows_read_ += num_to_commit;
COUNTER_ADD(scan_node_->rows_read_counter(), row_group_rows_read_); <======
return Status::OK();
}
{code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)