Dan Hecht created IMPALA-5861:
---------------------------------

             Summary: HdfsParquetScanner::GetNextInternal() IsZeroSlotTableScan() case double counts
                 Key: IMPALA-5861
                 URL: https://issues.apache.org/jira/browse/IMPALA-5861
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 2.10.0
            Reporter: Dan Hecht


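It appears that this code double-counts rows in {{rows_read_counter()}}:
{{row_group_rows_read_}} is already a running total across calls, so adding it
to the counter on every call re-counts the rows from all earlier calls (the
increment should presumably be {{num_to_commit}} instead):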

{code:title=HdfsParquetScanner::GetNextInternal()}
  } else if (scan_node_->IsZeroSlotTableScan()) {
    // There are no materialized slots and we are not optimizing count(*), e.g.
    // "select 1 from alltypes". We can serve this query from just the file metadata.
    // We don't need to read the column data.
    if (row_group_rows_read_ == file_metadata_.num_rows) {
      eos_ = true;
      return Status::OK();
    }
    assemble_rows_timer_.Start();
    DCHECK_LE(row_group_rows_read_, file_metadata_.num_rows);
    int64_t rows_remaining = file_metadata_.num_rows - row_group_rows_read_;
    int max_tuples = min<int64_t>(row_batch->capacity(), rows_remaining);
    TupleRow* current_row = row_batch->GetRow(row_batch->AddRow());
    int num_to_commit = WriteTemplateTuples(current_row, max_tuples);
    Status status = CommitRows(row_batch, num_to_commit);
    assemble_rows_timer_.Stop();
    RETURN_IF_ERROR(status);
    row_group_rows_read_ += num_to_commit;
    COUNTER_ADD(scan_node_->rows_read_counter(), row_group_rows_read_);  <======
    return Status::OK();
  }
{code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to