Remove conjuncts_passed arg in various Parquet scanner functions.

The argument is not used anywhere, so let's remove it.
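To make the cleanup concrete, here is a minimal, self-contained sketch of the pattern being removed. The types and names below (the MemPool/Tuple stubs, ColumnReaderBefore/After, DemoReader) are hypothetical stand-ins for illustration only, not the actual Impala classes; the real change is in the diff below.

// Sketch only: stand-in types, not the real impala::MemPool / impala::Tuple.
#include <iostream>

struct MemPool {};
struct Tuple {};

// Before: every override had to accept a bool* out-parameter that no
// implementation wrote to and no caller ever read back.
class ColumnReaderBefore {
 public:
  virtual ~ColumnReaderBefore() {}
  virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) = 0;
};

// After: same contract (returns false if execution should be aborted),
// without the dead parameter.
class ColumnReaderAfter {
 public:
  virtual ~ColumnReaderAfter() {}
  virtual bool ReadValue(MemPool* pool, Tuple* tuple) = 0;
};

// Hypothetical reader used only to show the simplified call site.
class DemoReader : public ColumnReaderAfter {
 public:
  bool ReadValue(MemPool* /*pool*/, Tuple* /*tuple*/) override { return true; }
};

int main() {
  MemPool pool;
  Tuple tuple;
  DemoReader reader;
  // Call sites no longer declare a conjuncts_passed local just to thread it
  // through; in the real patch this also lets ReadRow() drop its
  // "*materialize_tuple &= conjuncts_passed" step, since the flag was
  // always true.
  std::cout << (reader.ReadValue(&pool, &tuple) ? "ok" : "abort") << std::endl;
  return 0;
}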
Change-Id: Iff511d6c4ba309eead6222701c17dee1b909a638
Reviewed-on: http://gerrit.cloudera.org:8080/2732
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/344fbc09
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/344fbc09
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/344fbc09

Branch: refs/heads/master
Commit: 344fbc096644a5d06753aedeafe36eca82910cbc
Parents: ee8c309
Author: Alex Behm <[email protected]>
Authored: Thu Apr 7 16:24:31 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Tue Apr 12 14:03:44 2016 -0700

----------------------------------------------------------------------
 be/src/exec/hdfs-parquet-scanner.cc | 71 ++++++++++++--------------------
 1 file changed, 27 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/344fbc09/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index a910243..9e18c66 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -253,10 +253,6 @@ class HdfsParquetScanner::ColumnReader {
   /// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
   /// true.
   ///
-  /// *conjuncts_passed is an in/out parameter. ReadValue() sets to false if the current
-  /// row should be filtered based on this column's value. If already false on input, the
-  /// row has already been filtered and ReadValue() only needs to advance the value.
-  ///
   /// NextLevels() must be called on this reader before calling ReadValue() for the first
   /// time. This is to initialize the current value that ReadValue() will read.
   ///
@@ -266,12 +262,11 @@ class HdfsParquetScanner::ColumnReader {
   /// TODO: another option is to materialize col by col for the entire row batch in
   /// one call. e.g. MaterializeCol would write out 1024 values. Our row batches
   /// are currently dense so we'll need to figure out something there.
-  virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) = 0;
+  virtual bool ReadValue(MemPool* pool, Tuple* tuple) = 0;
 
   /// Same as ReadValue() but does not advance repetition level. Only valid for columns not
   /// in collections.
-  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple,
-      bool* conjuncts_passed) = 0;
+  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) = 0;
 
   /// Advances this column reader's def and rep levels to the next logical value, i.e. to
   /// the next scalar value or the beginning of the next collection, without attempting to
@@ -375,11 +370,11 @@ class HdfsParquetScanner::CollectionColumnReader :
 
   /// Materializes CollectionValue into tuple slot (if materializing) and advances to next
   /// value.
-  virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed);
+  virtual bool ReadValue(MemPool* pool, Tuple* tuple);
 
   /// Same as ReadValue but does not advance repetition level. Only valid for columns not
   /// in collections.
-  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed);
+  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple);
 
   /// Advances all child readers to the beginning of the next collection and updates this
   /// reader's state.
@@ -409,7 +404,7 @@ class HdfsParquetScanner::CollectionColumnReader :
   /// Returns false if execution should be aborted for some reason, e.g. parse_error_ is
   /// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
   /// true.
-  inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed);
+  inline bool ReadSlot(void* slot, MemPool* pool);
 };
 
 /// Reader for a single column from the parquet file. It's associated with a
@@ -565,7 +560,7 @@ class HdfsParquetScanner::BaseScalarColumnReader :
   /// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
   /// true.
   template <bool IN_COLLECTION>
-  inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed);
+  inline bool ReadSlot(void* slot, MemPool* pool);
 };
 
 /// Per column type reader. If MATERIALIZED is true, the column values are materialized
@@ -605,17 +600,17 @@ class HdfsParquetScanner::ScalarColumnReader :
 
   virtual ~ScalarColumnReader() { }
 
-  virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
-    return ReadValue<true>(pool, tuple, conjuncts_passed);
+  virtual bool ReadValue(MemPool* pool, Tuple* tuple) {
+    return ReadValue<true>(pool, tuple);
   }
 
-  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
-    return ReadValue<false>(pool, tuple, conjuncts_passed);
+  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) {
+    return ReadValue<false>(pool, tuple);
   }
 
  protected:
   template <bool IN_COLLECTION>
-  inline bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+  inline bool ReadValue(MemPool* pool, Tuple* tuple) {
     // NextLevels() should have already been called and def and rep levels should be in
     // valid range.
     DCHECK_GE(rep_level_, 0);
@@ -628,8 +623,7 @@ class HdfsParquetScanner::ScalarColumnReader :
     if (!MATERIALIZED) {
       return NextLevels<IN_COLLECTION>();
     } else if (def_level_ >= max_def_level()) {
-      return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool,
-          conjuncts_passed);
+      return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool);
     } else {
       // Null value
       tuple->SetNull(null_indicator_offset_);
@@ -682,7 +676,7 @@ class HdfsParquetScanner::ScalarColumnReader :
   /// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
   /// true.
   template <bool IN_COLLECTION>
-  inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed) {
+  inline bool ReadSlot(void* slot, MemPool* pool) {
     T val;
     T* val_ptr = NeedsConversion() ? &val : reinterpret_cast<T*>(slot);
     if (page_encoding_ == parquet::Encoding::PLAIN_DICTIONARY) {
@@ -794,13 +788,12 @@ class HdfsParquetScanner::BoolColumnReader :
 
   virtual ~BoolColumnReader() { }
 
-  virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
-    return ReadValue<true>(pool, tuple, conjuncts_passed);
+  virtual bool ReadValue(MemPool* pool, Tuple* tuple) {
+    return ReadValue<true>(pool, tuple);
   }
 
-  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple,
-      bool* conjuncts_passed) {
-    return ReadValue<false>(pool, tuple, conjuncts_passed);
+  virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) {
+    return ReadValue<false>(pool, tuple);
   }
 
  protected:
@@ -825,7 +818,7 @@ class HdfsParquetScanner::BoolColumnReader :
 
  private:
   template<bool IN_COLLECTION>
-  inline bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+  inline bool ReadValue(MemPool* pool, Tuple* tuple) {
     DCHECK(slot_desc_ != NULL);
     // Def and rep levels should be in valid range.
     DCHECK_GE(rep_level_, 0);
@@ -836,8 +829,7 @@ class HdfsParquetScanner::BoolColumnReader :
         "Caller should have called NextLevels() until we are ready to read a value";
 
     if (def_level_ >= max_def_level()) {
-      return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool,
-          conjuncts_passed);
+      return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool);
     } else {
       // Null value
       tuple->SetNull(null_indicator_offset_);
@@ -852,7 +844,7 @@ class HdfsParquetScanner::BoolColumnReader :
   /// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
   /// true.
   template <bool IN_COLLECTION>
-  inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed) {
+  inline bool ReadSlot(void* slot, MemPool* pool) {
     if (!bool_values_.GetValue(1, reinterpret_cast<bool*>(slot))) {
       parent_->parse_status_ = Status("Invalid bool column.");
       return false;
@@ -1345,8 +1337,7 @@ bool HdfsParquetScanner::CollectionColumnReader::NextLevels() {
   return true;
 }
 
-bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
-    MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+bool HdfsParquetScanner::CollectionColumnReader::ReadValue(MemPool* pool, Tuple* tuple) {
   DCHECK_GE(rep_level_, 0);
   DCHECK_GE(def_level_, 0);
   DCHECK_GE(def_level_, def_level_of_immediate_repeated_ancestor()) <<
@@ -1355,7 +1346,7 @@ bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
   if (tuple_offset_ == -1) {
     return CollectionColumnReader::NextLevels();
   } else if (def_level_ >= max_def_level()) {
-    return ReadSlot(tuple->GetSlot(tuple_offset_), pool, conjuncts_passed);
+    return ReadSlot(tuple->GetSlot(tuple_offset_), pool);
   } else {
     // Null value
     tuple->SetNull(null_indicator_offset_);
@@ -1364,19 +1355,14 @@ bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
 }
 
 bool HdfsParquetScanner::CollectionColumnReader::ReadNonRepeatedValue(
-    MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
-  return CollectionColumnReader::ReadValue(pool, tuple, conjuncts_passed);
+    MemPool* pool, Tuple* tuple) {
+  return CollectionColumnReader::ReadValue(pool, tuple);
 }
 
-// TODO for 2.3: test query where *conjuncts_passed == false
-bool HdfsParquetScanner::CollectionColumnReader::ReadSlot(
-    void* slot, MemPool* pool, bool* conjuncts_passed) {
+bool HdfsParquetScanner::CollectionColumnReader::ReadSlot(void* slot, MemPool* pool) {
   DCHECK(!children_.empty());
   DCHECK_LE(rep_level_, new_collection_rep_level());
 
-  // TODO: do something with conjuncts_passed? We still need to "read" the value in order
-  // to advance children_ but we don't need to materialize the collection.
-
   // Recursively read the collection into a new CollectionValue.
   CollectionValue* coll_slot = reinterpret_cast<CollectionValue*>(slot);
   *coll_slot = CollectionValue();
@@ -1716,7 +1702,6 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
     Tuple* tuple, MemPool* pool, bool* materialize_tuple) {
   DCHECK(!column_readers.empty());
   bool continue_execution = true;
-  bool conjuncts_passed = true;
   int size = column_readers.size();
   for (int c = 0; c < size; ++c) {
     ColumnReader* col_reader = column_readers[c];
@@ -1724,15 +1709,14 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
       DCHECK(*materialize_tuple);
       DCHECK(col_reader->pos_slot_desc() == NULL);
       // We found a value, read it
-      continue_execution = col_reader->ReadNonRepeatedValue(pool, tuple,
-          &conjuncts_passed);
+      continue_execution = col_reader->ReadNonRepeatedValue(pool, tuple);
    } else if (*materialize_tuple) {
      // All column readers for this tuple should a value to materialize.
       FILE_CHECK_GE(col_reader->def_level(),
           col_reader->def_level_of_immediate_repeated_ancestor());
       // Fill in position slot if applicable
       if (col_reader->pos_slot_desc() != NULL) col_reader->ReadPosition(tuple);
-      continue_execution = col_reader->ReadValue(pool, tuple, &conjuncts_passed);
+      continue_execution = col_reader->ReadValue(pool, tuple);
     } else {
       // A containing repeated field is empty or NULL
       FILE_CHECK_LT(col_reader->def_level(),
@@ -1741,7 +1725,6 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
     }
     if (UNLIKELY(!continue_execution)) break;
   }
-  *materialize_tuple &= conjuncts_passed;
 
   if (!IN_COLLECTION && *materialize_tuple) {
     TupleRow* tuple_row_mem = reinterpret_cast<TupleRow*>(&tuple);
