IMPALA-5548 Fix some minor nits with HDFS parquet column readers Replace some std::map instances with unordered maps. std::map is unnecessary in many cases where an unordered map should suffice and, in almost all circumstances, an unordered map performs better. Also discovered was a slightly dangerous initialization of an empty NullIndicatorOffset, which could conceivably result in undefined behavior by writing arr[ofs] |= b, where ofs was mistakenly initialized to -1 (and b to 0, so such behavior may not be detected, but could cause a crash by underrunning a buffer). Also add WARN_UNUSED_RESULT to Status-returning functions while we are changing this.
Testing: Running exhaustive tests against a Jenkins build. Change-Id: I08e653cae6f2188599f4a23e4f44692166d9c119 Reviewed-on: http://gerrit.cloudera.org:8080/7240 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7135d8cc Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7135d8cc Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7135d8cc Branch: refs/heads/master Commit: 7135d8cc913984a47fd9e9d1dae655ad464a3dc4 Parents: 29d2587 Author: Zach Amsden <[email protected]> Authored: Thu Jun 15 17:02:00 2017 +0000 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Jun 22 02:09:16 2017 +0000 ---------------------------------------------------------------------- be/src/exec/hdfs-scan-node-base.cc | 2 +- be/src/exec/hdfs-scan-node-base.h | 33 +++++++++++++++++-------------- be/src/exec/hdfs-scanner.h | 2 +- be/src/exec/parquet-column-readers.h | 2 +- be/src/runtime/descriptors.h | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scan-node-base.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc index 32b7372..7e55418 100644 --- a/be/src/exec/hdfs-scan-node-base.cc +++ b/be/src/exec/hdfs-scan-node-base.cc @@ -638,7 +638,7 @@ void HdfsScanNodeBase::SetFileMetadata(const string& filename, void* metadata) { void* HdfsScanNodeBase::GetFileMetadata(const string& filename) { unique_lock<mutex> l(metadata_lock_); - map<string, void*>::iterator it = per_file_metadata_.find(filename); + auto it = per_file_metadata_.find(filename); if (it == per_file_metadata_.end()) return NULL; return it->second; } 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scan-node-base.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h index 071f1b6..affe2a5 100644 --- a/be/src/exec/hdfs-scan-node-base.h +++ b/be/src/exec/hdfs-scan-node-base.h @@ -117,11 +117,11 @@ class HdfsScanNodeBase : public ScanNode { HdfsScanNodeBase(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); ~HdfsScanNodeBase(); - virtual Status Init(const TPlanNode& tnode, RuntimeState* state); - virtual Status Prepare(RuntimeState* state); + virtual Status Init(const TPlanNode& tnode, RuntimeState* state) WARN_UNUSED_RESULT; + virtual Status Prepare(RuntimeState* state) WARN_UNUSED_RESULT; virtual void Codegen(RuntimeState* state); - virtual Status Open(RuntimeState* state); - virtual Status Reset(RuntimeState* state); + virtual Status Open(RuntimeState* state) WARN_UNUSED_RESULT; + virtual Status Reset(RuntimeState* state) WARN_UNUSED_RESULT; virtual void Close(RuntimeState* state); /// Returns true if this node uses separate threads for scanners that append RowBatches @@ -155,7 +155,8 @@ class HdfsScanNodeBase : public ScanNode { int skip_header_line_count() const { return skip_header_line_count_; } DiskIoRequestContext* reader_context() { return reader_context_; } - typedef std::map<TupleId, std::vector<ScalarExprEvaluator*>> ConjunctEvaluatorsMap; + typedef std::unordered_map<TupleId, std::vector<ScalarExprEvaluator*>> + ConjunctEvaluatorsMap; const ConjunctEvaluatorsMap& conjuncts_map() const { return conjunct_evals_map_; } /// Slot Id => Dictionary Filter eligible conjuncts for that slot @@ -216,11 +217,11 @@ class HdfsScanNodeBase : public ScanNode { /// threads needed to process those in 'ranges'. /// Can be overridden to add scan-node specific actions like starting scanner threads. 
virtual Status AddDiskIoRanges(const std::vector<DiskIoMgr::ScanRange*>& ranges, - int num_files_queued); + int num_files_queued) WARN_UNUSED_RESULT; /// Adds all splits for file_desc to the io mgr queue and indicates one file has /// been added completely. - inline Status AddDiskIoRanges(const HdfsFileDesc* file_desc) { + inline Status AddDiskIoRanges(const HdfsFileDesc* file_desc) WARN_UNUSED_RESULT { return AddDiskIoRanges(file_desc->splits, 1); } @@ -342,21 +343,22 @@ class HdfsScanNodeBase : public ScanNode { std::unordered_set<int64_t> partition_ids_; /// File path => file descriptor (which includes the file's splits) - typedef std::map<std::string, HdfsFileDesc*> FileDescMap; + typedef std::unordered_map<std::string, HdfsFileDesc*> FileDescMap; FileDescMap file_descs_; /// File format => file descriptors. - typedef std::map<THdfsFileFormat::type, std::vector<HdfsFileDesc*>> FileFormatsMap; + typedef std::map<THdfsFileFormat::type, std::vector<HdfsFileDesc*>> + FileFormatsMap; FileFormatsMap per_type_files_; /// Scanner specific per file metadata (e.g. header information) and associated lock. /// TODO: Remove this lock when removing the legacy scanners and scan nodes. boost::mutex metadata_lock_; - std::map<std::string, void*> per_file_metadata_; + std::unordered_map<std::string, void*> per_file_metadata_; /// Conjuncts for each materialized tuple (top-level row batch tuples and collection /// item tuples). Includes a copy of ExecNode.conjuncts_. - typedef std::map<TupleId, std::vector<ScalarExpr*>> ConjunctsMap; + typedef std::unordered_map<TupleId, std::vector<ScalarExpr*>> ConjunctsMap; ConjunctsMap conjuncts_map_; ConjunctEvaluatorsMap conjunct_evals_map_; @@ -372,7 +374,7 @@ class HdfsScanNodeBase : public ScanNode { AtomicInt32 num_unqueued_files_; /// Per scanner type codegen'd fn. 
- typedef std::map<THdfsFileFormat::type, void*> CodegendFnMap; + typedef boost::unordered_map<THdfsFileFormat::type, void*> CodegendFnMap; CodegendFnMap codegend_fn_map_; /// Maps from a slot's path to its index into materialized_slots_. @@ -465,12 +467,13 @@ class HdfsScanNodeBase : public ScanNode { /// Performs dynamic partition pruning, i.e., applies runtime filters to files, and /// issues initial ranges for all file types. Waits for runtime filters if necessary. /// Only valid to call if !initial_ranges_issued_. Sets initial_ranges_issued_ to true. - Status IssueInitialScanRanges(RuntimeState* state); + Status IssueInitialScanRanges(RuntimeState* state) WARN_UNUSED_RESULT; /// Create and open new scanner for this partition type. /// If the scanner is successfully created and opened, it is returned in 'scanner'. Status CreateAndOpenScanner(HdfsPartitionDescriptor* partition, - ScannerContext* context, boost::scoped_ptr<HdfsScanner>* scanner); + ScannerContext* context, boost::scoped_ptr<HdfsScanner>* scanner) + WARN_UNUSED_RESULT; /// Recursively initializes all NULL collection slots to an empty CollectionValue in /// addition to maintaining the null bit. Hack to allow UnnestNode to project out @@ -504,7 +507,7 @@ class HdfsScanNodeBase : public ScanNode { /// Calls ExecNode::ExecDebugAction() with 'phase'. Returns the status based on the /// debug action specified for the query. 
- Status ScanNodeDebugAction(TExecNodePhase::type phase); + Status ScanNodeDebugAction(TExecNodePhase::type phase) WARN_UNUSED_RESULT; }; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scanner.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h index 03ca624..e4a1192 100644 --- a/be/src/exec/hdfs-scanner.h +++ b/be/src/exec/hdfs-scanner.h @@ -238,7 +238,7 @@ class HdfsScanner { /// descs) has a template tuple, or NULL if there are no partition key or default slots. /// Template tuples are computed once for each file and are allocated from /// template_tuple_pool_. - std::map<const TupleDescriptor*, Tuple*> template_tuple_map_; + std::unordered_map<const TupleDescriptor*, Tuple*> template_tuple_map_; /// Convenience variable set to the top-level template tuple /// (i.e. template_tuple_map_[scan_node_->tuple_desc()]). http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/parquet-column-readers.h ---------------------------------------------------------------------- diff --git a/be/src/exec/parquet-column-readers.h b/be/src/exec/parquet-column-readers.h index 593699d..493997a 100644 --- a/be/src/exec/parquet-column-readers.h +++ b/be/src/exec/parquet-column-readers.h @@ -278,7 +278,7 @@ class ParquetColumnReader { def_level_(HdfsParquetScanner::INVALID_LEVEL), max_def_level_(node_.max_def_level), tuple_offset_(slot_desc == NULL ? -1 : slot_desc->tuple_offset()), - null_indicator_offset_(slot_desc == NULL ? NullIndicatorOffset(-1, -1) : + null_indicator_offset_(slot_desc == NULL ? 
NullIndicatorOffset() : slot_desc->null_indicator_offset()) { DCHECK_GE(node_.max_rep_level, 0); DCHECK_LE(node_.max_rep_level, std::numeric_limits<int16_t>::max()); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/runtime/descriptors.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index bdb8f8c..572bfda 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -90,7 +90,7 @@ struct NullIndicatorOffset { int byte_offset; uint8_t bit_mask; /// to extract null indicator - NullIndicatorOffset(int byte_offset, int bit_offset) + NullIndicatorOffset(int byte_offset = 0, int bit_offset = -1) : byte_offset(byte_offset), bit_mask(bit_offset == -1 ? 0 : 1 << bit_offset) { }
