IMPALA-5548 Fix some minor nits with HDFS parquet column readers

Replace some std::map instances with unordered maps.  std::map is
unnecessary in many cases where an unordered map suffices, and in
almost all such circumstances an unordered map performs better.  Also
discovered was a slightly dangerous initialization of an empty
NullIndicatorOffset, which could conceivably result in undefined
behavior by writing arr[ofs] |= b, where ofs was mistakenly
initialized to -1 (and b to 0, so such behavior may not be detected,
but could cause a crash by underrunning a buffer).  Also add
WARN_UNUSED_RESULT to Status-returning functions while we are
changing this.

Testing: Running exhaustive tests against a Jenkins build.

Change-Id: I08e653cae6f2188599f4a23e4f44692166d9c119
Reviewed-on: http://gerrit.cloudera.org:8080/7240
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7135d8cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7135d8cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7135d8cc

Branch: refs/heads/master
Commit: 7135d8cc913984a47fd9e9d1dae655ad464a3dc4
Parents: 29d2587
Author: Zach Amsden <[email protected]>
Authored: Thu Jun 15 17:02:00 2017 +0000
Committer: Impala Public Jenkins <[email protected]>
Committed: Thu Jun 22 02:09:16 2017 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scan-node-base.cc   |  2 +-
 be/src/exec/hdfs-scan-node-base.h    | 33 +++++++++++++++++--------------
 be/src/exec/hdfs-scanner.h           |  2 +-
 be/src/exec/parquet-column-readers.h |  2 +-
 be/src/runtime/descriptors.h         |  2 +-
 5 files changed, 22 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scan-node-base.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.cc 
b/be/src/exec/hdfs-scan-node-base.cc
index 32b7372..7e55418 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -638,7 +638,7 @@ void HdfsScanNodeBase::SetFileMetadata(const string& 
filename, void* metadata) {
 
 void* HdfsScanNodeBase::GetFileMetadata(const string& filename) {
   unique_lock<mutex> l(metadata_lock_);
-  map<string, void*>::iterator it = per_file_metadata_.find(filename);
+  auto it = per_file_metadata_.find(filename);
   if (it == per_file_metadata_.end()) return NULL;
   return it->second;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scan-node-base.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.h 
b/be/src/exec/hdfs-scan-node-base.h
index 071f1b6..affe2a5 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -117,11 +117,11 @@ class HdfsScanNodeBase : public ScanNode {
   HdfsScanNodeBase(ObjectPool* pool, const TPlanNode& tnode, const 
DescriptorTbl& descs);
   ~HdfsScanNodeBase();
 
-  virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
-  virtual Status Prepare(RuntimeState* state);
+  virtual Status Init(const TPlanNode& tnode, RuntimeState* state) 
WARN_UNUSED_RESULT;
+  virtual Status Prepare(RuntimeState* state) WARN_UNUSED_RESULT;
   virtual void Codegen(RuntimeState* state);
-  virtual Status Open(RuntimeState* state);
-  virtual Status Reset(RuntimeState* state);
+  virtual Status Open(RuntimeState* state) WARN_UNUSED_RESULT;
+  virtual Status Reset(RuntimeState* state) WARN_UNUSED_RESULT;
   virtual void Close(RuntimeState* state);
 
   /// Returns true if this node uses separate threads for scanners that append 
RowBatches
@@ -155,7 +155,8 @@ class HdfsScanNodeBase : public ScanNode {
   int skip_header_line_count() const { return skip_header_line_count_; }
   DiskIoRequestContext* reader_context() { return reader_context_; }
 
-  typedef std::map<TupleId, std::vector<ScalarExprEvaluator*>> 
ConjunctEvaluatorsMap;
+  typedef std::unordered_map<TupleId, std::vector<ScalarExprEvaluator*>>
+    ConjunctEvaluatorsMap;
   const ConjunctEvaluatorsMap& conjuncts_map() const { return 
conjunct_evals_map_; }
 
   /// Slot Id => Dictionary Filter eligible conjuncts for that slot
@@ -216,11 +217,11 @@ class HdfsScanNodeBase : public ScanNode {
   /// threads needed to process those in 'ranges'.
   /// Can be overridden to add scan-node specific actions like starting 
scanner threads.
   virtual Status AddDiskIoRanges(const std::vector<DiskIoMgr::ScanRange*>& 
ranges,
-      int num_files_queued);
+      int num_files_queued) WARN_UNUSED_RESULT;
 
   /// Adds all splits for file_desc to the io mgr queue and indicates one file 
has
   /// been added completely.
-  inline Status AddDiskIoRanges(const HdfsFileDesc* file_desc) {
+  inline Status AddDiskIoRanges(const HdfsFileDesc* file_desc) 
WARN_UNUSED_RESULT {
     return AddDiskIoRanges(file_desc->splits, 1);
   }
 
@@ -342,21 +343,22 @@ class HdfsScanNodeBase : public ScanNode {
   std::unordered_set<int64_t> partition_ids_;
 
   /// File path => file descriptor (which includes the file's splits)
-  typedef std::map<std::string, HdfsFileDesc*> FileDescMap;
+  typedef std::unordered_map<std::string, HdfsFileDesc*> FileDescMap;
   FileDescMap file_descs_;
 
   /// File format => file descriptors.
-  typedef std::map<THdfsFileFormat::type, std::vector<HdfsFileDesc*>> 
FileFormatsMap;
+  typedef std::map<THdfsFileFormat::type, std::vector<HdfsFileDesc*>>
+    FileFormatsMap;
   FileFormatsMap per_type_files_;
 
   /// Scanner specific per file metadata (e.g. header information) and 
associated lock.
   /// TODO: Remove this lock when removing the legacy scanners and scan nodes.
   boost::mutex metadata_lock_;
-  std::map<std::string, void*> per_file_metadata_;
+  std::unordered_map<std::string, void*> per_file_metadata_;
 
   /// Conjuncts for each materialized tuple (top-level row batch tuples and 
collection
   /// item tuples). Includes a copy of ExecNode.conjuncts_.
-  typedef std::map<TupleId, std::vector<ScalarExpr*>> ConjunctsMap;
+  typedef std::unordered_map<TupleId, std::vector<ScalarExpr*>> ConjunctsMap;
   ConjunctsMap conjuncts_map_;
   ConjunctEvaluatorsMap conjunct_evals_map_;
 
@@ -372,7 +374,7 @@ class HdfsScanNodeBase : public ScanNode {
   AtomicInt32 num_unqueued_files_;
 
   /// Per scanner type codegen'd fn.
-  typedef std::map<THdfsFileFormat::type, void*> CodegendFnMap;
+  typedef boost::unordered_map<THdfsFileFormat::type, void*> CodegendFnMap;
   CodegendFnMap codegend_fn_map_;
 
   /// Maps from a slot's path to its index into materialized_slots_.
@@ -465,12 +467,13 @@ class HdfsScanNodeBase : public ScanNode {
   /// Performs dynamic partition pruning, i.e., applies runtime filters to 
files, and
   /// issues initial ranges for all file types. Waits for runtime filters if 
necessary.
   /// Only valid to call if !initial_ranges_issued_. Sets 
initial_ranges_issued_ to true.
-  Status IssueInitialScanRanges(RuntimeState* state);
+  Status IssueInitialScanRanges(RuntimeState* state) WARN_UNUSED_RESULT;
 
   /// Create and open new scanner for this partition type.
   /// If the scanner is successfully created and opened, it is returned in 
'scanner'.
   Status CreateAndOpenScanner(HdfsPartitionDescriptor* partition,
-      ScannerContext* context, boost::scoped_ptr<HdfsScanner>* scanner);
+      ScannerContext* context, boost::scoped_ptr<HdfsScanner>* scanner)
+      WARN_UNUSED_RESULT;
 
   /// Recursively initializes all NULL collection slots to an empty 
CollectionValue in
   /// addition to maintaining the null bit. Hack to allow UnnestNode to 
project out
@@ -504,7 +507,7 @@ class HdfsScanNodeBase : public ScanNode {
 
   /// Calls ExecNode::ExecDebugAction() with 'phase'. Returns the status based 
on the
   /// debug action specified for the query.
-  Status ScanNodeDebugAction(TExecNodePhase::type phase);
+  Status ScanNodeDebugAction(TExecNodePhase::type phase) WARN_UNUSED_RESULT;
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/hdfs-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h
index 03ca624..e4a1192 100644
--- a/be/src/exec/hdfs-scanner.h
+++ b/be/src/exec/hdfs-scanner.h
@@ -238,7 +238,7 @@ class HdfsScanner {
   /// descs) has a template tuple, or NULL if there are no partition key or 
default slots.
   /// Template tuples are computed once for each file and are allocated from
   /// template_tuple_pool_.
-  std::map<const TupleDescriptor*, Tuple*> template_tuple_map_;
+  std::unordered_map<const TupleDescriptor*, Tuple*> template_tuple_map_;
 
   /// Convenience variable set to the top-level template tuple
   /// (i.e. template_tuple_map_[scan_node_->tuple_desc()]).

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/exec/parquet-column-readers.h
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-column-readers.h 
b/be/src/exec/parquet-column-readers.h
index 593699d..493997a 100644
--- a/be/src/exec/parquet-column-readers.h
+++ b/be/src/exec/parquet-column-readers.h
@@ -278,7 +278,7 @@ class ParquetColumnReader {
       def_level_(HdfsParquetScanner::INVALID_LEVEL),
       max_def_level_(node_.max_def_level),
       tuple_offset_(slot_desc == NULL ? -1 : slot_desc->tuple_offset()),
-      null_indicator_offset_(slot_desc == NULL ? NullIndicatorOffset(-1, -1) :
+      null_indicator_offset_(slot_desc == NULL ? NullIndicatorOffset() :
           slot_desc->null_indicator_offset()) {
     DCHECK_GE(node_.max_rep_level, 0);
     DCHECK_LE(node_.max_rep_level, std::numeric_limits<int16_t>::max());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7135d8cc/be/src/runtime/descriptors.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index bdb8f8c..572bfda 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -90,7 +90,7 @@ struct NullIndicatorOffset {
   int byte_offset;
   uint8_t bit_mask;  /// to extract null indicator
 
-  NullIndicatorOffset(int byte_offset, int bit_offset)
+  NullIndicatorOffset(int byte_offset = 0, int bit_offset = -1)
     : byte_offset(byte_offset),
       bit_mask(bit_offset == -1 ? 0 : 1 << bit_offset) {
   }

Reply via email to