IMPALA-3220: Skip logging empty ScannerContext's stream in parse error.

It's possible for a scanner thread to run to completion and call
ScannerContext::ReleaseCompletedResources() only to pick up a cancellation
or a bad status from the runtime state in CommitRows(), both of which
are scan-node-wide states rather than per-thread states. In this case, the
streams in the scanner context may have been removed already so it's unsafe
to access them when logging a parse error. Instead, check if the streams still
exist in the scanner context before using them in logging.

Change-Id: Ic8e7ab10e62fff755a0acb9c5649d6a062217045
Reviewed-on: http://gerrit.cloudera.org:8080/2605
Reviewed-by: Michael Ho <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/077d0fab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/077d0fab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/077d0fab

Branch: refs/heads/master
Commit: 077d0fab112ec16d416571898adc1c3b12165944
Parents: 10e7de7
Author: Michael Ho <[email protected]>
Authored: Mon Mar 21 14:36:07 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Thu Mar 24 04:54:07 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scan-node.cc | 10 ++++++----
 be/src/exec/scanner-context.h |  4 ++++
 2 files changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077d0fab/be/src/exec/hdfs-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node.cc b/be/src/exec/hdfs-scan-node.cc
index 017c5f0..c43bdcb 100644
--- a/be/src/exec/hdfs-scan-node.cc
+++ b/be/src/exec/hdfs-scan-node.cc
@@ -1192,10 +1192,12 @@ Status HdfsScanNode::ProcessSplit(const vector<FilterContext>& filter_ctxs,
     ss << "Scan node (id=" << id() << ") ran into a parse error for scan range "
        << scan_range->file() << "(" << scan_range->offset() << ":"
        << scan_range->len() << ").";
-    if (partition->file_format() != THdfsFileFormat::PARQUET) {
-      // Parquet doesn't read the range end to end so the current offset isn't useful.
-      // TODO: make sure the parquet reader is outputting as much diagnostic
-      // information as possible.
+    // Parquet doesn't read the range end to end so the current offset isn't useful.
+    // TODO: make sure the parquet reader is outputting as much diagnostic
+    // information as possible.
+    // The error status may not necessarily be related to this scanner thread so this
+    // thread may have run to completion and closed all its streams already.
+    if (partition->file_format() != THdfsFileFormat::PARQUET && context.HasStream()) {
       ScannerContext::Stream* stream = context.GetStream();
       ss << " Processed " << stream->total_bytes_returned() << " bytes.";
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077d0fab/be/src/exec/scanner-context.h
----------------------------------------------------------------------
diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h
index b02d301..22e0928 100644
--- a/be/src/exec/scanner-context.h
+++ b/be/src/exec/scanner-context.h
@@ -244,6 +244,10 @@ class ScannerContext {
     Status ReportInvalidRead(int64_t length);
   };
 
+  bool HasStream() {
+    return !streams_.empty();
+  }
+
   Stream* GetStream(int idx = 0) {
     DCHECK_GE(idx, 0);
     DCHECK_LT(idx, streams_.size());

Reply via email to