IMPALA-3220: Skip logging empty ScannerContext's stream in parse error. It's possible for a scanner thread to run to completion and call ScannerContext::ReleaseCompletedResources() only to pick up a cancellation or a bad status from the runtime state in CommitRows(), both of which are scan-node-wide states rather than per-thread state. In this case, the streams in the scanner context may have been removed already, so it's unsafe to access them when logging a parse error. Instead, check if the streams still exist in the scanner context before using them in logging.
Change-Id: Ic8e7ab10e62fff755a0acb9c5649d6a062217045 Reviewed-on: http://gerrit.cloudera.org:8080/2605 Reviewed-by: Michael Ho <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/077d0fab Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/077d0fab Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/077d0fab Branch: refs/heads/master Commit: 077d0fab112ec16d416571898adc1c3b12165944 Parents: 10e7de7 Author: Michael Ho <[email protected]> Authored: Mon Mar 21 14:36:07 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Thu Mar 24 04:54:07 2016 +0000 ---------------------------------------------------------------------- be/src/exec/hdfs-scan-node.cc | 10 ++++++---- be/src/exec/scanner-context.h | 4 ++++ 2 files changed, 10 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077d0fab/be/src/exec/hdfs-scan-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node.cc b/be/src/exec/hdfs-scan-node.cc index 017c5f0..c43bdcb 100644 --- a/be/src/exec/hdfs-scan-node.cc +++ b/be/src/exec/hdfs-scan-node.cc @@ -1192,10 +1192,12 @@ Status HdfsScanNode::ProcessSplit(const vector<FilterContext>& filter_ctxs, ss << "Scan node (id=" << id() << ") ran into a parse error for scan range " << scan_range->file() << "(" << scan_range->offset() << ":" << scan_range->len() << ")."; - if (partition->file_format() != THdfsFileFormat::PARQUET) { - // Parquet doesn't read the range end to end so the current offset isn't useful. - // TODO: make sure the parquet reader is outputting as much diagnostic - // information as possible. + // Parquet doesn't read the range end to end so the current offset isn't useful. 
+ // TODO: make sure the parquet reader is outputting as much diagnostic + // information as possible. + // The error status may not necessarily be related to this scanner thread so this + // thread may have run to completion and closed all its streams already. + if (partition->file_format() != THdfsFileFormat::PARQUET && context.HasStream()) { ScannerContext::Stream* stream = context.GetStream(); ss << " Processed " << stream->total_bytes_returned() << " bytes."; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077d0fab/be/src/exec/scanner-context.h ---------------------------------------------------------------------- diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h index b02d301..22e0928 100644 --- a/be/src/exec/scanner-context.h +++ b/be/src/exec/scanner-context.h @@ -244,6 +244,10 @@ class ScannerContext { Status ReportInvalidRead(int64_t length); }; + bool HasStream() { + return !streams_.empty(); + } + Stream* GetStream(int idx = 0) { DCHECK_GE(idx, 0); DCHECK_LT(idx, streams_.size());
