IMPALA-3895, IMPALA-3859: Don't log file data on parse errors

Logging file or table data is a bad idea, and doing it by default is
particularly bad. This patch changes HdfsScanner::LogRowParseError() to log
only a file name and offset.
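For example, a parse failure is now reported with just the location of the
bad row, along the lines of the following (the path and offset shown here are
illustrative only):

  Error parsing row: file: hdfs://localhost:20500/test-warehouse/alltypeserror/year=2009/month=1/000001_0, before offset: 2048

rather than echoing the contents of the offending record into the error log.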
Testing: See rewritten tests. To support testing this change, we also fix
IMPALA-3895 by introducing a canonical string, __HDFS_FILENAME__, which all
Hadoop filenames in the ERRORS output are replaced with before comparing with
the expected results. This fixes a number of issues with the old way of
matching filenames, which purported to use a regex but really didn't. In
particular, we can now match the rest of an ERROR line after the filename,
which was not possible before.

In some cases we don't want to substitute filenames because the test is
looking for very specific ERROR output. In that case we can write
$NAMENODE/<filename>, and this patch will not perform _any_ filename
substitutions on ERRORS sections that contain the $NAMENODE string.

Finally, this patch fixes a bug where a test that had an ERRORS section but
no RESULTS section would silently pass without testing anything.

Change-Id: I5a604f8784a9ff7b4bf878f82ee7f56697df3272
Reviewed-on: http://gerrit.cloudera.org:8080/4020
Reviewed-by: Henry Robinson <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/34b5f1c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/34b5f1c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/34b5f1c4

Branch: refs/heads/master
Commit: 34b5f1c416148f95a34324d66c1ebbf9585d1845
Parents: 480efc9
Author: Henry Robinson <[email protected]>
Authored: Thu Jul 21 14:26:17 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Thu Aug 25 10:20:36 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scanner-ir.cc                  |   4 +-
 be/src/exec/hdfs-scanner.cc                     |  26 +-
 be/src/exec/hdfs-scanner.h                      |  11 +-
 be/src/exec/hdfs-sequence-scanner.cc            |   8 +-
 be/src/exec/hdfs-sequence-scanner.h             |   4 -
 be/src/exec/hdfs-text-scanner.cc                |  24 +-
 be/src/exec/hdfs-text-scanner.h                 |   4 -
 .../queries/DataErrorsTest/avro-errors.test     |  14 +-
 .../DataErrorsTest/hbase-scan-node-errors.test  | 132 +++++-----
 .../hdfs-rcfile-scan-node-errors.test           | 255 +++++++++++--------
 .../DataErrorsTest/hdfs-scan-node-errors.test   | 135 ++++------
 .../hdfs-sequence-scan-errors.test              |   4 +-
 .../QueryTest/parquet-continue-on-error.test    |  12 +-
 .../queries/QueryTest/strict-mode-abort.test    |  12 +-
 .../queries/QueryTest/strict-mode.test          |  31 +--
 tests/common/impala_test_suite.py               |  59 +++--
 tests/common/test_result_verifier.py            |  26 +-
 tests/util/filesystem_utils.py                  |   4 +
 tests/util/hdfs_util.py                         |  35 ++-
 19 files changed, 393 insertions(+), 407 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc
index 098c8e5..2b053ca 100644
--- a/be/src/exec/hdfs-scanner-ir.cc
+++ b/be/src/exec/hdfs-scanner-ir.cc
@@ -64,9 +64,7 @@ int HdfsScanner::WriteAlignedTuples(MemPool* pool, TupleRow* tuple_row, int row_

     // Report parse errors
     if (UNLIKELY(error_in_row)) {
-      if (!ReportTupleParseError(fields, error, i + row_idx_start)) {
-        return -1;
-      }
+      if (!ReportTupleParseError(fields, error)) return -1;
     }

     // Advance to the start of the next tuple


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-scanner.cc
---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc index b0a30d9..ad3d834 100644 --- a/be/src/exec/hdfs-scanner.cc +++ b/be/src/exec/hdfs-scanner.cc @@ -619,8 +619,7 @@ Status HdfsScanner::UpdateDecompressor(const string& codec) { return Status::OK(); } -bool HdfsScanner::ReportTupleParseError(FieldLocation* fields, uint8_t* errors, - int row_idx) { +bool HdfsScanner::ReportTupleParseError(FieldLocation* fields, uint8_t* errors) { for (int i = 0; i < scan_node_->materialized_slots().size(); ++i) { if (errors[i]) { const SlotDescriptor* desc = scan_node_->materialized_slots()[i]; @@ -628,36 +627,25 @@ bool HdfsScanner::ReportTupleParseError(FieldLocation* fields, uint8_t* errors, errors[i] = false; } } - - // Call into subclass to log a more accurate error message. - if (state_->LogHasSpace()) { - stringstream ss; - ss << "file: " << stream_->filename() << endl << "record: "; - LogRowParseError(row_idx, &ss); - state_->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()), 2); - } + LogRowParseError(); if (state_->abort_on_error()) DCHECK(!parse_status_.ok()); return parse_status_.ok(); } -void HdfsScanner::LogRowParseError(int row_idx, stringstream* ss) { - // This is only called for text and seq files which should override this function. - DCHECK(false); +void HdfsScanner::LogRowParseError() { + const string& s = Substitute("Error parsing row: file: $0, before offset: $1", + stream_->filename(), stream_->file_offset()); + state_->LogError(ErrorMsg(TErrorCode::GENERAL, s)); } void HdfsScanner::ReportColumnParseError(const SlotDescriptor* desc, const char* data, int len) { - // len < 0 is used to indicate the data contains escape characters. We don't care - // about that here and can just output the raw string. - if (len < 0) len = -len; - if (state_->LogHasSpace() || state_->abort_on_error()) { stringstream ss; ss << "Error converting column: " << desc->col_pos() - scan_node_->num_partition_keys() - << " TO " << desc->type() - << " (Data is: " << string(data,len) << ")"; + << " to " << desc->type(); // When skipping multiple header lines we only try to skip them in the first scan // range. For subsequent scan ranges, it's impossible to determine how many lines http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-scanner.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h index b7bc89b..6245e5f 100644 --- a/be/src/exec/hdfs-scanner.h +++ b/be/src/exec/hdfs-scanner.h @@ -372,23 +372,18 @@ class HdfsScanner { /// Utility function to report parse errors for each field. /// If errors[i] is nonzero, fields[i] had a parse error. - /// row_idx is the idx of the row in the current batch that had the parse error /// Returns false if parsing should be aborted. In this case parse_status_ is set /// to the error. /// This is called from WriteAlignedTuples. - bool ReportTupleParseError(FieldLocation* fields, uint8_t* errors, int row_idx); + bool ReportTupleParseError(FieldLocation* fields, uint8_t* errors); /// Triggers debug action of the scan node. This is currently used by parquet column /// readers to exercise various failure paths in parquet scanner. Returns the status /// returned by the scan node's TriggerDebugAction(). Status TriggerDebugAction() { return scan_node_->TriggerDebugAction(); } - /// Utility function to append an error message for an invalid row. 
This is called - /// from ReportTupleParseError() - /// row_idx is the index of the row in the current batch. Subclasses should override - /// this function (i.e. text needs to join boundary rows). Since this is only in the - /// error path, vtable overhead is acceptable. - virtual void LogRowParseError(int row_idx, std::stringstream*); + /// Utility function to append an error message for an invalid row. + void LogRowParseError(); /// Writes out all slots for 'tuple' from 'fields'. 'fields' must be aligned /// to the start of the tuple (e.g. fields[0] maps to slots[0]). http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-sequence-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-sequence-scanner.cc b/be/src/exec/hdfs-sequence-scanner.cc index b5542f6..ec52901 100644 --- a/be/src/exec/hdfs-sequence-scanner.cc +++ b/be/src/exec/hdfs-sequence-scanner.cc @@ -332,7 +332,7 @@ Status HdfsSequenceScanner::ProcessRange() { template_tuple_, &errors[0], &error_in_row); if (UNLIKELY(error_in_row)) { - ReportTupleParseError(&field_locations_[0], errors, 0); + ReportTupleParseError(&field_locations_[0], errors); RETURN_IF_ERROR(parse_status_); } } else { @@ -505,9 +505,3 @@ Status HdfsSequenceScanner::ReadCompressedBlock() { return Status::OK(); } - -void HdfsSequenceScanner::LogRowParseError(int row_idx, stringstream* ss) { - DCHECK_LT(row_idx, record_locations_.size()); - *ss << string(reinterpret_cast<const char*>(record_locations_[row_idx].record), - record_locations_[row_idx].len); -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-sequence-scanner.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-sequence-scanner.h b/be/src/exec/hdfs-sequence-scanner.h index 1246b5a..e2dcee9 100644 --- a/be/src/exec/hdfs-sequence-scanner.h +++ b/be/src/exec/hdfs-sequence-scanner.h @@ -219,10 +219,6 @@ class HdfsSequenceScanner : public BaseSequenceScanner { /// record_len: length of the record Status GetRecord(uint8_t** record_ptr, int64_t *record_len); - /// Appends the current file and line to the RuntimeState's error log. - /// row_idx is 0-based (in current batch) where the parse error occurred. - virtual void LogRowParseError(int row_idx, std::stringstream*); - /// Helper class for picking fields and rows from delimited text. boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_; std::vector<FieldLocation> field_locations_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-text-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc index 4be4b02..f1c80b0 100644 --- a/be/src/exec/hdfs-text-scanner.cc +++ b/be/src/exec/hdfs-text-scanner.cc @@ -722,25 +722,6 @@ Status HdfsTextScanner::Open(ScannerContext* context) { return Status::OK(); } -void HdfsTextScanner::LogRowParseError(int row_idx, stringstream* ss) { - DCHECK_LT(row_idx, row_end_locations_.size()); - char* row_end = row_end_locations_[row_idx]; - char* row_start; - if (row_idx == 0) { - row_start = batch_start_ptr_; - } else { - // Row start at 1 past the row end (i.e. the row delimiter) for the previous row - row_start = row_end_locations_[row_idx - 1] + 1; - } - - if (!boundary_row_.IsEmpty()) { - // Log the beginning of the line from the previous file buffer(s). 
- *ss << string(boundary_row_.buffer(), boundary_row_.len()); - } - // Log the erroneous line (or the suffix of a line if !boundary_line.empty()). - *ss << string(row_start, row_end - row_start); -} - // This function writes fields in 'field_locations_' to the row_batch. This function // deals with tuples that straddle batches. There are two cases: // 1. There is already a partial tuple in flight from the previous time around. @@ -776,10 +757,7 @@ int HdfsTextScanner::WriteFields(MemPool* pool, TupleRow* tuple_row, if (state_->abort_on_error()) { parse_status_ = Status(state_->ErrorLog()); } else { - stringstream ss; - ss << "file: " << stream_->filename() << endl << "record: "; - LogRowParseError(0, &ss); - state_->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str())); + LogRowParseError(); } if (!parse_status_.ok()) return 0; error_in_row_ = false; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/be/src/exec/hdfs-text-scanner.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-text-scanner.h b/be/src/exec/hdfs-text-scanner.h index b355ac8..937626f 100644 --- a/be/src/exec/hdfs-text-scanner.h +++ b/be/src/exec/hdfs-text-scanner.h @@ -165,10 +165,6 @@ class HdfsTextScanner : public HdfsScanner { /// the boundary pool. void WritePartialTuple(FieldLocation*, int num_fields, bool copy_strings); - /// Appends the current file and line to the RuntimeState's error log. - /// row_idx is 0-based (in current batch) where the parse error occured. - virtual void LogRowParseError(int row_idx, std::stringstream*); - /// Mem pool for boundary_row_ and boundary_column_. boost::scoped_ptr<MemPool> boundary_pool_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test index aaf59e9..6ca2af6 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test @@ -7,11 +7,11 @@ select * from bad_avro_snap_strings ---- TYPES string ---- ERRORS -row_regex: .*Problem parsing file.* -row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/truncated_string.avro' is corrupt: truncated data block at offset 155.* -row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/negative_string_len.avro' is corrupt: invalid length -7 at offset 164.* -row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid union value 4 at offset 174.* -row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid encoded integer at offset 191.* +row_regex: .*Problem parsing file $NAMENODE/.* +File '$NAMENODE/test-warehouse/bad_avro_snap_strings_avro_snap/truncated_string.avro' is corrupt: truncated data block at offset 155 +File '$NAMENODE/test-warehouse/bad_avro_snap_strings_avro_snap/negative_string_len.avro' is corrupt: invalid length -7 at offset 164 +File '$NAMENODE/test-warehouse/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid union value 4 at offset 174 +File '$NAMENODE/test-warehouse/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid encoded integer at offset 191 ==== ---- QUERY # Read from the corrupt files. We may get partial results. 
@@ -21,6 +21,6 @@ select * from bad_avro_snap_floats ---- TYPES float ---- ERRORS -row_regex: .*Problem parsing file.* -row_regex: .*File '.*/bad_avro_snap_floats_avro_snap/truncated_float.avro' is corrupt: truncated data block at offset 159.* +Problem parsing file $NAMENODE/test-warehouse/bad_avro_snap_floats_avro_snap/truncated_float.avro at 159 +File '$NAMENODE/test-warehouse/bad_avro_snap_floats_avro_snap/truncated_float.avro' is corrupt: truncated data block at offset 159 ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test index 45a8beb..7934b17 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test @@ -2,86 +2,86 @@ ---- QUERY select * from hbasealltypeserror ---- ERRORS -Error converting column d:timestamp_col: '0' TO TIMESTAMP +Error converting column d:timestamp_col: '0' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 0 -Error converting column d:bool_col: 'errfalse' TO BOOL +Error converting column d:bool_col: 'errfalse' to BOOL hbase table: functional_hbase.hbasealltypeserror row key: 1 -Error converting column d:double_col: 'xyz30.300000' TO DOUBLE -Error converting column d:float_col: 'xyz3.000000' TO FLOAT +Error converting column d:double_col: 'xyz30.300000' to DOUBLE +Error converting column d:float_col: 'xyz3.000000' to FLOAT hbase table: functional_hbase.hbasealltypeserror row key: 13 -Error converting column d:timestamp_col: '0009-01-01 00:00:00' TO TIMESTAMP -Error converting column d:tinyint_col: 'xyz5' TO TINYINT +Error converting column d:timestamp_col: '0009-01-01 00:00:00' to TIMESTAMP +Error converting column d:tinyint_col: 'xyz5' to TINYINT hbase table: functional_hbase.hbasealltypeserror row key: 15 -Error converting column d:timestamp_col: '0' TO TIMESTAMP +Error converting column d:timestamp_col: '0' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 16 -Error converting column d:double_col: 'xyz70.700000' TO DOUBLE +Error converting column d:double_col: 'xyz70.700000' to DOUBLE hbase table: functional_hbase.hbasealltypeserror row key: 17 -Error converting column d:timestamp_col: '1999-10-10 90:10:10' TO TIMESTAMP -Error converting column d:tinyint_col: 'err2' TO TINYINT +Error converting column d:timestamp_col: '1999-10-10 90:10:10' to TIMESTAMP +Error converting column d:tinyint_col: 'err2' to TINYINT hbase table: functional_hbase.hbasealltypeserror row key: 2 -Error converting column d:timestamp_col: '2020-20-10 10:10:10.123' TO TIMESTAMP +Error converting column d:timestamp_col: '2020-20-10 10:10:10.123' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 21 -Error converting column d:timestamp_col: '2020-10-40 10:10:10.123' TO TIMESTAMP +Error converting column d:timestamp_col: '2020-10-40 10:10:10.123' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 22 -Error converting column d:smallint_col: 'abc3' TO SMALLINT -Error converting column d:timestamp_col: '2020-10-10 60:10:10.123' TO TIMESTAMP +Error converting column d:smallint_col: 'abc3' to SMALLINT +Error converting 
column d:timestamp_col: '2020-10-10 60:10:10.123' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 23 -Error converting column d:timestamp_col: '2020-10-10 10:70:10.123' TO TIMESTAMP +Error converting column d:timestamp_col: '2020-10-10 10:70:10.123' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 24 -Error converting column d:int_col: 'abc5' TO INT +Error converting column d:int_col: 'abc5' to INT hbase table: functional_hbase.hbasealltypeserror row key: 25 -Error converting column d:tinyint_col: 'abc7' TO TINYINT +Error converting column d:tinyint_col: 'abc7' to TINYINT hbase table: functional_hbase.hbasealltypeserror row key: 27 -Error converting column d:int_col: 'abc9' TO INT +Error converting column d:int_col: 'abc9' to INT hbase table: functional_hbase.hbasealltypeserror row key: 29 -Error converting column d:smallint_col: 'err3' TO SMALLINT -Error converting column d:timestamp_col: '2002-14-10 00:00:00' TO TIMESTAMP +Error converting column d:smallint_col: 'err3' to SMALLINT +Error converting column d:timestamp_col: '2002-14-10 00:00:00' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserror row key: 3 -Error converting column d:bigint_col: 'err300' TO BIGINT -Error converting column d:bool_col: 't\rue' TO BOOL -Error converting column d:double_col: 'err300.900000' TO DOUBLE -Error converting column d:float_col: 'err30..000000' TO FLOAT -Error converting column d:int_col: 'err30' TO INT -Error converting column d:smallint_col: 'err30' TO SMALLINT -Error converting column d:timestamp_col: '0000-01-01 00:00:00' TO TIMESTAMP -Error converting column d:tinyint_col: 'err30' TO TINYINT +Error converting column d:bigint_col: 'err300' to BIGINT +Error converting column d:bool_col: 't\rue' to BOOL +Error converting column d:double_col: 'err300.900000' to DOUBLE +Error converting column d:float_col: 'err30..000000' to FLOAT +Error converting column d:int_col: 'err30' to INT +Error converting column d:smallint_col: 'err30' to SMALLINT +Error converting column d:timestamp_col: '0000-01-01 00:00:00' to TIMESTAMP +Error converting column d:tinyint_col: 'err30' to TINYINT hbase table: functional_hbase.hbasealltypeserror row key: 30 -Error converting column d:int_col: 'err4' TO INT +Error converting column d:int_col: 'err4' to INT hbase table: functional_hbase.hbasealltypeserror row key: 4 -Error converting column d:bigint_col: 'err50' TO BIGINT +Error converting column d:bigint_col: 'err50' to BIGINT hbase table: functional_hbase.hbasealltypeserror row key: 5 -Error converting column d:float_col: 'err6.000000' TO FLOAT +Error converting column d:float_col: 'err6.000000' to FLOAT hbase table: functional_hbase.hbasealltypeserror row key: 6 -Error converting column d:double_col: 'err70.700000' TO DOUBLE +Error converting column d:double_col: 'err70.700000' to DOUBLE hbase table: functional_hbase.hbasealltypeserror row key: 7 -Error converting column d:bigint_col: 'err90' TO BIGINT -Error converting column d:bool_col: 'errtrue' TO BOOL -Error converting column d:double_col: 'err90.900000' TO DOUBLE -Error converting column d:float_col: 'err9.000000' TO FLOAT -Error converting column d:int_col: 'err9' TO INT -Error converting column d:smallint_col: 'err9' TO SMALLINT -Error converting column d:timestamp_col: '0000-01-01 00:00:00' TO TIMESTAMP -Error converting column d:tinyint_col: 'err9' TO TINYINT +Error converting column d:bigint_col: 'err90' to BIGINT +Error converting column d:bool_col: 'errtrue' to BOOL +Error converting column d:double_col: 
'err90.900000' to DOUBLE +Error converting column d:float_col: 'err9.000000' to FLOAT +Error converting column d:int_col: 'err9' to INT +Error converting column d:smallint_col: 'err9' to SMALLINT +Error converting column d:timestamp_col: '0000-01-01 00:00:00' to TIMESTAMP +Error converting column d:tinyint_col: 'err9' to TINYINT hbase table: functional_hbase.hbasealltypeserror row key: 9 ---- FILEERRORS @@ -124,64 +124,64 @@ int, bigint, boolean, string, double, float, int, smallint, string, timestamp, t ---- QUERY select * from hbasealltypeserrornonulls ---- ERRORS -Error converting column d:timestamp_col: '123456' TO TIMESTAMP +Error converting column d:timestamp_col: '123456' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserrornonulls row key: 0 -Error converting column d:bool_col: 'errfalse' TO BOOL -Error converting column d:timestamp_col: '1990-00-01 10:10:10' TO TIMESTAMP +Error converting column d:bool_col: 'errfalse' to BOOL +Error converting column d:timestamp_col: '1990-00-01 10:10:10' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserrornonulls row key: 1 -Error converting column d:double_col: 'xyz30.300000' TO DOUBLE -Error converting column d:float_col: 'xyz3.000000' TO FLOAT +Error converting column d:double_col: 'xyz30.300000' to DOUBLE +Error converting column d:float_col: 'xyz3.000000' to FLOAT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 13 -Error converting column d:tinyint_col: 'xyz5' TO TINYINT +Error converting column d:tinyint_col: 'xyz5' to TINYINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 15 -Error converting column d:double_col: 'xyz70.700000' TO DOUBLE +Error converting column d:double_col: 'xyz70.700000' to DOUBLE hbase table: functional_hbase.hbasealltypeserrornonulls row key: 17 -Error converting column d:tinyint_col: 'err2' TO TINYINT +Error converting column d:tinyint_col: 'err2' to TINYINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 2 -Error converting column d:smallint_col: 'abc3' TO SMALLINT +Error converting column d:smallint_col: 'abc3' to SMALLINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 23 -Error converting column d:int_col: 'abc5' TO INT -Error converting column d:timestamp_col: '2012-Mar-22 11:20:01.123' TO TIMESTAMP +Error converting column d:int_col: 'abc5' to INT +Error converting column d:timestamp_col: '2012-Mar-22 11:20:01.123' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserrornonulls row key: 25 -Error converting column d:tinyint_col: 'abc7' TO TINYINT +Error converting column d:tinyint_col: 'abc7' to TINYINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 27 -Error converting column d:timestamp_col: '11:20:01.123 2012-03-22 ' TO TIMESTAMP +Error converting column d:timestamp_col: '11:20:01.123 2012-03-22 ' to TIMESTAMP hbase table: functional_hbase.hbasealltypeserrornonulls row key: 28 -Error converting column d:int_col: 'abc9' TO INT +Error converting column d:int_col: 'abc9' to INT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 29 -Error converting column d:smallint_col: 'err3' TO SMALLINT +Error converting column d:smallint_col: 'err3' to SMALLINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 3 -Error converting column d:int_col: 'err4' TO INT +Error converting column d:int_col: 'err4' to INT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 4 -Error converting column d:bigint_col: 'err50' TO BIGINT +Error converting column d:bigint_col: 'err50' to BIGINT 
hbase table: functional_hbase.hbasealltypeserrornonulls row key: 5 -Error converting column d:float_col: 'err6.000000' TO FLOAT +Error converting column d:float_col: 'err6.000000' to FLOAT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 6 -Error converting column d:double_col: 'err70.700000' TO DOUBLE +Error converting column d:double_col: 'err70.700000' to DOUBLE hbase table: functional_hbase.hbasealltypeserrornonulls row key: 7 -Error converting column d:bigint_col: 'err90' TO BIGINT -Error converting column d:bool_col: 'errtrue' TO BOOL -Error converting column d:double_col: 'err90.900000' TO DOUBLE -Error converting column d:float_col: 'err9.000000' TO FLOAT -Error converting column d:int_col: 'err9' TO INT -Error converting column d:smallint_col: 'err9' TO SMALLINT -Error converting column d:tinyint_col: 'err9' TO TINYINT +Error converting column d:bigint_col: 'err90' to BIGINT +Error converting column d:bool_col: 'errtrue' to BOOL +Error converting column d:double_col: 'err90.900000' to DOUBLE +Error converting column d:float_col: 'err9.000000' to FLOAT +Error converting column d:int_col: 'err9' to INT +Error converting column d:smallint_col: 'err9' to SMALLINT +Error converting column d:tinyint_col: 'err9' to TINYINT hbase table: functional_hbase.hbasealltypeserrornonulls row key: 9 ---- FILEERRORS http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-rcfile-scan-node-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-rcfile-scan-node-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-rcfile-scan-node-errors.test index 4e06af9..17ab362 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-rcfile-scan-node-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-rcfile-scan-node-errors.test @@ -3,113 +3,158 @@ select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col from alltypeserror ---- ERRORS -Error converting column: 1 TO BOOL -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 1,errfalse,,1,1,10,1.000000,10.100000,01/01/09,1 -Error converting column: 2 TO TINYINT -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 2,true,err2,,2,20,2.000000,20.200000,01/01/09,2 -Error converting column: 3 TO SMALLINT -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 3,false,3,err3,,30,3.000000,30.300000,01/01/09,3 -Error converting column: 4 TO INT -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 4,true,4,4,err4,,4.000000,40.400000,01/01/09,4 -Error converting column: 5 TO BIGINT -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 5,false,5,5,5,err50,,50.500000,01/01/09,5 -Error converting column: 6 TO FLOAT -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 6,true,6,6,6,60,err6.000000,,01/01/09,6 -Error converting column: 7 TO DOUBLE -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 7,,,7,7,70,7.000000,err70.700000,01/01/09,7 -Error converting column: 1 TO BOOL -Error converting column: 2 TO TINYINT -Error converting column: 3 TO SMALLINT -Error converting column: 4 TO INT -Error converting column: 5 TO BIGINT -Error converting column: 6 TO FLOAT -Error converting column: 7 TO DOUBLE -file: alltypeserror_rc/year=2009/month=1/000001_0 -line: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9 
-Error converting column: 6 TO FLOAT -Error converting column: 7 TO DOUBLE -file: alltypeserror_rc/year=2009/month=2/000002_0 -line: 13,false,3,3,,,xyz3.000000,xyz30.300000,02/01/09,3 -Error converting column: 2 TO TINYINT -file: alltypeserror_rc/year=2009/month=2/000002_0 -line: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5 -Error converting column: 7 TO DOUBLE -file: alltypeserror_rc/year=2009/month=2/000002_0 -line: 17,false,7,7,7,70,7.000000,xyz70.700000,02/01/09,7 -Error converting column: 3 TO SMALLINT -file: alltypeserror_rc/year=2009/month=3/000000_0 -line: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3 -Error converting column: 4 TO INT -file: alltypeserror_rc/year=2009/month=3/000000_0 -line: 25,false,5,5,abc5,50,5.000000,50.500000,03/01/09,5 -Error converting column: 2 TO TINYINT -file: alltypeserror_rc/year=2009/month=3/000000_0 -line: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7 -Error converting column: 4 TO INT -file: alltypeserror_rc/year=2009/month=3/000000_0 -line: 29,false,9,9,abc9,90,9.000000,90.900000,03/01/09,9 +Error converting column: 3 to SMALLINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 1 to BOOLEAN +Error converting column: 2 to TINYINT +Error converting column: 3 to SMALLINT +Error converting column: 4 to INT +Error converting column: 5 to BIGINT +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 1 to BOOLEAN +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 3 to SMALLINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 5 to BIGINT +file: __HDFS_FILENAME__ +Error converting column: 6 to FLOAT +file: __HDFS_FILENAME__ +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 1 to BOOLEAN +Error converting column: 2 to TINYINT +Error converting column: 3 to SMALLINT +Error converting column: 4 to INT +Error converting column: 5 to BIGINT +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +---- RESULTS +0,NULL,NULL,0,0,0,0,0,'01/01/09','0' +1,NULL,NULL,1,1,10,1,10.1,'01/01/09','1' +10,NULL,NULL,NULL,0,0,0,0,'02/01/09','0' +11,false,NULL,NULL,NULL,10,1,10.1,'02/01/09','1' +12,true,2,NULL,NULL,NULL,2,20.2,'02/01/09','2' +13,false,3,3,NULL,NULL,NULL,NULL,'02/01/09','3' +14,true,4,4,4,40,NULL,NULL,'02/01/09','4' +15,false,NULL,5,5,50,5,50.5,'02/01/09','5' +16,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'02/01/09','6' +17,false,7,7,7,70,7,NULL,'02/01/09','7' +18,true,8,8,8,80,8,80.8,'02/01/09','8' +19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9' +2,true,NULL,NULL,2,20,2,20.2,'01/01/09','2' +20,true,0,0,0,0,0,0,'03/01/09','0' +21,false,1,1,1,10,1,10.1,'03/01/09','1' +22,true,2,2,2,20,2,20.2,'03/01/09','2' +23,false,3,NULL,3,30,3,30.3,'03/01/09','3' +24,true,4,4,4,40,4,40.4,'03/01/09','4' +25,false,5,5,NULL,50,5,50.5,'03/01/09','5' +26,true,6,6,6,60,6,60.6,'03/01/09','6' +27,false,NULL,7,7,70,7,70.7,'03/01/09','7' +28,true,8,8,8,80,8,80.8,'03/01/09','8' 
+29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9' +3,false,3,NULL,NULL,30,3,30.3,'01/01/09','3' +30,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','10' +4,true,4,4,NULL,NULL,4,40.4,'01/01/09','4' +5,false,5,5,5,NULL,NULL,50.5,'01/01/09','5' +6,true,6,6,6,60,NULL,NULL,'01/01/09','6' +7,NULL,NULL,7,7,70,7,NULL,'01/01/09','7' +8,false,NULL,NULL,8,80,8,80.8,'01/01/09','8' +9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9' +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING ==== ---- QUERY select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col from alltypeserrornonulls +---- RESULTS +0,true,0,0,0,0,0,0,'01/01/09','0' +1,NULL,1,1,1,10,1,10.1,'01/01/09','1' +10,true,0,0,0,0,0,0,'02/01/09','0' +11,false,1,1,1,10,1,10.1,'02/01/09','1' +12,true,2,2,2,20,2,20.2,'02/01/09','2' +13,false,3,3,3,30,NULL,NULL,'02/01/09','3' +14,true,4,4,4,40,4,40.4,'02/01/09','4' +15,false,NULL,5,5,50,5,50.5,'02/01/09','5' +16,true,6,6,6,60,6,60.6,'02/01/09','6' +17,false,7,7,7,70,7,NULL,'02/01/09','7' +18,true,8,8,8,80,8,80.8,'02/01/09','8' +19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9' +2,true,NULL,2,2,20,2,20.2,'01/01/09','2' +20,true,0,0,0,0,0,0,'03/01/09','0' +21,false,1,1,1,10,1,10.1,'03/01/09','1' +22,true,2,2,2,20,2,20.2,'03/01/09','2' +23,false,3,NULL,3,30,3,30.3,'03/01/09','3' +24,true,4,4,4,40,4,40.4,'03/01/09','4' +25,false,5,5,NULL,50,5,50.5,'03/01/09','5' +26,true,6,6,6,60,6,60.6,'03/01/09','6' +27,false,NULL,7,7,70,7,70.7,'03/01/09','7' +28,true,8,8,8,80,8,80.8,'03/01/09','8' +29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9' +3,false,3,NULL,3,30,3,30.3,'01/01/09','3' +4,true,4,4,NULL,40,4,40.4,'01/01/09','4' +5,false,5,5,5,NULL,5,50.5,'01/01/09','5' +6,true,6,6,6,60,NULL,60.6,'01/01/09','6' +7,false,7,7,7,70,7,NULL,'01/01/09','7' +8,false,8,8,8,80,8,80.8,'01/01/09','8' +9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9' +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING ---- ERRORS -Error converting column: 1 TO BOOL -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 1,errfalse,1,1,1,10,1.000000,10.100000,01/01/09,1 -Error converting column: 2 TO TINYINT -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 2,true,err2,2,2,20,2.000000,20.200000,01/01/09,2 -Error converting column: 3 TO SMALLINT -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 3,false,3,err3,3,30,3.000000,30.300000,01/01/09,3 -Error converting column: 4 TO INT -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 4,true,4,4,err4,40,4.000000,40.400000,01/01/09,4 -Error converting column: 5 TO BIGINT -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 5,false,5,5,5,err50,5.000000,50.500000,01/01/09,5 -Error converting column: 6 TO FLOAT -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 6,true,6,6,6,60,err6.000000,60.600000,01/01/09,6 -Error converting column: 7 TO DOUBLE -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 7,false,7,7,7,70,7.000000,err70.700000,01/01/09,7 -Error converting column: 1 TO BOOL -Error converting column: 2 TO TINYINT -Error converting column: 3 TO SMALLINT -Error converting column: 4 TO INT -Error converting column: 5 TO BIGINT -Error converting column: 6 TO FLOAT -Error converting column: 7 TO DOUBLE -file: alltypeserrornonulls_rc/year=2009/month=1/000001_0 -line: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9 -Error converting column: 6 TO FLOAT -Error 
converting column: 7 TO DOUBLE -file: alltypeserrornonulls_rc/year=2009/month=2/000002_0 -line: 13,false,3,3,3,30,xyz3.000000,xyz30.300000,02/01/09,3 -Error converting column: 2 TO TINYINT -file: alltypeserrornonulls_rc/year=2009/month=2/000002_0 -line: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5 -Error converting column: 7 TO DOUBLE -file: alltypeserrornonulls_rc/year=2009/month=2/000002_0 -line: 17,false,7,7,7,70,7.000000,xyz70.700000,02/01/09,7 -Error converting column: 3 TO SMALLINT -file: alltypeserrornonulls_rc/year=2009/month=3/000000_0 -line: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3 -Error converting column: 4 TO INT -file: alltypeserrornonulls_rc/year=2009/month=3/000000_0 -line: 25,false,5,5,abc5,50,5.000000,50.500000,03/01/09,5 -Error converting column: 2 TO TINYINT -file: alltypeserrornonulls_rc/year=2009/month=3/000000_0 -line: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7 -Error converting column: 4 TO INT -file: alltypeserrornonulls_rc/year=2009/month=3/000000_0 -line: 29,false,9,9,abc9,90,9.000000,90.900000,03/01/09,9 -==== \ No newline at end of file +Error converting column: 3 to SMALLINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 1 to BOOLEAN +file: __HDFS_FILENAME__ +Error converting column: 2 to TINYINT +file: __HDFS_FILENAME__ +Error converting column: 3 to SMALLINT +file: __HDFS_FILENAME__ +Error converting column: 4 to INT +file: __HDFS_FILENAME__ +Error converting column: 5 to BIGINT +file: __HDFS_FILENAME__ +Error converting column: 6 to FLOAT +file: __HDFS_FILENAME__ +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +Error converting column: 1 to BOOLEAN +Error converting column: 2 to TINYINT +Error converting column: 3 to SMALLINT +Error converting column: 4 to INT +Error converting column: 5 to BIGINT +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +file: __HDFS_FILENAME__ +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test index fe1b087..2357f9a 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test @@ -8,33 +8,25 @@ ## implemented). 
#select id, bool_col, tinyint_col, smallint_col from alltypeserror #---- ERRORS -#Error converting column: 3 TO SMALLINT (Data is: abc3) +#Error converting column: 3 to SMALLINT #file: hdfs://regex:.$ -#record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2020-10-10 60:10:10.123 -#Error converting column: 2 TO TINYINT (Data is: abc7) +#Error converting column: 2 to TINYINT #file: hdfs://regex:.$ -#record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2020-10-10 10:10:10.123 -#Error converting column: 2 TO TINYINT (Data is: err30) -#Error converting column: 3 TO SMALLINT (Data is: err30) +#Error converting column: 2 to TINYINT +#Error converting column: 3 to SMALLINT #file: hdfs://regex:.$ -#record: 30,t\rue,err30,err30,err30,err300,err30..000000,err300.900000,01/01/10,10,0000-01-01 00:00:00 -#Error converting column: 2 TO TINYINT (Data is: xyz5) +#Error converting column: 2 to TINYINT #file: hdfs://regex:.$ -#record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,0009-01-01 00:00:00 -#Error converting column: 1 TO BOOLEAN (Data is: errfalse) +#Error converting column: 1 to BOOLEAN #file: hdfs://regex:.$ -#record: 1,errfalse,,1,1,10,1.000000,10.100000,01/01/09,1,1999-10-10 -#Error converting column: 2 TO TINYINT (Data is: err2) +#Error converting column: 2 to TINYINT #file: hdfs://regex:.$ -#record: 2,true,err2,,2,20,2.000000,20.200000,01/01/09,2,1999-10-10 90:10:10 -#Error converting column: 3 TO SMALLINT (Data is: err3) +#Error converting column: 3 to SMALLINT #file: hdfs://regex:.$ -#record: 3,false,3,err3,,30,3.000000,30.300000,01/01/09,3,2002-14-10 00:00:00 -#Error converting column: 1 TO BOOLEAN (Data is: errtrue) -#Error converting column: 2 TO TINYINT (Data is: err9) -#Error converting column: 3 TO SMALLINT (Data is: err9) +#Error converting column: 1 to BOOLEAN +#Error converting column: 2 to TINYINT +#Error converting column: 3 to SMALLINT #file: hdfs://regex:.$ -#record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,0000-01-01 00:00:00 # #---- RESULTS #0,NULL,NULL,0 @@ -92,68 +84,49 @@ bigint ---- QUERY select * from alltypeserrornonulls ---- ERRORS - -Error converting column: 3 TO SMALLINT (Data is: abc3) -file: hdfs://regex:.$ -record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2012-03-22 11:20:01.123 -Error converting column: 4 TO INT (Data is: abc5) -Error converting column: 10 TO TIMESTAMP (Data is: 2012-Mar-22 11:20:01.123) -file: hdfs://regex:.$ -record: 25,false,5,5,abc5,50,5.000000,50.500000,03/01/09,5,2012-Mar-22 11:20:01.123 -Error converting column: 2 TO TINYINT (Data is: abc7) -file: hdfs://regex:.$ -record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2012-03-22 11:20:01.123 -Error converting column: 10 TO TIMESTAMP (Data is: 11:20:01.123 2012-03-22 ) -file: hdfs://regex:.$ -record: 28,true,8,8,8,80,8.000000,80.800000,03/01/09,8,11:20:01.123 2012-03-22 -Error converting column: 4 TO INT (Data is: abc9) -file: hdfs://regex:.$ -record: 29,false,9,9,abc9,90,9.000000,90.900000,03/01/09,9,2012-03-22 -Error converting column: 6 TO FLOAT (Data is: xyz3.000000) -Error converting column: 7 TO DOUBLE (Data is: xyz30.300000) -file: hdfs://regex:.$ -record: 13,false,3,3,3,30,xyz3.000000,xyz30.300000,02/01/09,3,2012-03-22 11:20:01.123 -Error converting column: 2 TO TINYINT (Data is: xyz5) -file: hdfs://regex:.$ -record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,2012-03-22 11:20:01.123 -Error converting column: 7 TO DOUBLE (Data is: xyz70.700000) -file: hdfs://regex:.$ -record: 
17,false,7,7,7,70,7.000000,xyz70.700000,02/01/09,7,2012-03-22 11:20:01.123 -Error converting column: 10 TO TIMESTAMP (Data is: 123456) -file: hdfs://regex:.$ -record: 0,true,0,0,0,0,0.000000,0.000000,01/01/09,0,123456 -Error converting column: 1 TO BOOLEAN (Data is: errfalse) -Error converting column: 10 TO TIMESTAMP (Data is: 1990-00-01 10:10:10) -file: hdfs://regex:.$ -record: 1,errfalse,1,1,1,10,1.000000,10.100000,01/01/09,1,1990-00-01 10:10:10 -Error converting column: 2 TO TINYINT (Data is: err2) -file: hdfs://regex:.$ -record: 2,true,err2,2,2,20,2.000000,20.200000,01/01/09,2,2012-03-22 11:20:01.123 -Error converting column: 3 TO SMALLINT (Data is: err3) -file: hdfs://regex:.$ -record: 3,false,3,err3,3,30,3.000000,30.300000,01/01/09,3,2012-03-22 11:20:01.123 -Error converting column: 4 TO INT (Data is: err4) -file: hdfs://regex:.$ -record: 4,true,4,4,err4,40,4.000000,40.400000,01/01/09,4,2012-03-22 11:20:01.123 -Error converting column: 5 TO BIGINT (Data is: err50) -file: hdfs://regex:.$ -record: 5,false,5,5,5,err50,5.000000,50.500000,01/01/09,5,2012-03-22 11:20:01.123 -Error converting column: 6 TO FLOAT (Data is: err6.000000) -file: hdfs://regex:.$ -record: 6,true,6,6,6,60,err6.000000,60.600000,01/01/09,6,2012-03-22 11:20:01.123 -Error converting column: 7 TO DOUBLE (Data is: err70.700000) -file: hdfs://regex:.$ -record: 7,false,7,7,7,70,7.000000,err70.700000,01/01/09,7,2012-03-22 11:20:01.123 -Error converting column: 1 TO BOOLEAN (Data is: errtrue) -Error converting column: 2 TO TINYINT (Data is: err9) -Error converting column: 3 TO SMALLINT (Data is: err9) -Error converting column: 4 TO INT (Data is: err9) -Error converting column: 5 TO BIGINT (Data is: err90) -Error converting column: 6 TO FLOAT (Data is: err9.000000) -Error converting column: 7 TO DOUBLE (Data is: err90.900000) -file: hdfs://regex:.$ -record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,2012-03-22 11:20:01.123 - +Error converting column: 3 to SMALLINT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 4 to INT +Error converting column: 10 to TIMESTAMP +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 2 to TINYINT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 10 to TIMESTAMP +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 4 to INT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 2 to TINYINT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 7 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 10 to TIMESTAMP +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 1 to BOOLEAN +Error converting column: 10 to TIMESTAMP +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 2 to TINYINT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 3 to SMALLINT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 4 to INT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 5 to BIGINT +row_regex: .*Error 
parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 6 to FLOAT +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 7 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 1 to BOOLEAN +Error converting column: 2 to TINYINT +Error converting column: 3 to SMALLINT +Error converting column: 4 to INT +Error converting column: 5 to BIGINT +Error converting column: 6 to FLOAT +Error converting column: 7 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ ---- RESULTS 0,true,0,0,0,0,0,0,'01/01/09','0',NULL,2009,1 1,NULL,1,1,1,10,1,10.1,'01/01/09','1',NULL,2009,1 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-sequence-scan-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-sequence-scan-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-sequence-scan-errors.test index abc578d..e65c06d 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-sequence-scan-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-sequence-scan-errors.test @@ -5,9 +5,9 @@ select count(*) from functional_seq_snap.bad_seq_snap Bad synchronization marker Expected: '6e 91 6 ec be 78 a0 ac 72 10 7e 41 b4 da 93 3c ' Actual: '6e 91 6 78 78 78 a0 ac 72 10 7e 41 b4 da 93 3c ' -Problem parsing file: hdfs://regex:.$ +Problem parsing file __HDFS_FILENAME__ at 899514 (1 of 5 similar) Decompressor: invalid compressed length. Data is likely corrupt. (1 of 3 similar) -Tried to read 896782 bytes but could only read 896777 bytes. This may indicate data file corruption. (file: hdfs://regex:.$ +Tried to read 896782 bytes but could only read 896777 bytes. This may indicate data file corruption. (file __HDFS_FILENAME__, byte offset: 2691508) ---- RESULTS 9434 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/QueryTest/parquet-continue-on-error.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-continue-on-error.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-continue-on-error.test index 1a16d75..2952706 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/parquet-continue-on-error.test +++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-continue-on-error.test @@ -40,7 +40,7 @@ bigint,bigint 29,10 30,10 ---- ERRORS -Column metadata states there are 50 values, but read 100 values from column element. file: hdfs://regex:.$ +Column metadata states there are 50 values, but read 100 values from column element. file=__HDFS_FILENAME__ (1 of 2 similar) ==== ---- QUERY # Same as above but only selecting a single scalar column. @@ -81,7 +81,7 @@ bigint 29 30 ---- ERRORS -Column metadata states there are 11 values, but read 10 values from column id. file: hdfs://regex:.$ +Column metadata states there are 11 values, but read 10 values from column id. 
file=__HDFS_FILENAME__ ==== ---- QUERY SELECT * from bad_parquet_strings_negative_len @@ -89,8 +89,8 @@ SELECT * from bad_parquet_strings_negative_len STRING ---- RESULTS ---- ERRORS -row_regex: .*File '.*/plain-encoded-negative-len.parq' is corrupt: error decoding value of type STRING at offset 58.* -row_regex: .*File '.*/dict-encoded-negative-len.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary.* +File '$NAMENODE/test-warehouse/bad_parquet_strings_negative_len_parquet/plain-encoded-negative-len.parq' is corrupt: error decoding value of type STRING at offset 58 +File '$NAMENODE/test-warehouse/bad_parquet_strings_negative_len_parquet/dict-encoded-negative-len.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary ==== ---- QUERY SELECT * from bad_parquet_strings_out_of_bounds @@ -98,6 +98,6 @@ SELECT * from bad_parquet_strings_out_of_bounds STRING ---- RESULTS ---- ERRORS -row_regex: .*File '.*/plain-encoded-out-of-bounds.parq' is corrupt: error decoding value of type STRING at offset 58.* -row_regex: .*File '.*/dict-encoded-out-of-bounds.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary.* +File '$NAMENODE/test-warehouse/bad_parquet_strings_out_of_bounds_parquet/plain-encoded-out-of-bounds.parq' is corrupt: error decoding value of type STRING at offset 58 +File '$NAMENODE/test-warehouse/bad_parquet_strings_out_of_bounds_parquet/dict-encoded-out-of-bounds.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/QueryTest/strict-mode-abort.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/strict-mode-abort.test b/testdata/workloads/functional-query/queries/QueryTest/strict-mode-abort.test index 8dccc07..808346f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/strict-mode-abort.test +++ b/testdata/workloads/functional-query/queries/QueryTest/strict-mode-abort.test @@ -2,30 +2,30 @@ ---- QUERY select tinyint_col from overflow ---- CATCH -Error converting column: 0 TO TINYINT (Data is: 1000) +Error converting column: 0 to TINYINT ==== ---- QUERY select smallint_col from overflow ---- CATCH -Error converting column: 1 TO SMALLINT (Data is: 100000) +Error converting column: 1 to SMALLINT ==== ---- QUERY select int_col from overflow ---- CATCH -Error converting column: 2 TO INT (Data is: 10000000000000000) +Error converting column: 2 to INT ==== ---- QUERY select bigint_col from overflow ---- CATCH -Error converting column: 3 TO BIGINT (Data is: 10000000000000000000) +Error converting column: 3 to BIGINT ==== ---- QUERY select float_col from overflow ---- CATCH -Error converting column: 4 TO FLOAT (Data is: 1e1000000) +Error converting column: 4 to FLOAT ==== ---- QUERY select double_col from overflow ---- CATCH -Error converting column: 5 TO DOUBLE (Data is: 1e10000) +Error converting column: 5 to DOUBLE ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/testdata/workloads/functional-query/queries/QueryTest/strict-mode.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/strict-mode.test b/testdata/workloads/functional-query/queries/QueryTest/strict-mode.test index 2d85a74..b70c272 100644 
--- a/testdata/workloads/functional-query/queries/QueryTest/strict-mode.test +++ b/testdata/workloads/functional-query/queries/QueryTest/strict-mode.test @@ -2,23 +2,20 @@ ---- QUERY select * from overflow ---- ERRORS -Error converting column: 0 TO TINYINT (Data is: 1000) -Error converting column: 1 TO SMALLINT (Data is: 100000) -Error converting column: 2 TO INT (Data is: 10000000000000000) -Error converting column: 3 TO BIGINT (Data is: 10000000000000000000) -Error converting column: 4 TO FLOAT (Data is: 1e1000000) -Error converting column: 5 TO DOUBLE (Data is: 1e10000) -file: hdfs://regex:.$ -record: 1000,100000,10000000000000000,10000000000000000000,1e1000000,1e10000 -Error converting column: 0 TO TINYINT (Data is: -1000) -Error converting column: 1 TO SMALLINT (Data is: -100000) -Error converting column: 2 TO INT (Data is: -10000000000000000) -Error converting column: 3 TO BIGINT (Data is: -10000000000000000000) -Error converting column: 4 TO FLOAT (Data is: -1e1000000) -Error converting column: 5 TO DOUBLE (Data is: -1e10000) -file: hdfs://regex:.$ -record: -1000,-100000,-10000000000000000,-10000000000000000000,-1e1000000,-1e10000 - +Error converting column: 0 to TINYINT +Error converting column: 1 to SMALLINT +Error converting column: 2 to INT +Error converting column: 3 to BIGINT +Error converting column: 4 to FLOAT +Error converting column: 5 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ +Error converting column: 0 to TINYINT +Error converting column: 1 to SMALLINT +Error converting column: 2 to INT +Error converting column: 3 to BIGINT +Error converting column: 4 to FLOAT +Error converting column: 5 to DOUBLE +row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+ ---- RESULTS 1,2,3,4,5.5,6.6 NULL,NULL,NULL,NULL,NULL,NULL http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 36e7442..2c0f3c6 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -48,13 +48,17 @@ from tests.common.test_vector import TestDimension from tests.performance.query import Query from tests.performance.query_exec_functions import execute_using_jdbc from tests.performance.query_executor import JdbcQueryExecConfig -from tests.util.filesystem_utils import IS_S3, S3_BUCKET_NAME -from tests.util.hdfs_util import HdfsConfig, get_hdfs_client, get_hdfs_client_from_conf +from tests.util.filesystem_utils import IS_S3, S3_BUCKET_NAME, FILESYSTEM_PREFIX +from tests.util.hdfs_util import ( + HdfsConfig, + get_hdfs_client, + get_hdfs_client_from_conf, + NAMENODE) from tests.util.s3_util import S3Client from tests.util.test_file_parser import ( - QueryTestSectionReader, - parse_query_test_file, - write_test_file) + QueryTestSectionReader, + parse_query_test_file, + write_test_file) from tests.util.thrift_util import create_transport # Imports required for Hive Metastore Client @@ -72,19 +76,8 @@ IMPALAD_HS2_HOST_PORT =\ HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2 WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] HDFS_CONF = HdfsConfig(pytest.config.option.minicluster_xml_conf) -CORE_CONF = HdfsConfig(os.path.join(os.environ['HADOOP_CONF_DIR'], "core-site.xml")) TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs" IMPALA_HOME = os.getenv("IMPALA_HOME") -# FILESYSTEM_PREFIX is the path prefix that should be used in 
queries. When running -# the tests against the default filesystem (fs.defaultFS), FILESYSTEM_PREFIX is the -# empty string. When running against a secondary filesystem, it will be the scheme -# and authority porotion of the qualified path. -FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX") -# NAMENODE is the path prefix that should be used in results, since paths that come -# out of Impala have been qualified. When running against the default filesystem, -# this will be the same as fs.defaultFS. When running against a secondary filesystem, -# this will be the same as FILESYSTEM_PREFIX. -NAMENODE = FILESYSTEM_PREFIX or CORE_CONF.get('fs.defaultFS') # Match any SET statement. Assume that query options' names # only contain alphabets and underscores. SET_PATTERN = re.compile(r'\s*set\s*([a-zA-Z_]+)=*', re.I) @@ -220,6 +213,27 @@ class ImpalaTestSuite(BaseTestSuite): if expected_str in actual_str: return assert False, 'Unexpected exception string: %s' % actual_str + def __verify_results_and_errors(self, vector, test_section, result, use_db): + """Verifies that both results and error sections are as expected. Rewrites both + by replacing $NAMENODE, $DATABASE and $IMPALA_HOME with their actual values, and + optionally rewriting filenames with __HDFS_FILENAME__, to ensure that expected and + actual values are easily compared. + """ + replace_filenames_with_placeholder = True + for section_name in ('RESULTS', 'ERRORS'): + if section_name in test_section: + if "$NAMENODE" in test_section[section_name]: + replace_filenames_with_placeholder = False + test_section[section_name] = test_section[section_name] \ + .replace('$NAMENODE', NAMENODE) \ + .replace('$IMPALA_HOME', IMPALA_HOME) + if use_db: + test_section['RESULTS'] = test_section['RESULTS'].replace('$DATABASE', use_db) + verify_raw_results(test_section, result, vector.get_value('table_format').file_format, + pytest.config.option.update_results, + replace_filenames_with_placeholder) + + def run_test_case(self, test_file_name, vector, use_db=None, multiple_impalad=False, encoding=None, wait_secs_between_stmts=None): """ @@ -325,14 +339,11 @@ class ImpalaTestSuite(BaseTestSuite): if encoding: result.data = [row.decode(encoding) for row in result.data] # Replace $NAMENODE in the expected results with the actual namenode URI. if 'RESULTS' in test_section: - test_section['RESULTS'] = test_section['RESULTS'] \ - .replace('$NAMENODE', NAMENODE) \ - .replace('$IMPALA_HOME', IMPALA_HOME) - if use_db: - test_section['RESULTS'] = test_section['RESULTS'].replace('$DATABASE', use_db) - verify_raw_results(test_section, result, - vector.get_value('table_format').file_format, - pytest.config.option.update_results) + self.__verify_results_and_errors(vector, test_section, result, use_db) + else: + # TODO: Can't validate errors without expected results for now. + assert 'ERRORS' not in test_section,\ + "'ERRORS' sections must have accompanying 'RESULTS' sections" # If --update_results, then replace references to the namenode URI with $NAMENODE. 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/tests/common/test_result_verifier.py
----------------------------------------------------------------------
diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py
index 8130b4d..1410b92 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -24,6 +24,7 @@ import re
 from functools import wraps
 from tests.util.test_file_parser import (join_section_lines, remove_comments,
     split_section_lines)
+from tests.util.hdfs_util import NAMENODE
 
 logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
 LOG = logging.getLogger('test_result_verfier')
@@ -272,18 +273,20 @@ def verify_errors(expected_errors, actual_errors):
       ['DUMMY_LABEL'], order_matters=False)
   VERIFIER_MAP['VERIFY_IS_EQUAL'](expected, actual)
 
-def apply_error_match_filter(error_list):
+def apply_error_match_filter(error_list, replace_filenames=True):
   """Applies a filter to each entry in the given list of errors to ensure result
   matching is stable."""
-  updated_errors = list()
-  for row in error_list:
-    # The actual file path isn't very interesting and can vary. Filter it out.
-    row = re.sub(r'^file:.+$|file=.+$|file hdfs:.+$', 'file: hdfs://regex:.$', row)
+  file_regex = r'%s.*/[\w\.\-]+' % NAMENODE
+  def replace_fn(row):
+    # The actual file path isn't very interesting and can vary. Change it to a canonical
+    # string that allows result rows to sort in the same order as expected rows.
+    if replace_filenames: row = re.sub(file_regex, '__HDFS_FILENAME__', row)
     # The "Backend <id>" can also vary, so filter it out as well.
-    updated_errors.append(re.sub(r'Backend \d+:', '', row))
-  return updated_errors
+    return re.sub(r'Backend \d+:', '', row)
+  return [replace_fn(row) for row in error_list]
 
-def verify_raw_results(test_section, exec_result, file_format, update_section=False):
+def verify_raw_results(test_section, exec_result, file_format, update_section=False,
+    replace_filenames=True):
   """
   Accepts a raw exec_result object and verifies it matches the expected results.
   If update_section is true, updates test_section with the actual results
@@ -294,16 +297,16 @@ def verify_raw_results(test_section, exec_result, file_format, update_section=Fa
   result format used in the tests.
   """
   expected_results = None
-
   if 'RESULTS' in test_section:
     expected_results = remove_comments(test_section['RESULTS'])
   else:
+    assert 'ERRORS' not in test_section, "'ERRORS' section must have accompanying 'RESULTS' section"
     LOG.info("No results found. Skipping verification");
     return
-
   if 'ERRORS' in test_section:
     expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
-    actual_errors = apply_error_match_filter(exec_result.log.split('\n'))
+    actual_errors = apply_error_match_filter(exec_result.log.split('\n'),
+        replace_filenames)
     try:
       verify_errors(expected_errors, actual_errors)
     except AssertionError:
@@ -476,4 +479,3 @@ def verify_runtime_profile(expected, actual):
 
   assert len(unmatched_lines) == 0, ("Did not find matches for lines in runtime profile:"
       "\nEXPECTED LINES:\n%s\n\nACTUAL PROFILE:\n%s" % ('\n'.join(unmatched_lines), actual))
-
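A small self-contained example of the filename canonicalization that the reworked apply_error_match_filter() performs on each error row; the NAMENODE value and the error line are made up for illustration:

import re

# Made-up values; NAMENODE normally comes from tests.util.hdfs_util.
NAMENODE = "hdfs://localhost:20500"
actual = ("Backend 0:Error parsing row: file: "
          "%s/test-warehouse/overflow/data.txt before offset: 45" % NAMENODE)

# The same two rewrites that apply_error_match_filter applies to each error row.
file_regex = r'%s.*/[\w\.\-]+' % NAMENODE
row = re.sub(file_regex, '__HDFS_FILENAME__', actual)
row = re.sub(r'Backend \d+:', '', row)

print(row)  # Error parsing row: file: __HDFS_FILENAME__ before offset: 45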
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/tests/util/filesystem_utils.py
----------------------------------------------------------------------
diff --git a/tests/util/filesystem_utils.py b/tests/util/filesystem_utils.py
index 1adc055..d435720 100644
--- a/tests/util/filesystem_utils.py
+++ b/tests/util/filesystem_utils.py
@@ -18,6 +18,10 @@
 # Utilities for supporting different filesystems.
 import os
 
+# FILESYSTEM_PREFIX is the path prefix that should be used in queries. When running
+# the tests against the default filesystem (fs.defaultFS), FILESYSTEM_PREFIX is the
+# empty string. When running against a secondary filesystem, it will be the scheme
+# and authority portion of the qualified path.
 FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX") or str()
 SECONDARY_FILESYSTEM = os.getenv("SECONDARY_FILESYSTEM") or str()
 FILESYSTEM = os.getenv("TARGET_FILESYSTEM")
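To make the two prefixes concrete, an illustration with invented values: FILESYSTEM_PREFIX qualifies paths that go into queries, while NAMENODE (defined in hdfs_util.py in the next diff) is the prefix expected on paths that Impala reports back:

# Invented example values; in a real run these come from the environment and
# from core-site.xml.
FILESYSTEM_PREFIX = ""                    # default filesystem: queries use bare paths
NAMENODE = "hdfs://localhost:20500"       # fs.defaultFS: prefix seen in query output

query_path = "%s/test-warehouse/tpch.lineitem" % FILESYSTEM_PREFIX
reported_path = "%s/test-warehouse/tpch.lineitem/lineitem.tbl" % NAMENODE

print(query_path)     # /test-warehouse/tpch.lineitem
print(reported_path)  # hdfs://localhost:20500/test-warehouse/tpch.lineitem/lineitem.tbl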
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34b5f1c4/tests/util/hdfs_util.py
----------------------------------------------------------------------
diff --git a/tests/util/hdfs_util.py b/tests/util/hdfs_util.py
index 1c1080f..3c3a45e 100644
--- a/tests/util/hdfs_util.py
+++ b/tests/util/hdfs_util.py
@@ -26,6 +26,28 @@ from pywebhdfs.webhdfs import PyWebHdfsClient, errors, _raise_pywebhdfs_exceptio
 from xml.etree.ElementTree import parse
 
 from tests.util.filesystem_base import BaseFilesystem
+from tests.util.filesystem_utils import FILESYSTEM_PREFIX
+
+class HdfsConfig(object):
+  """Reads an XML configuration file (produced by a mini-cluster) into a dictionary
+  accessible via get()"""
+  def __init__(self, *filename):
+    self.conf = {}
+    for arg in filename:
+      tree = parse(arg)
+      for property in tree.getroot().getiterator('property'):
+        self.conf[property.find('name').text] = property.find('value').text
+
+  def get(self, key):
+    return self.conf.get(key)
+
+# Configuration object for the configuration that the minicluster will use.
+CORE_CONF = HdfsConfig(join_path(environ['HADOOP_CONF_DIR'], "core-site.xml"))
+# NAMENODE is the path prefix that should be used in results, since paths that come
+# out of Impala have been qualified. When running against the default filesystem,
+# this will be the same as fs.defaultFS. When running against a secondary filesystem,
+# this will be the same as FILESYSTEM_PREFIX.
+NAMENODE = FILESYSTEM_PREFIX or CORE_CONF.get('fs.defaultFS')
 
 class PyWebHdfsClientWithChmod(PyWebHdfsClient, BaseFilesystem):
   def chmod(self, path, permission):
@@ -118,19 +140,6 @@ class PyWebHdfsClientWithChmod(PyWebHdfsClient, BaseFilesystem):
       return False
     return True
 
-class HdfsConfig(object):
-  """Reads an XML configuration file (produced by a mini-cluster) into a dictionary
-  accessible via get()"""
-  def __init__(self, *filename):
-    self.conf = {}
-    for arg in filename:
-      tree = parse(arg)
-      for property in tree.getroot().getiterator('property'):
-        self.conf[property.find('name').text] = property.find('value').text
-
-  def get(self, key):
-    return self.conf.get(key)
-
 def get_hdfs_client_from_conf(conf):
   """Returns a new HTTP client for an HDFS cluster using an HdfsConfig object"""
   hostport = conf.get('dfs.namenode.http-address')
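A minimal usage sketch of the relocated HdfsConfig helper and the NAMENODE derivation, assuming HADOOP_CONF_DIR points at a directory containing the minicluster's core-site.xml; the printed value is only an example:

from os import environ
from os.path import join as join_path

from tests.util.filesystem_utils import FILESYSTEM_PREFIX
from tests.util.hdfs_util import HdfsConfig

# Parse the minicluster configuration and look up fs.defaultFS, mirroring how
# hdfs_util.py now computes NAMENODE at import time.
conf = HdfsConfig(join_path(environ['HADOOP_CONF_DIR'], "core-site.xml"))
namenode = FILESYSTEM_PREFIX or conf.get('fs.defaultFS')
print(namenode)  # e.g. hdfs://localhost:20500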
