Repository: incubator-impala
Updated Branches:
  refs/heads/master ec3a1c786 -> 01287a3ba


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/README
----------------------------------------------------------------------
diff --git a/testdata/bad_avro_snap/README b/testdata/bad_avro_snap/README
new file mode 100644
index 0000000..a88d0ad
--- /dev/null
+++ b/testdata/bad_avro_snap/README
@@ -0,0 +1,19 @@
+These Avro files were created by modifying Impala's HdfsAvroTableWriter.
+
+String Data
+-----------
+These files' schemas have a single nullable string column 's'.
+
+negative_string_len.avro: contains two values, but the second value has a negative length.
+
+invalid_union.avro: contains three values, all of which have an invalid union value (4,
+which decodes to 2). Also has a single extra byte at the end (0x0a), which will be
+interpreted as a malformed data block.
+
+truncated_string.avro: contains one value, which is missing the last byte.
+
+Float Data
+----------
+These files' schemas have a single nullable float column 'c1'.
+
+truncated_float.avro: contains two float values. The second is missing the last byte.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/invalid_union.avro
----------------------------------------------------------------------
diff --git a/testdata/bad_avro_snap/invalid_union.avro 
b/testdata/bad_avro_snap/invalid_union.avro
new file mode 100644
index 0000000..f8f2ed0
Binary files /dev/null and b/testdata/bad_avro_snap/invalid_union.avro differ

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/negative_string_len.avro
----------------------------------------------------------------------
diff --git a/testdata/bad_avro_snap/negative_string_len.avro 
b/testdata/bad_avro_snap/negative_string_len.avro
new file mode 100644
index 0000000..4a306c1
Binary files /dev/null and b/testdata/bad_avro_snap/negative_string_len.avro 
differ

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/truncated_float.avro
----------------------------------------------------------------------
diff --git a/testdata/bad_avro_snap/truncated_float.avro 
b/testdata/bad_avro_snap/truncated_float.avro
new file mode 100644
index 0000000..7d76543
Binary files /dev/null and b/testdata/bad_avro_snap/truncated_float.avro differ

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/truncated_string.avro
----------------------------------------------------------------------
diff --git a/testdata/bad_avro_snap/truncated_string.avro 
b/testdata/bad_avro_snap/truncated_string.avro
new file mode 100644
index 0000000..f2c38ed
Binary files /dev/null and b/testdata/bad_avro_snap/truncated_string.avro differ

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/datasets/functional/functional_schema_template.sql
----------------------------------------------------------------------
diff --git a/testdata/datasets/functional/functional_schema_template.sql 
b/testdata/datasets/functional/functional_schema_template.sql
index 894fa8b..171216f 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -1334,6 +1334,26 @@ field STRING
 LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_seq_snap/bad_file' 
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
 ====
 ---- DATASET
+functional
+---- BASE_TABLE_NAME
+bad_avro_snap_strings
+---- COLUMNS
+s STRING
+---- DEPENDENT_LOAD
+LOAD DATA LOCAL INPATH 
'${{env:IMPALA_HOME}}/testdata/bad_avro_snap/negative_string_len.avro' 
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
+LOAD DATA LOCAL INPATH 
'${{env:IMPALA_HOME}}/testdata/bad_avro_snap/invalid_union.avro' INTO TABLE 
{db_name}{db_suffix}.{table_name};
+LOAD DATA LOCAL INPATH 
'${{env:IMPALA_HOME}}/testdata/bad_avro_snap/truncated_string.avro' INTO TABLE 
{db_name}{db_suffix}.{table_name};
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+bad_avro_snap_floats
+---- COLUMNS
+c1 FLOAT
+---- DEPENDENT_LOAD
+LOAD DATA LOCAL INPATH 
'${{env:IMPALA_HOME}}/testdata/bad_avro_snap/truncated_float.avro' OVERWRITE 
INTO TABLE {db_name}{db_suffix}.{table_name};
+====
+---- DATASET
 -- IMPALA-694: uses data file produced by parquet-mr version 1.2.5-cdh4.5.0
 -- (can't use LOAD DATA LOCAL with Impala so copied in create-load-data.sh)
 functional

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/datasets/functional/schema_constraints.csv
----------------------------------------------------------------------
diff --git a/testdata/datasets/functional/schema_constraints.csv 
b/testdata/datasets/functional/schema_constraints.csv
index d95f3c4..9251308 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -35,6 +35,8 @@ table_name:old_rcfile_table, constraint:restrict_to, 
table_format:rc/none/none
 table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
 table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block
 table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
+table_name:bad_avro_snap_strings, constraint:restrict_to, 
table_format:avro/snap/block
+table_name:bad_avro_snap_floats, constraint:restrict_to, 
table_format:avro/snap/block
 table_name:bad_parquet, constraint:restrict_to, table_format:parquet/none/none
 table_name:bad_magic_number, constraint:restrict_to, 
table_format:parquet/none/none
 table_name:bad_metadata_len, constraint:restrict_to, 
table_format:parquet/none/none

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test 
b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test
new file mode 100644
index 0000000..87d2930
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test
@@ -0,0 +1,24 @@
+====
+---- QUERY
+# Read from the corrupt files. We may get partial results.
+select * from bad_avro_snap_strings
+---- RESULTS
+---- TYPES
+string
+---- ERRORS
+row_regex: .*Problem parsing file.*
+row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/truncated_string.avro' 
is corrupt: truncated data block at offset 155.*
+row_regex: .*File 
'.*/bad_avro_snap_strings_avro_snap/negative_string_len.avro' is corrupt: 
invalid length -7 at offset 164.*
+row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is 
corrupt: invalid union value 4 at offset 174.*
+row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is 
corrupt: invalid encoded integer at offset 191.*
+====
+---- QUERY
+# Read from the corrupt files. We may get partial results.
+select * from bad_avro_snap_floats
+---- RESULTS
+---- TYPES
+float
+---- ERRORS
+row_regex: .*Problem parsing file.*
+row_regex: .*File '.*/bad_avro_snap_floats_avro_snap/truncated_float.avro' is 
corrupt: truncated data block at offset 159.*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/tests/common/test_result_verifier.py
----------------------------------------------------------------------
diff --git a/tests/common/test_result_verifier.py 
b/tests/common/test_result_verifier.py
index 24554eb..9e0eca8 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -230,9 +230,16 @@ def verify_results(expected_results, actual_results, 
order_matters):
 
 def verify_errors(expected_errors, actual_errors):
   """Convert the errors to our test format, treating them as a single string column row
-  set. This requires enclosing the data in single quotes."""
-  expected = QueryTestResult(["'%s'" % l for l in expected_errors if l], ['STRING'],
-      ['DUMMY_LABEL'], order_matters=False)
+  set if not a row_regex. This requires enclosing the data in single quotes."""
+  converted_expected_errors = []
+  for expected_error in expected_errors:
+    if not expected_error: continue
+    if ROW_REGEX_PREFIX.match(expected_error):
+      converted_expected_errors.append(expected_error)
+    else:
+      converted_expected_errors.append("'%s'" % expected_error)
+  expected = QueryTestResult(converted_expected_errors, ['STRING'], ['DUMMY_LABEL'],
+      order_matters=False)
   actual = QueryTestResult(["'%s'" % l for l in actual_errors if l], ['STRING'],
       ['DUMMY_LABEL'], order_matters=False)
   VERIFIER_MAP['VERIFY_IS_EQUAL'](expected, actual)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/tests/data_errors/test_data_errors.py
----------------------------------------------------------------------
diff --git a/tests/data_errors/test_data_errors.py 
b/tests/data_errors/test_data_errors.py
index 809a911..e7897e4 100644
--- a/tests/data_errors/test_data_errors.py
+++ b/tests/data_errors/test_data_errors.py
@@ -75,6 +75,18 @@ class TestHdfsRcFileScanNodeErrors(TestHdfsScanNodeErrors):
     self.run_test_case('DataErrorsTest/hdfs-rcfile-scan-node-errors', vector)
 
 
+class TestAvroErrors(TestDataErrors):
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestAvroErrors, cls).add_test_dimensions()
+    cls.TestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format == 'avro' and
+        v.get_value('table_format').compression_codec == 'snap')
+
+  def test_avro_errors(self, vector):
+    vector.get_value('exec_option')['abort_on_error'] = 0
+    self.run_test_case('DataErrorsTest/avro-errors', vector)
+
 class TestHBaseDataErrors(TestDataErrors):
   @classmethod
   def add_test_dimensions(cls):

Reply via email to