Repository: incubator-impala Updated Branches: refs/heads/master ec3a1c786 -> 01287a3ba
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/README ---------------------------------------------------------------------- diff --git a/testdata/bad_avro_snap/README b/testdata/bad_avro_snap/README new file mode 100644 index 0000000..a88d0ad --- /dev/null +++ b/testdata/bad_avro_snap/README @@ -0,0 +1,19 @@ +These Avro files were created by modifying Impala's HdfsAvroTableWriter. + +String Data +----------- +These files' schemas have a single nullable string column 's'. + +negative_string_len.avro: contains two values, but the second value has a negative length. + +invalid_union.avro: contains three values, all of which have an invalid union value (4, +which decodes to 2). Also has a single extra byte at the end (0x0a), which will be +interpreted as a malformed data block. + +truncated_string.avro: contains one value, which is missing the last byte. + +Float Data +---------- +These files' schemas have a single nullable float column 'c1'. + +truncated_float.avro: contains two float values. The second is missing the last byte. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/invalid_union.avro ---------------------------------------------------------------------- diff --git a/testdata/bad_avro_snap/invalid_union.avro b/testdata/bad_avro_snap/invalid_union.avro new file mode 100644 index 0000000..f8f2ed0 Binary files /dev/null and b/testdata/bad_avro_snap/invalid_union.avro differ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/negative_string_len.avro ---------------------------------------------------------------------- diff --git a/testdata/bad_avro_snap/negative_string_len.avro b/testdata/bad_avro_snap/negative_string_len.avro new file mode 100644 index 0000000..4a306c1 Binary files /dev/null and b/testdata/bad_avro_snap/negative_string_len.avro differ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/truncated_float.avro ---------------------------------------------------------------------- diff --git a/testdata/bad_avro_snap/truncated_float.avro b/testdata/bad_avro_snap/truncated_float.avro new file mode 100644 index 0000000..7d76543 Binary files /dev/null and b/testdata/bad_avro_snap/truncated_float.avro differ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/bad_avro_snap/truncated_string.avro ---------------------------------------------------------------------- diff --git a/testdata/bad_avro_snap/truncated_string.avro b/testdata/bad_avro_snap/truncated_string.avro new file mode 100644 index 0000000..f2c38ed Binary files /dev/null and b/testdata/bad_avro_snap/truncated_string.avro differ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/datasets/functional/functional_schema_template.sql ---------------------------------------------------------------------- diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 894fa8b..171216f 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -1334,6 +1334,26 @@ field STRING LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_seq_snap/bad_file' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; ==== ---- DATASET +functional +---- BASE_TABLE_NAME +bad_avro_snap_strings +---- COLUMNS +s STRING +---- DEPENDENT_LOAD +LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_avro_snap/negative_string_len.avro' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; +LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_avro_snap/invalid_union.avro' INTO TABLE {db_name}{db_suffix}.{table_name}; +LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_avro_snap/truncated_string.avro' INTO TABLE {db_name}{db_suffix}.{table_name}; +==== +---- DATASET +functional +---- BASE_TABLE_NAME +bad_avro_snap_floats +---- COLUMNS +c1 FLOAT +---- DEPENDENT_LOAD +LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_avro_snap/truncated_float.avro' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; +==== +---- DATASET -- IMPALA-694: uses data file produced by parquet-mr version 1.2.5-cdh4.5.0 -- (can't use LOAD DATA LOCAL with Impala so copied in create-load-data.sh) functional http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/datasets/functional/schema_constraints.csv ---------------------------------------------------------------------- diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index d95f3c4..9251308 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -35,6 +35,8 @@ table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block +table_name:bad_avro_snap_strings, constraint:restrict_to, table_format:avro/snap/block +table_name:bad_avro_snap_floats, constraint:restrict_to, table_format:avro/snap/block table_name:bad_parquet, constraint:restrict_to, table_format:parquet/none/none table_name:bad_magic_number, constraint:restrict_to, table_format:parquet/none/none table_name:bad_metadata_len, constraint:restrict_to, table_format:parquet/none/none http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test new file mode 100644 index 0000000..87d2930 --- /dev/null +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test @@ -0,0 +1,24 @@ +==== +---- QUERY +# Read from the corrupt files. We may get partial results. +select * from bad_avro_snap_strings +---- RESULTS +---- TYPES +string +---- ERRORS +row_regex: .*Problem parsing file.* +row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/truncated_string.avro' is corrupt: truncated data block at offset 155.* +row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/negative_string_len.avro' is corrupt: invalid length -7 at offset 164.* +row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid union value 4 at offset 174.* +row_regex: .*File '.*/bad_avro_snap_strings_avro_snap/invalid_union.avro' is corrupt: invalid encoded integer at offset 191.* +==== +---- QUERY +# Read from the corrupt files. We may get partial results. +select * from bad_avro_snap_floats +---- RESULTS +---- TYPES +float +---- ERRORS +row_regex: .*Problem parsing file.* +row_regex: .*File '.*/bad_avro_snap_floats_avro_snap/truncated_float.avro' is corrupt: truncated data block at offset 159.* +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/tests/common/test_result_verifier.py ---------------------------------------------------------------------- diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py index 24554eb..9e0eca8 100644 --- a/tests/common/test_result_verifier.py +++ b/tests/common/test_result_verifier.py @@ -230,9 +230,16 @@ def verify_results(expected_results, actual_results, order_matters): def verify_errors(expected_errors, actual_errors): """Convert the errors to our test format, treating them as a single string column row - set. This requires enclosing the data in single quotes.""" - expected = QueryTestResult(["'%s'" % l for l in expected_errors if l], ['STRING'], - ['DUMMY_LABEL'], order_matters=False) + set if not a row_regex. This requires enclosing the data in single quotes.""" + converted_expected_errors = [] + for expected_error in expected_errors: + if not expected_error: continue + if ROW_REGEX_PREFIX.match(expected_error): + converted_expected_errors.append(expected_error) + else: + converted_expected_errors.append("'%s'" % expected_error) + expected = QueryTestResult(converted_expected_errors, ['STRING'], ['DUMMY_LABEL'], + order_matters=False) actual = QueryTestResult(["'%s'" % l for l in actual_errors if l], ['STRING'], ['DUMMY_LABEL'], order_matters=False) VERIFIER_MAP['VERIFY_IS_EQUAL'](expected, actual) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01287a3b/tests/data_errors/test_data_errors.py ---------------------------------------------------------------------- diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py index 809a911..e7897e4 100644 --- a/tests/data_errors/test_data_errors.py +++ b/tests/data_errors/test_data_errors.py @@ -75,6 +75,18 @@ class TestHdfsRcFileScanNodeErrors(TestHdfsScanNodeErrors): self.run_test_case('DataErrorsTest/hdfs-rcfile-scan-node-errors', vector) +class TestAvroErrors(TestDataErrors): + @classmethod + def add_test_dimensions(cls): + super(TestAvroErrors, cls).add_test_dimensions() + cls.TestMatrix.add_constraint(lambda v: + v.get_value('table_format').file_format == 'avro' and + v.get_value('table_format').compression_codec == 'snap') + + def test_avro_errors(self, vector): + vector.get_value('exec_option')['abort_on_error'] = 0 + self.run_test_case('DataErrorsTest/avro-errors', vector) + class TestHBaseDataErrors(TestDataErrors): @classmethod def add_test_dimensions(cls):