This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e294be7707167ba261e5d9a58d3837a83dd0b7ff
Author: stiga-huang <[email protected]>
AuthorDate: Mon Jun 19 14:18:31 2023 +0800

    IMPALA-12128: Bump ORC C++ version to 1.7.9-p10
    
    This bumps the ORC C++ version from 1.7.0-p14 to 1.7.9-p10 to pick up the
    fixes for ORC-1041 and ORC-1304.
    
    Tests:
     - Add an e2e test for ORC-1304.
     - It's hard to add a test for ORC-1041 since it won't cause crashes when
       compiling with gcc-10.
    
    Change-Id: I26c39fe5b15ab0bcbe6b2af6fe7a45e48eaec6eb
    Reviewed-on: http://gerrit.cloudera.org:8080/20090
    Reviewed-by: Joe McDonnell <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 bin/impala-config.sh                               |   4 +-
 testdata/data/README                               |  45 +++++++++++++++++++++
 testdata/data/empty_present_stream.orc             | Bin 0 -> 530 bytes
 .../functional/functional_schema_template.sql      |  14 ++++++-
 .../datasets/functional/schema_constraints.csv     |   3 ++
 .../queries/QueryTest/orc-stats.test               |   8 ++++
 6 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 644384e1f..cb4ca79a4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=295-d43043e809
+export IMPALA_TOOLCHAIN_BUILD_ID=296-f7e1d0d78b
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -148,7 +148,7 @@ export IMPALA_ZSTD_VERSION=1.5.2
 unset IMPALA_ZSTD_URL
 export IMPALA_OPENLDAP_VERSION=2.4.47
 unset IMPALA_OPENLDAP_URL
-export IMPALA_ORC_VERSION=1.7.0-p14
+export IMPALA_ORC_VERSION=1.7.9-p10
 unset IMPALA_ORC_URL
 export IMPALA_PROTOBUF_VERSION=3.14.0
 unset IMPALA_PROTOBUF_URL
diff --git a/testdata/data/README b/testdata/data/README
index 0d4940d10..7c1560527 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -979,3 +979,48 @@ The following command was used:
 mvn -f "${IMPALA_HOME}/java/datagenerator/pom.xml" exec:java
  -Dexec.mainClass="org.apache.impala.datagenerator.RandomNestedDataGenerator"
  -Dexec.args="${input_table_schema}.avsc 1500000 15 '${output_file}.parquet'";
+
+empty_present_stream.orc:
+Generated by the ORC C++ library using the following code:
+
+  size_t num = 500;
+  WriterOptions options;
+  options.setRowIndexStride(100);
+  auto stream = writeLocalFile("empty_present_stream.orc");
+  std::unique_ptr<Type> type(Type::buildTypeFromString(
+      "struct<s1:struct<id:int>,s2:struct<id:int>>"));
+
+  std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);
+
+  std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
+  StructVectorBatch* structBatch =
+      dynamic_cast<StructVectorBatch*>(batch.get());
+  StructVectorBatch* structBatch2 =
+      dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
+  LongVectorBatch* intBatch =
+      dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
+
+  StructVectorBatch* structBatch3 =
+      dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
+  LongVectorBatch* intBatch2 =
+      dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
+
+  structBatch->numElements = num;
+  structBatch2->numElements = num;
+
+  structBatch3->numElements = num;
+  structBatch3->hasNulls = true;
+
+  for (size_t i = 0; i < num; ++i) {
+    intBatch->data.data()[i] = i;
+    intBatch->notNull[i] = 1;
+
+    intBatch2->notNull[i] = 0;
+    intBatch2->hasNulls = true;
+
+    structBatch3->notNull[i] = 0;
+  }
+  intBatch->hasNulls = false;
+
+  writer->add(*batch);
+  writer->close();
diff --git a/testdata/data/empty_present_stream.orc b/testdata/data/empty_present_stream.orc
new file mode 100644
index 000000000..90695c009
Binary files /dev/null and b/testdata/data/empty_present_stream.orc differ
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 0bcf18fc4..cbaf445db 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -4202,4 +4202,16 @@ id bigint
 delimited
 ---- LOAD
 LOAD DATA LOCAL INPATH '{impala_home}/testdata/empty_parquet_page_source_impala10186/data.csv' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
-====
\ No newline at end of file
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+empty_stream_tbl
+---- COLUMNS
+s1 struct<id:int>
+s2 struct<id:int>
+---- TABLE_PROPERTIES
+transactional=false
+---- DEPENDENT_LOAD
+LOAD DATA LOCAL INPATH '{impala_home}/testdata/data/empty_present_stream.orc' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
+====
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index f71b00de0..f567cac56 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -386,3 +386,6 @@ table_name:complextypestbl_parquet_v2_snappy, constraint:restrict_to, table_form
 
 # The table is used to test a specific parquet page layout bug
 table_name:empty_parquet_page_source_impala10186, constraint:restrict_to, table_format:text/none/none
+
+# The table is used as test coverage for ORC-1304
+table_name:empty_stream_tbl, constraint:restrict_to, table_format:orc/def/block
diff --git a/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test b/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test
index d43049c4d..14b14b00b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test
@@ -856,3 +856,11 @@ select count(*) from functional_orc_def.decimal_tbl where d4 is null;
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 0
 ====
+---- QUERY
+# Regression test for ORC-1304
+select s2.id from functional_orc_def.empty_stream_tbl where s1.id = 100;
+---- RESULTS
+NULL
+---- TYPES
+INT
+====

Reply via email to