This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new ea3cb317eb9 branch-3.1: [opt](orc-reader)Turn on late materialization
of orc complex types. #49718 (#52079)
ea3cb317eb9 is described below
commit ea3cb317eb9435a56a9e10ce5fd2a1af2c7906f1
Author: Qi Chen <[email protected]>
AuthorDate: Thu Jun 26 14:30:25 2025 +0800
branch-3.1: [opt](orc-reader)Turn on late materialization of orc complex
types. #49718 (#52079)
Cherry-pick from #49718
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 3 +-
.../orc_nested_types/create_table.hql | 32 +++
.../multi_catalog/orc_nested_types/data.tar.gz | Bin 0 -> 2965 bytes
.../data/multi_catalog/orc_nested_types/run.sh | 12 ++
.../hive/test_orc_nested_types.out | Bin 0 -> 10661 bytes
.../hive/test_orc_nested_types.groovy | 236 +++++++++++++++++++++
6 files changed, 281 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index c50e1fa8883..911620a6100 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -921,8 +921,7 @@ Status OrcReader::set_fill_columns(
}
}
- if (!_has_complex_type && _enable_lazy_mat &&
- _lazy_read_ctx.predicate_columns.first.size() > 0 &&
+ if (_enable_lazy_mat && _lazy_read_ctx.predicate_columns.first.size() > 0
&&
_lazy_read_ctx.lazy_read_columns.size() > 0) {
_lazy_read_ctx.can_lazy_read = true;
}
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/create_table.hql
new file mode 100644
index 00000000000..a1a35827909
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/create_table.hql
@@ -0,0 +1,32 @@
+CREATE DATABASE IF NOT EXISTS multi_catalog;
+USE multi_catalog;
+-- Nested-type ORC fixture table; IF NOT EXISTS keeps a re-run of this script from failing.
+CREATE TABLE IF NOT EXISTS `nested_types1_orc` (
+ `id` INT,
+ `array_col` ARRAY<INT>,
+ `nested_array_col` ARRAY<ARRAY<INT>>,
+ `map_col` MAP<STRING, INT>,
+ `nested_map_col` MAP<STRING, ARRAY<INT>>,
+ `struct_col` STRUCT<`name`: STRING, `age`: INT>,
+ `array_struct_col` ARRAY<STRUCT<`name`: STRING, `age`: INT>>,
+ `map_struct_col` MAP<STRING, STRUCT<`name`: STRING, `age`: INT>>,
+ `complex_struct_col` STRUCT<
+ `a`: ARRAY<INT>,
+ `b`: MAP<STRING, ARRAY<INT>>,
+ `c`: STRUCT<
+ `x`: ARRAY<INT>,
+ `y`: STRING
+ >
+ >
+)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/nested_types1_orc';
+-- Metastore repair step; the table declares no partitions, so this is presumably a no-op (TODO confirm).
+msck repair table nested_types1_orc;
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/data.tar.gz
new file mode 100644
index 00000000000..d7be7822674
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/run.sh
new file mode 100644
index 00000000000..f3136eaa200
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_nested_types/run.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
diff --git
a/regression-test/data/external_table_p0/hive/test_orc_nested_types.out
b/regression-test/data/external_table_p0/hive/test_orc_nested_types.out
new file mode 100644
index 00000000000..6ee3da7b26d
Binary files /dev/null and
b/regression-test/data/external_table_p0/hive/test_orc_nested_types.out differ
diff --git
a/regression-test/suites/external_table_p0/hive/test_orc_nested_types.groovy
b/regression-test/suites/external_table_p0/hive/test_orc_nested_types.groovy
new file mode 100644
index 00000000000..f3d9af89e7e
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_orc_nested_types.groovy
@@ -0,0 +1,236 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite: reads the Hive ORC table `nested_types1_orc` (array, map and struct
+// columns) through an HMS catalog, once for hive2 and once for hive3, and runs the
+// tagged order_qt_* queries below against it.
+suite("test_orc_nested_types",
"p0,external,hive,external_docker,external_docker_hive") {
+ // Skip the whole suite unless the "enableHiveTest" config entry equals "true" (case-insensitive).
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return;
+ }
+
+ // Same query set for each Hive environment; every iteration uses its own catalog.
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ // Metastore port is looked up under "<hivePrefix>HmsPort", host under "externalEnvIp".
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String catalog_name = "${hivePrefix}_test_orc_nested_types"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ // Recreate the catalog from scratch so a leftover from an earlier run cannot interfere.
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+
+ sql """ use multi_catalog """
+
+ // q1-q3: predicate on id, projecting all columns or a single array column.
+ // NOTE(review): presumably meant to hit the ORC lazy-read path (filter on id,
+ // complex columns materialized late) - confirm against the reader change.
+ order_qt_nested_types1_q1 """select * from nested_types1_orc where id
= 1"""
+
+ order_qt_nested_types1_q2 """select array_col from nested_types1_orc
where id = 2"""
+
+ order_qt_nested_types1_q3 """select nested_array_col from
nested_types1_orc where id = 3"""
+
+
+ // q4-q7: array functions over array_col (size, element access, array_contains, min/max).
+ order_qt_nested_types1_q4 """
+ SELECT id, size(array_col) as arr_size
+ FROM nested_types1_orc
+ ORDER BY id
+ """
+
+ order_qt_nested_types1_q5 """
+ SELECT id, array_col[0] as first_elem, array_col[2] as third_elem
+ FROM nested_types1_orc
+ ORDER BY id
+ """
+
+ order_qt_nested_types1_q6 """
+ SELECT id, array_col
+ FROM nested_types1_orc
+ WHERE array_contains(array_col, 1)
+ ORDER BY id
+ """
+
+ order_qt_nested_types1_q7 """
+ SELECT
+ id,
+ array_min(array_col) as min_val,
+ array_max(array_col) as max_val
+ FROM nested_types1_orc
+ ORDER BY id
+ """
+
+ // q8-q10: nested_array_col - outer size, inner size and inner-array access.
+ order_qt_nested_types1_q8 """
+ SELECT
+ id,
+ nested_array_col,
+ size(nested_array_col) as outer_size
+ FROM nested_types1_orc
+ WHERE id = 1
+ """
+
+ order_qt_nested_types1_q9 """
+ SELECT
+ id,
+ nested_array_col,
+ size(nested_array_col[0]) as inner_size
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ order_qt_nested_types1_q10 """
+ SELECT
+ id,
+ nested_array_col[0] as first_inner_array
+ FROM nested_types1_orc
+ WHERE id = 3
+ """
+
+ // q11-q13: map size and key lookup on map_col / nested_map_col.
+ order_qt_nested_types1_q11 """
+ SELECT
+ id,
+ map_col,
+ size(map_col) as map_size
+ FROM nested_types1_orc
+ WHERE id = 1
+ """
+
+ order_qt_nested_types1_q12 """
+ SELECT
+ id,
+ map_col['a'] as a_value
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ order_qt_nested_types1_q13 """
+ SELECT
+ id,
+ nested_map_col['b'] as b_value
+ FROM nested_types1_orc
+ WHERE id = 3
+ """
+
+ // q14: struct field access through struct_element.
+ order_qt_nested_types1_q14 """
+ SELECT
+ id,
+ struct_element(struct_col, 'name') as name,
+ struct_element(struct_col, 'age') as age
+ FROM nested_types1_orc
+ WHERE id = 1
+ """
+
+ // q15-q16: array of structs - size, then explode with a filter on a struct field.
+ order_qt_nested_types1_q15 """
+ SELECT
+ id,
+ array_struct_col,
+ size(array_struct_col) as struct_arr_size
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ order_qt_nested_types1_q16 """
+ SELECT
+ id,
+ struct_element(item, 'name') as name,
+ struct_element(item, 'age') as age
+ FROM nested_types1_orc
+ LATERAL VIEW EXPLODE(array_struct_col) tmp AS item
+ WHERE id = 1 AND struct_element(item, 'age') > 30
+ """
+
+ // q17: struct fields of the map value stored under key 'a'.
+ order_qt_nested_types1_q17 """
+ SELECT
+ id,
+ struct_element(map_struct_col['a'], 'name') as name,
+ struct_element(map_struct_col['a'], 'age') as age
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ // q18-q22: fields of complex_struct_col, including fields of its nested struct 'c'.
+ order_qt_nested_types1_q18 """
+ SELECT
+ id,
+ struct_element(complex_struct_col, 'a') as array_a
+ FROM nested_types1_orc
+ WHERE id = 1
+ """
+
+ order_qt_nested_types1_q19 """
+ SELECT
+ id,
+ struct_element(complex_struct_col, 'b') as map_b
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ order_qt_nested_types1_q20 """
+ SELECT
+ id,
+ struct_element(complex_struct_col, 'c') as struct_c
+ FROM nested_types1_orc
+ WHERE id = 3
+ """
+
+ order_qt_nested_types1_q21 """
+ SELECT
+ id,
+ struct_element(struct_element(complex_struct_col, 'c'), 'x')
as array_x
+ FROM nested_types1_orc
+ WHERE id = 1
+ """
+
+ order_qt_nested_types1_q22 """
+ SELECT
+ id,
+ struct_element(struct_element(complex_struct_col, 'c'), 'y')
as y_value
+ FROM nested_types1_orc
+ WHERE id = 2
+ """
+
+ // q23: unfiltered read of every column.
+ order_qt_nested_types1_q23 """
+ SELECT *
+ FROM nested_types1_orc
+ ORDER BY id
+ """
+
+ // q24: same query text as q6 (filter on the array column itself).
+ order_qt_nested_types1_q24 """
+ SELECT id, array_col
+ FROM nested_types1_orc
+ WHERE array_contains(array_col, 1)
+ ORDER BY id
+ """
+
+ // q25-q26: predicates on a struct field (equality, then LIKE).
+ order_qt_nested_types1_q25 """
+ SELECT
+ id,
+ struct_element(struct_col, 'age') as age
+ FROM nested_types1_orc
+ WHERE struct_element(struct_col, 'name') = 'Alice'
+ """
+
+ order_qt_nested_types1_q26 """
+ SELECT
+ id,
+ struct_element(struct_col, 'age') as age
+ FROM nested_types1_orc
+ WHERE struct_element(struct_col, 'name') LIKE '%A%'
+ """
+
+ // Remove the catalog created for this Hive environment.
+ sql """drop catalog ${catalog_name};"""
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]