This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new e7e1e967cfc [test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)
e7e1e967cfc is described below
commit e7e1e967cfcdbe03bd5fe20344e38b7c22d3e48d
Author: wuwenchi <[email protected]>
AuthorDate: Tue Jul 2 22:50:53 2024 +0800
[test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)
pick #37004
---
.../docker-compose/hive/hive-2x.yaml.tpl | 2 +-
.../docker-compose/hive/hive-3x.yaml.tpl | 2 +-
.../hive_textfile_array_all_types/create_table.hql | 27 ++
.../hive_textfile_array_all_types/data.tar.gz | Bin 0 -> 625 bytes
.../hive_textfile_array_all_types/run.sh | 13 +
.../hive_textfile_array_delimiter/create_table.hql | 32 ++
.../hive_textfile_array_delimiter/data.tar.gz | Bin 0 -> 690 bytes
.../hive_textfile_array_delimiter/run.sh | 13 +
.../hive_textfile_nestedarray/create_table.hql | 16 +
.../hive_textfile_nestedarray/data.tar.gz | Bin 0 -> 280 bytes
.../multi_catalog/hive_textfile_nestedarray/run.sh | 13 +
.../multi_catalog/logs1_parquet/create_table.hql | 39 +++
.../data/multi_catalog/logs1_parquet/run.sh | 21 ++
.../multi_catalog/one_partition/create_table.hql | 22 ++
.../data/multi_catalog/one_partition/data.tar.gz | Bin 0 -> 296 bytes
.../data/multi_catalog/one_partition/run.sh | 13 +
.../test_csv_format_error/create_table.hql | 68 ++++
.../test_csv_format_error/data.tar.gz | Bin 0 -> 151583 bytes
.../multi_catalog/test_csv_format_error/run.sh | 13 +
.../test_date_string_partition/create_table.hql | 25 ++
.../test_date_string_partition/data.tar.gz | Bin 0 -> 353 bytes
.../test_date_string_partition/run.sh | 13 +
.../multi_catalog/two_partition/create_table.hql | 25 ++
.../data/multi_catalog/two_partition/data.tar.gz | Bin 0 -> 375 bytes
.../data/multi_catalog/two_partition/run.sh | 13 +
.../data/statistics/statistics/create_table.hql | 33 ++
.../scripts/data/statistics/statistics/data.tar.gz | Bin 0 -> 3956 bytes
.../hive/scripts/data/statistics/statistics/run.sh | 13 +
.../data/tpch_1000_parquet/part/create_table.hql | 24 ++
.../scripts/data/tpch_1000_parquet/part/run.sh | 22 ++
.../hive/test_hive_default_partition.out | 174 +++++++++++
.../hive/test_hive_to_array.out | 21 ++
.../hive/test_text_garbled_file.out | Bin 296830 -> 593565 bytes
.../hive/test_hive_default_partition.groovy | 17 +-
.../hive/test_hive_statistic.groovy | 344 +++++++++++++++++++++
.../hive/test_hive_statistic_timeout.groovy | 23 +-
.../hive/test_hive_to_array.groovy | 17 +-
.../hive/test_text_garbled_file.groovy | 47 +++
.../hive/test_hive_statistic.groovy | 338 --------------------
.../hive/test_text_garbled_file.groovy | 46 ---
40 files changed, 1085 insertions(+), 404 deletions(-)
diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
index ca0fe2e9ddb..de19ed9de18 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
@@ -101,7 +101,7 @@ services:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
- interval: 5s
+ interval: 10s
timeout: 60s
retries: 120
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
index 09d150c17b2..9004f28b61a 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -101,7 +101,7 @@ services:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
- interval: 5s
+ interval: 10s
timeout: 60s
retries: 120
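
Both hunks relax the same healthcheck: compose marks a container unhealthy only after roughly interval x retries consecutive failures, so probing every 10s with 120 retries gives the metastore's Postgres about 20 minutes to come up while halving the pg_isready churn. For local debugging, the probe history can be inspected directly; this is only a sketch, and the container name is a placeholder to adjust for your compose project:

    # Current health state reported by the compose healthcheck
    docker inspect --format '{{.State.Health.Status}}' hive-postgres
    # Recent probe runs, including each pg_isready invocation's output
    docker inspect --format '{{json .State.Health.Log}}' hive-postgres | python3 -m json.tool
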
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
new file mode 100644
index 00000000000..6b700396838
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
@@ -0,0 +1,27 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_all_types`(
+ `col1` array<tinyint>,
+ `col2` array<smallint>,
+ `col3` array<int>,
+ `col4` array<bigint>,
+ `col5` array<boolean>,
+ `col6` array<float>,
+ `col7` array<double>,
+ `col8` array<string>,
+ `col9` array<timestamp>,
+ `col10` array<date>,
+ `col11` array<decimal(10,3)>,
+ `col12` array<char(1)>,
+ `col13` array<varchar(10)>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/hive_textfile_array_all_types';
+
+msck repair table hive_textfile_array_all_types;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
new file mode 100644
index 00000000000..b9d64ab29bc
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
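
This run.sh is the template the new datasets below all follow: unpack the tarball next to the script, push the files under /user/doris/suites/<db>/, then run create_table.hql so the external table picks up the uploaded location. A hedged variant, in case re-runs ever need to skip the upload, could gate on the HDFS directory already existing (hadoop fs -test -d is a standard flag; the path mirrors the script above):

    CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
    DEST=/user/doris/suites/multi_catalog
    # Only unpack and upload when the dataset is not on HDFS yet
    if ! hadoop fs -test -d "${DEST}/hive_textfile_array_all_types"; then
        cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
        hadoop fs -mkdir -p "${DEST}/"
        hadoop fs -put "${CUR_DIR}"/data/* "${DEST}/"
    fi
    hive -f "${CUR_DIR}"/create_table.hql
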
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql
new file mode 100644
index 00000000000..7e40a2c6bb7
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql
@@ -0,0 +1,32 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_delimiter`(
+ `col1` array<tinyint>,
+ `col2` array<smallint>,
+ `col3` array<int>,
+ `col4` array<bigint>,
+ `col5` array<boolean>,
+ `col6` array<float>,
+ `col7` array<double>,
+ `col8` array<string>,
+ `col9` array<timestamp>,
+ `col10` array<date>,
+ `col11` array<decimal(10,3)>,
+ `col12` int,
+ `col13` array<array<array<int>>>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'colelction.delim'=',',
+ 'field.delim'='\t',
+ 'line.delim'='\n',
+ 'serialization.format'='\t')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/hive_textfile_array_delimiter';
+
+msck repair table hive_textfile_array_delimiter;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz
new file mode 100644
index 00000000000..f2ec01fa79f
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql
new file mode 100644
index 00000000000..478a5341537
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql
@@ -0,0 +1,16 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_nestedarray`(
+ `col1` int,
+ `col2` array<array<array<int>>>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/hive_textfile_nestedarray';
+
+msck repair table hive_textfile_nestedarray;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz
new file mode 100644
index 00000000000..7c6f8bfef0c
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql
new file mode 100644
index 00000000000..d4fb4be9a4e
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql
@@ -0,0 +1,39 @@
+
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `logs1_parquet`(
+ `log_time` timestamp,
+ `machine_name` varchar(128),
+ `machine_group` varchar(128),
+ `cpu_idle` float,
+ `cpu_nice` float,
+ `cpu_system` float,
+ `cpu_user` float,
+ `cpu_wio` float,
+ `disk_free` float,
+ `disk_total` float,
+ `part_max_used` float,
+ `load_fifteen` float,
+ `load_five` float,
+ `load_one` float,
+ `mem_buffers` float,
+ `mem_cached` float,
+ `mem_free` float,
+ `mem_shared` float,
+ `swap_free` float,
+ `bytes_in` float,
+ `bytes_out` float)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'=',',
+ 'serialization.format'=',')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/logs1_parquet';
+
+msck repair table logs1_parquet;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh
new file mode 100644
index 00000000000..219916c0119
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+if [[ ! -d "${CUR_DIR}/data" ]]; then
+ echo "${CUR_DIR}/data does not exist"
+ cd "${CUR_DIR}" && rm -f data.tar.gz \
+ && curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/multi_catalog/logs1_parquet/data.tar.gz \
+ && tar xzf data.tar.gz
+ cd -
+else
+ echo "${CUR_DIR}/data exist, continue !"
+fi
+
+## mkdir and put data to hdfs
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
\ No newline at end of file
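
Unlike the loaders above, this script fetches its tarball from object storage (the bucket and endpoint in the URL are placeholders), and a partial download would currently be extracted as-is. A more defensive fetch is sketched below; -f, -S, -L and --retry are standard curl flags, and tar tzf only lists the archive, so a truncated file fails fast instead of extracting partial data:

    curl -fSL --retry 3 -O "https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/multi_catalog/logs1_parquet/data.tar.gz"
    tar tzf data.tar.gz >/dev/null && tar xzf data.tar.gz
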
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql
new file mode 100644
index 00000000000..8d12858b5f1
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql
@@ -0,0 +1,22 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+CREATE TABLE IF NOT EXISTS `one_partition`(
+ `id` int)
+PARTITIONED BY (
+ `part1` int)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'='|',
+ 'serialization.format'='|')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/one_partition';
+
+
+msck repair table one_partition;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz
new file mode 100644
index 00000000000..ff9d50b2da6
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql
new file mode 100644
index 00000000000..a1a31120325
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql
@@ -0,0 +1,68 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `test_csv_format_error`(
+  `device_id` string COMMENT 'unique device ID',
+  `user_id` bigint COMMENT 'unique device ID, hash of DEVICE_ID',
+  `user_app_id` int COMMENT 'ID of the user using the sample app',
+  `standard_app_id` int COMMENT 'standard app ID',
+  `standard_app_name` string COMMENT 'standard app name',
+  `standard_package_name` string COMMENT 'standard app package name',
+  `device_brand_id` int COMMENT 'device brand ID',
+  `device_brand_name` string COMMENT 'device brand name',
+  `device_eqp_id` int COMMENT 'device model ID',
+  `device_eqp_name` string COMMENT 'device model name',
+  `os_version_id` int COMMENT 'OS version ID',
+  `os_version_name` string COMMENT 'OS version name',
+  `os_type_id` int COMMENT 'OS type ID: 0 Android, 1 iOS',
+  `os_type_name` string COMMENT 'OS type name: 0 Android, 1 iOS',
+  `os_name` string COMMENT 'OS name',
+  `oem_os_version` string COMMENT 'vendor-customized OS version',
+  `oem_os_name` string COMMENT 'vendor-customized OS name',
+  `app_version` string COMMENT 'sample app version',
+  `app_key` string COMMENT 'sample app key',
+  `app_channel` string COMMENT 'promotion channel',
+  `package_name` string COMMENT 'host app package name',
+  `app_name` string COMMENT 'host app name',
+  `sdk_version` string COMMENT 'SDK version',
+  `api_level` string COMMENT 'API level',
+  `carrier_id` int COMMENT 'carrier ID',
+  `carrier_name` string COMMENT 'carrier name',
+  `phone_num` string COMMENT 'phone number',
+  `ip` string COMMENT 'IP address',
+  `country_id` int COMMENT 'country ID',
+  `country_name` string COMMENT 'country name',
+  `province_id` int COMMENT 'province ID',
+  `province_name` string COMMENT 'province name',
+  `city_id` int COMMENT 'prefecture-level city ID',
+  `city_name` string COMMENT 'prefecture-level city name',
+  `county_id` int COMMENT 'county-level city ID',
+  `county_name` string COMMENT 'county-level city name',
+  `mac_address` string COMMENT 'MAC address',
+  `network_id` int COMMENT 'network type ID',
+  `network_name` string COMMENT 'network type',
+  `org_package_name` string COMMENT 'original app package name',
+  `org_app_name` string COMMENT 'original app name',
+  `org_app_version` string COMMENT 'original app version',
+  `app_flag` int COMMENT 'install, update, or uninstall',
+  `action_time` string COMMENT 'time the action occurred',
+  `day_realy` string COMMENT 'date the action occurred',
+  `memo` map<string,string> COMMENT 'remarks')
+COMMENT 'ods-App_Installed'
+PARTITIONED BY (
+ `day` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'colelction.delim'=',',
+ 'field.delim'='\t',
+ 'mapkey.delim'=':',
+ 'serialization.format'='\t')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/test_csv_format_error';
+
+msck repair table test_csv_format_error;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz
new file mode 100644
index 00000000000..c0c9d63e2b8
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql
new file mode 100644
index 00000000000..85db9993656
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql
@@ -0,0 +1,25 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+
+CREATE TABLE IF NOT EXISTS `test_date_string_partition`(
+ `k1` int)
+PARTITIONED BY (
+ `day1` string,
+ `day2` date)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'=',',
+ 'serialization.format'=',')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/test_date_string_partition';
+
+
+msck repair table test_date_string_partition;
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz
new file mode 100644
index 00000000000..70f7491953a
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql
new file mode 100644
index 00000000000..7cdc6015674
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql
@@ -0,0 +1,25 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+CREATE TABLE IF NOT EXISTS `two_partition`(
+ `id` int)
+PARTITIONED BY (
+ `part1` int,
+ `part2` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'='|',
+ 'serialization.format'='|')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ '/user/doris/suites/multi_catalog/two_partition';
+
+
+
+msck repair table two_partition;
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz
new file mode 100644
index 00000000000..cf12e715f3e
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql
new file mode 100644
index 00000000000..e131a4dc78d
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql
@@ -0,0 +1,33 @@
+create database if not exists statistics;
+use statistics;
+
+
+CREATE TABLE IF NOT EXISTS `statistics`(
+ `lo_orderkey` int,
+ `lo_linenumber` int,
+ `lo_custkey` int,
+ `lo_partkey` int,
+ `lo_suppkey` int,
+ `lo_orderdate` int,
+ `lo_orderpriority` string,
+ `lo_shippriority` int,
+ `lo_quantity` int,
+ `lo_extendedprice` int,
+ `lo_ordtotalprice` int,
+ `lo_discount` int,
+ `lo_revenue` int,
+ `lo_supplycost` int,
+ `lo_tax` int,
+ `lo_commitdate` int,
+ `lo_shipmode` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ '/user/doris/suites/statistics/statistics';
+
+
+msck repair table statistics;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz
new file mode 100644
index 00000000000..bf2af70acb9
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh
new file mode 100755
index 00000000000..1b9818427b7
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/statistics/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/statistics/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql
new file mode 100644
index 00000000000..bf02000f3f7
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql
@@ -0,0 +1,24 @@
+create database if not exists tpch_1000_parquet;
+use tpch_1000_parquet;
+
+
+CREATE TABLE IF NOT EXISTS `part`(
+ `p_partkey` int,
+ `p_name` varchar(55),
+ `p_mfgr` char(25),
+ `p_brand` char(10),
+ `p_type` varchar(25),
+ `p_size` int,
+ `p_container` char(10),
+ `p_retailprice` decimal(15,2),
+ `p_comment` varchar(23))
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ '/user/doris/suites/tpch_1000_parquet/part';
+
+msck repair table part;
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh
b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh
new file mode 100644
index 00000000000..8d77dd927bf
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -x
+
+# CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+# if [[ ! -d "${CUR_DIR}/data" ]]; then
+# echo "${CUR_DIR}/data does not exist"
+# cd "${CUR_DIR}" && rm -f data.tar.gz \
+# && curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/tpch_1000_parquet/part/data.tar.gz \
+# && tar xzf data.tar.gz
+# cd -
+# else
+# echo "${CUR_DIR}/data exist, continue !"
+# fi
+
+# ## mkdir and put data to hdfs
+# hadoop fs -mkdir -p /user/doris/suites/tpch_1000_parquet/
+# hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/tpch_1000_parquet/
+
+# # create table
+# hive -f "${CUR_DIR}"/create_table.hql
+
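
The body of this loader is intentionally left commented out: the tpch_1000_parquet dataset is not yet loaded in the p0 docker environment (the matching analyze test below carries a "TODO tpch will be supported in future"). If it is ever re-enabled, a capacity guard along these lines (a sketch; the 90% threshold is arbitrary) would keep a large load from filling the test cluster's HDFS:

    # hadoop fs -df prints a header row, then Size/Used/Available/Use%
    used=$(hadoop fs -df / | awk 'NR==2 {print int($5)}')
    if [[ "${used:-100}" -ge 90 ]]; then
        echo "HDFS is ${used}% full, skipping tpch_1000_parquet load"
        exit 0
    fi
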
diff --git
a/regression-test/data/external_table_p2/hive/test_hive_default_partition.out
b/regression-test/data/external_table_p0/hive/test_hive_default_partition.out
similarity index 51%
rename from
regression-test/data/external_table_p2/hive/test_hive_default_partition.out
rename to
regression-test/data/external_table_p0/hive/test_hive_default_partition.out
index cace5bba2bd..24227b9af22 100644
---
a/regression-test/data/external_table_p2/hive/test_hive_default_partition.out
+++
b/regression-test/data/external_table_p0/hive/test_hive_default_partition.out
@@ -173,3 +173,177 @@
-- !string_part_prune5 --
2 2023-08-16 2023-08-16
+-- !one_partition1 --
+1 1
+2 1
+3 2
+4 2
+5 \N
+6 \N
+
+-- !one_partition2 --
+5 \N
+6 \N
+
+-- !one_partition3 --
+1
+2
+3
+4
+
+-- !one_partition4 --
+1
+1
+2
+2
+
+-- !one_partition5 --
+4 2
+5 \N
+6 \N
+
+-- !one_partition6 --
+3 2
+4 2
+5 \N
+6 \N
+
+-- !two_partition1 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition2 --
+1 \N one
+2 \N one
+7 \N \N
+8 \N \N
+
+-- !two_partition3 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition4 --
+3 2 \N
+4 2 \N
+7 \N \N
+8 \N \N
+
+-- !two_partition5 --
+1 \N one
+2 \N one
+5 3 three
+6 3 three
+
+-- !two_partition6 --
+5 3 three
+6 3 three
+
+-- !two_partition7 --
+1 \N one
+2 \N one
+
+-- !two_partition8 --
+3 2 \N
+4 2 \N
+
+-- !two_partition9 --
+7 \N \N
+8 \N \N
+
+-- !two_partition10 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition11 --
+1 \N one
+2 \N one
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition12 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition13 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+7 \N \N
+8 \N \N
+
+-- !two_partition14 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition15 --
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition16 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition17 --
+1 \N one
+2 \N one
+
+-- !two_partition18 --
+5 3 three
+6 3 three
+
+-- !string_part_prune1 --
+3 2023-08-17 2023-08-17
+
+-- !string_part_prune2 --
+
+-- !string_part_prune3 --
+3 2023-08-17 2023-08-17
+
+-- !string_part_prune4 --
+1 2023-08-15 2023-08-15
+1 2023-8-15 2023-08-15
+2 2023-08-16 2023-08-16
+3 2023-08-17 2023-08-17
+
+-- !string_part_prune5 --
+3 2023-08-17 2023-08-17
+
+-- !string_part_prune5 --
+1 2023-08-15 2023-08-15
+2 2023-08-16 2023-08-16
+
+-- !string_part_prune5 --
+2 2023-08-16 2023-08-16
+
+-- !string_part_prune5 --
+2 2023-08-16 2023-08-16
+
+-- !string_part_prune5 --
+2 2023-08-16 2023-08-16
+
diff --git a/regression-test/data/external_table_p2/hive/test_hive_to_array.out
b/regression-test/data/external_table_p0/hive/test_hive_to_array.out
similarity index 50%
rename from regression-test/data/external_table_p2/hive/test_hive_to_array.out
rename to regression-test/data/external_table_p0/hive/test_hive_to_array.out
index 0965e8d8be4..5dab8a92760 100644
--- a/regression-test/data/external_table_p2/hive/test_hive_to_array.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_to_array.out
@@ -20,3 +20,24 @@
[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] 5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, [...]
[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] 4 [[[1]]]
+-- !types --
+[1, 2, 3, 4] [10, 20, 30, 40] [100, 200, 300] [100000000000000, 20000000000000, 30000000000000, 40000000000000] [1, 0, 1] [1.23, 4.56, 7.89] [10.1, 20.2, 30.3] ["apple", "banana", "orange"] ["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"] ["2023-07-04", "2023-07-05", "2023-07-06"] [1111111.111, 2111111.111, 3111111.111] ["a", "b", "c"] ["aa", "bb", "cc"]
+[10, 20, 30] [100, 200, 300, 400] [1000, 2000, 3000] [1000000000000000, 200000000000000, 300000000000000, 400000000000000] [1, 1, 1, 1] [12.3, 45.6, 78.9] [100.1, 200.2, 300.3] ["grapes", "watermelon", "kiwi"] ["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"] ["2021-07-05", "2021-07-05", "2021-07-05"] [2222222.111, 2222222.112, 2222222.113] \N \N
+[20, 30, 40, 50] [200, 300, 400, 500] [10000, 20000, 30000] [100000000000000, 20000000000000, 30000000000000] [1, 1, 1, 1, 0, 0] [120.3, 450.6, 780.9] [100.001, 200.002, 300.003] ["melon", "strawberry", "blueberry"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [1111111.111, 2111111.111, 3111111.111] \N \N
+[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] ["d", "d", "d", "d"] ["ffffffff"]
+[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] ["1"] ["hello", "world"]
+
+-- !array --
+1 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+2 [[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
+3 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
+4 [[[1]]]
+5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+
+-- !delimiter --
+[1, 2, 3, 4] [10, 20, 30, 40] [100, 200, 300] [100000000000000, 20000000000000, 30000000000000, 40000000000000] [1, 0, 1] [1.23, 4.56, 7.89] [10.1, 20.2, 30.3] ["apple", "banana", "orange"] ["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"] ["2023-07-04", "2023-07-05", "2023-07-06"] [1111111.111, 2111111.111, 3111111.111] 1 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+[10, 20, 30] [100, 200, 300, 400] [1000, 2000, 3000] [1000000000000000, 200000000000000, 300000000000000, 400000000000000] [1, 1, 1, 1] [12.3, 45.6, 78.9] [100.1, 200.2, 300.3] ["grapes", "watermelon", "kiwi"] ["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"] ["2021-07-05", "2021-07-05", "2021-07-05"] [2222222.111, 2222222.112, 2222222.113] 2 [[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
+[20, 30, 40, 50] [200, 300, 400, 500] [10000, 20000, 30000] [100000000000000, 20000000000000, 30000000000000] [1, 1, 1, 1, 0, 0] [120.3, 450.6, 780.9] [100.001, 200.002, 300.003] ["melon", "strawberry", "blueberry"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [1111111.111, 2111111.111, 3111111.111] 3 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
+[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] 5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, [...]
+[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] 4 [[[1]]]
+
diff --git
a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out
b/regression-test/data/external_table_p0/hive/test_text_garbled_file.out
similarity index 50%
rename from
regression-test/data/external_table_p2/hive/test_text_garbled_file.out
rename to regression-test/data/external_table_p0/hive/test_text_garbled_file.out
index b003cd49e97..0c44dc7de0a 100644
Binary files
a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out and
b/regression-test/data/external_table_p0/hive/test_text_garbled_file.out differ
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
similarity index 95%
rename from
regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy
rename to
regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
index 6e7215e31d1..9fdd77b9711 100644
---
a/regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-suite("test_hive_default_partition",
"p2,external,hive,external_remote,external_remote_hive") {
+suite("test_hive_default_partition",
"p0,external,hive,external_docker,external_docker_hive") {
def one_partition1 = """select * from one_partition order by id;"""
def one_partition2 = """select id, part1 from one_partition where part1 is null order by id;"""
def one_partition3 = """select id from one_partition where part1 is not null order by id;"""
@@ -52,11 +52,16 @@ suite("test_hive_default_partition", "p2,external,hive,external_remote,external_
def string_part_prune8 = """select * from test_date_string_partition where cast(day1 as date) in ("2023-08-16", "2023-08-18");"""
def string_part_prune9 = """select * from test_date_string_partition where cast(day1 as date) in (cast("2023-08-16" as date), "2023-08-18");"""
- String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
- String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
- String catalog_name = "hive_default_partition"
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable hive test.")
+ return;
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+ String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+ String catalog_name = hivePrefix + "_hive_default_partition"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
new file mode 100644
index 00000000000..5da056ff00e
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic",
"p0,external,hive,external_docker,external_docker_hive") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+ String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+ String catalog_name = hivePrefix + "_test_hive_statistic"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+
+ // TODO will be supported in future
+ // Test analyze table without init.
+ // sql """analyze table ${catalog_name}.tpch_1000_parquet.region with
sync"""
+
+ // logger.info("switched to catalog " + catalog_name)
+ // sql """use statistics;"""
+ // sql """analyze table `statistics` with sync"""
+ // def result = sql """show column stats `statistics` (lo_quantity)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_quantity")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "46.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "1")
+ // assertEquals(result[0][8], "50")
+
+ // result = sql """show column stats `statistics` (lo_orderkey)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_orderkey")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "26.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "1")
+ // assertEquals(result[0][8], "98")
+
+ // result = sql """show column stats `statistics` (lo_linenumber)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_linenumber")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "7.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "1")
+ // assertEquals(result[0][8], "7")
+
+ // result = sql """show column stats `statistics` (lo_custkey)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_custkey")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "26.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "67423")
+ // assertEquals(result[0][8], "2735521")
+
+ // result = sql """show column stats `statistics` (lo_partkey)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_partkey")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "100.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "2250")
+ // assertEquals(result[0][8], "989601")
+
+ // result = sql """show column stats `statistics` (lo_suppkey)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_suppkey")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "100.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "4167")
+ // assertEquals(result[0][8], "195845")
+
+ // result = sql """show column stats `statistics` (lo_orderdate)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_orderdate")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "26.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "19920221")
+ // assertEquals(result[0][8], "19980721")
+
+ // result = sql """show column stats `statistics` (lo_orderpriority)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_orderpriority")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "5.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "880.0")
+ // assertEquals(result[0][6], "8.8")
+ // assertEquals(result[0][7], "'1-URGENT'")
+ // assertEquals(result[0][8], "'5-LOW'")
+
+ // result = sql """show column stats `statistics` (lo_shippriority)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_shippriority")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "1.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "0")
+ // assertEquals(result[0][8], "0")
+
+ // result = sql """show column stats `statistics` (lo_extendedprice)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_extendedprice")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "100.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "104300")
+ // assertEquals(result[0][8], "9066094")
+
+ // result = sql """show column stats `statistics` (lo_ordtotalprice)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_ordtotalprice")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "26.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "3428256")
+ // assertEquals(result[0][8], "36771805")
+
+ // result = sql """show column stats `statistics` (lo_discount)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_discount")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "11.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "0")
+ // assertEquals(result[0][8], "10")
+
+ // result = sql """show column stats `statistics` (lo_revenue)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_revenue")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "100.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "101171")
+ // assertEquals(result[0][8], "8703450")
+
+ // result = sql """show column stats `statistics` (lo_supplycost)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_supplycost")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "100.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "58023")
+ // assertEquals(result[0][8], "121374")
+
+ // result = sql """show column stats `statistics` (lo_tax)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_tax")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "9.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "0")
+ // assertEquals(result[0][8], "8")
+
+ // result = sql """show column stats `statistics` (lo_commitdate)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_commitdate")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "95.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "400.0")
+ // assertEquals(result[0][6], "4.0")
+ // assertEquals(result[0][7], "19920515")
+ // assertEquals(result[0][8], "19981016")
+
+ // result = sql """show column stats `statistics` (lo_shipmode)"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_shipmode")
+ // assertEquals(result[0][2], "100.0")
+ // assertEquals(result[0][3], "7.0")
+ // assertEquals(result[0][4], "0.0")
+ // assertEquals(result[0][5], "421.0")
+ // assertEquals(result[0][6], "4.21")
+ // assertEquals(result[0][7], "'AIR'")
+ // assertEquals(result[0][8], "'TRUCK'")
+
+ // sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS
('row_count'='6001215')"""
+ // result = sql "show column stats `statistics` (lo_shipmode)"
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][0], "lo_shipmode")
+ // assertEquals(result[0][2], "6001215.0")
+
+ // sql """drop stats statistics"""
+ // result = sql """show column stats statistics"""
+ // assertEquals(result.size(), 0)
+
+ // sql """analyze database `statistics` with sync"""
+ // result = sql """show table stats statistics"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][2], "100")
+
+ // result = sql """show table cached stats statistics"""
+ // assertEquals(result.size(), 1)
+ // assertEquals(result[0][2], "100")
+
+ // sql """drop stats statistics"""
+ // result = sql """show column cached stats statistics"""
+ // assertEquals(result.size(), 0)
+
+ sql """use multi_catalog"""
+ sql """analyze table logs1_parquet (log_time) with sync"""
+ def ctlId
+ def dbId
+ def tblId
+ result = sql """show catalogs"""
+
+ for (int i = 0; i < result.size(); i++) {
+ if (result[i][1] == catalog_name) {
+ ctlId = result[i][0]
+ }
+ }
+ result = sql """show proc '/catalogs/$ctlId'"""
+ for (int i = 0; i < result.size(); i++) {
+ if (result[i][1] == 'multi_catalog') {
+ dbId = result[i][0]
+ }
+ }
+ result = sql """show proc '/catalogs/$ctlId/$dbId'"""
+ for (int i = 0; i < result.size(); i++) {
+ if (result[i][1] == 'logs1_parquet') {
+ tblId = result[i][0]
+ }
+ }
+
+ result = sql """select * from
internal.__internal_schema.column_statistics where id =
'${tblId}--1-log_time'"""
+ assertEquals(result.size(), 1)
+ def id = result[0][0]
+ def catalog_id = result[0][1]
+ def db_id = result[0][2]
+ def tbl_id = result[0][3]
+ def idx_id = result[0][4]
+ def col_id = result[0][5]
+ def count = result[0][7]
+ def ndv = result[0][8]
+ def null_count = result[0][9]
+ def data_size_in_bytes = result[0][12]
+ def update_time = result[0][13]
+
+ sql """insert into internal.__internal_schema.column_statistics values
('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count,
$ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
+
+ result = sql """show column stats logs1_parquet (log_time)"""
+ assertEquals(result.size(), 1)
+ assertEquals(result[0][7], "N/A")
+ assertEquals(result[0][8], "N/A")
+
+ sql """use tpch1_parquet;"""
+ sql """drop stats region"""
+ sql """alter table region modify column r_comment set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0',
'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special
accounts cajole carefully blithely close requests. carefully final asymptotes
haggle furiousl');"""
+ sql """alter table region modify column r_name set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0',
'min_value'='AFRICA', 'max_value'='MIDDLE EAST');"""
+ sql """alter table region modify column r_regionkey set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0',
'min_value'='0', 'max_value'='4');"""
+ result = sql """show column stats region(r_regionkey)"""
+ assertEquals(result.size(), 1)
+ assertEquals(result[0][0], "r_regionkey")
+ assertEquals(result[0][2], "5.0")
+ assertEquals(result[0][3], "5.0")
+ assertEquals(result[0][4], "0.0")
+ assertEquals(result[0][5], "20.0")
+ assertEquals(result[0][6], "4.0")
+ assertEquals(result[0][7], "0")
+ assertEquals(result[0][8], "4")
+
+ result = sql """show column stats region(r_comment)"""
+ assertEquals(result.size(), 1)
+ assertEquals(result[0][0], "r_comment")
+ assertEquals(result[0][2], "5.0")
+ assertEquals(result[0][3], "5.0")
+ assertEquals(result[0][4], "0.0")
+ assertEquals(result[0][5], "330.0")
+ assertEquals(result[0][6], "66.0")
+ assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'")
+ assertEquals(result[0][8], "\'uickly special accounts cajole carefully
blithely close requests. carefully final asymptotes haggle furiousl\'")
+
+ result = sql """show column stats region(r_name)"""
+ assertEquals(result.size(), 1)
+ assertEquals(result[0][0], "r_name")
+ assertEquals(result[0][2], "5.0")
+ assertEquals(result[0][3], "5.0")
+ assertEquals(result[0][4], "0.0")
+ assertEquals(result[0][5], "34.0")
+ assertEquals(result[0][6], "6.8")
+ assertEquals(result[0][7], "\'AFRICA\'")
+ assertEquals(result[0][8], "\'MIDDLE EAST\'")
+
+ sql """drop catalog ${catalog_name}"""
+ }
+}
+
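
The live tail of this suite (after the commented-out blocks) analyzes logs1_parquet, walks show catalogs and show proc to resolve the table id, writes the statistics row back into internal.__internal_schema.column_statistics with empty min/max, and asserts that show column stats degrades to "N/A" instead of erroring. Since the Doris FE speaks the MySQL protocol, the same internal table can be inspected by hand when a run fails; host, port, and user below are assumptions for a local FE:

    # Peek at stored column statistics over the MySQL protocol (9030 is
    # the default FE query port; adjust credentials for your deployment)
    mysql -h 127.0.0.1 -P 9030 -uroot -e \
        "select * from internal.__internal_schema.column_statistics limit 10;"
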
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy
similarity index 76%
rename from
regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
rename to
regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy
index a52a2304788..d1ae297f3d5 100644
---
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy
@@ -15,12 +15,21 @@
// specific language governing permissions and limitations
// under the License.
-suite("test_hive_statistic_timeout",
"p2,external,hive,external_remote,external_remote_hive, nonConcurrent") {
- String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
- String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
- String catalog_name = "test_hive_statistic_timeout"
+suite("test_hive_statistic_timeout",
"p0,external,hive,external_docker,external_docker_hive") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+ String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+ String catalog_name = hivePrefix + "_test_hive_statistic_timeout"
+
+ // TODO tpch will be supported in future
+ return
+
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
@@ -33,6 +42,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_
sql """use ${catalog_name}.tpch_1000_parquet"""
sql """set global analyze_timeout=1"""
+
try {
test {
sql """analyze table part (p_partkey, p_container, p_type,
p_retailprice) with sync with full;"""
@@ -41,6 +51,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_
} finally {
sql """set global analyze_timeout=43200"""
}
+
sql """drop catalog ${catalog_name}""";
}
}
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy b/regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy
similarity index 79%
rename from regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy
rename to regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy
index b03a83a158e..058afc51430 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy
@@ -15,12 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-suite("test_hive_to_array",
"p2,external,hive,external_remote,external_remote_hive") {
- String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
- String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
- String catalog_name = "test_hive_to_array"
+suite("test_hive_to_array",
"p0,external,hive,external_docker,external_docker_hive") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+ String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+ String catalog_name = hivePrefix + "_test_hive_to_array"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
diff --git a/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy b/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy
new file mode 100644
index 00000000000..27e171bab8c
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_text_garbled_file",
"p0,external,hive,external_docker,external_docker_hive") {
+ //test hive garbled files , prevent be hanged
+
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+ String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+ String catalog_name = hivePrefix + "_test_text_garbled_file"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+
+
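+ // order_qt_* sorts the result set and compares it against test_text_garbled_file.out; a BE hang or crash on the garbled CSV fails this check.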
+ order_qt_garbled_file """
+ select * from ${catalog_name}.multi_catalog.test_csv_format_error;
+ """
+ }
+ }
+}
+
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
deleted file mode 100644
index 4fcd70fe732..00000000000
--- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
+++ /dev/null
@@ -1,338 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_hive_statistic",
"p2,external,hive,external_remote,external_remote_hive") {
- String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
- String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
- String catalog_name = "test_hive_statistic"
- sql """drop catalog if exists ${catalog_name};"""
- sql """
- create catalog if not exists ${catalog_name} properties (
- 'type'='hms',
- 'hadoop.username' = 'hadoop',
- 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
- );
- """
- logger.info("catalog " + catalog_name + " created")
-
- // Test analyze table without init.
- sql """analyze table ${catalog_name}.tpch_1000_parquet.region with
sync"""
-
- sql """switch ${catalog_name};"""
- logger.info("switched to catalog " + catalog_name)
- sql """use statistics;"""
- sql """analyze table `statistics` with sync"""
- def result = sql """show column stats `statistics` (lo_quantity)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_quantity")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "46.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "1")
- assertEquals(result[0][8], "50")
-
- result = sql """show column stats `statistics` (lo_orderkey)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_orderkey")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "26.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "1")
- assertEquals(result[0][8], "98")
-
- result = sql """show column stats `statistics` (lo_linenumber)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_linenumber")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "7.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "1")
- assertEquals(result[0][8], "7")
-
- result = sql """show column stats `statistics` (lo_custkey)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_custkey")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "26.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "67423")
- assertEquals(result[0][8], "2735521")
-
- result = sql """show column stats `statistics` (lo_partkey)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_partkey")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "100.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "2250")
- assertEquals(result[0][8], "989601")
-
- result = sql """show column stats `statistics` (lo_suppkey)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_suppkey")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "100.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "4167")
- assertEquals(result[0][8], "195845")
-
- result = sql """show column stats `statistics` (lo_orderdate)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_orderdate")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "26.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "19920221")
- assertEquals(result[0][8], "19980721")
-
- result = sql """show column stats `statistics` (lo_orderpriority)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_orderpriority")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "5.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "880.0")
- assertEquals(result[0][6], "8.8")
- assertEquals(result[0][7], "'1-URGENT'")
- assertEquals(result[0][8], "'5-LOW'")
-
- result = sql """show column stats `statistics` (lo_shippriority)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_shippriority")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "1.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "0")
- assertEquals(result[0][8], "0")
-
- result = sql """show column stats `statistics` (lo_extendedprice)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_extendedprice")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "100.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "104300")
- assertEquals(result[0][8], "9066094")
-
- result = sql """show column stats `statistics` (lo_ordtotalprice)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_ordtotalprice")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "26.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "3428256")
- assertEquals(result[0][8], "36771805")
-
- result = sql """show column stats `statistics` (lo_discount)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_discount")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "11.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "0")
- assertEquals(result[0][8], "10")
-
- result = sql """show column stats `statistics` (lo_revenue)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_revenue")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "100.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "101171")
- assertEquals(result[0][8], "8703450")
-
- result = sql """show column stats `statistics` (lo_supplycost)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_supplycost")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "100.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "58023")
- assertEquals(result[0][8], "121374")
-
- result = sql """show column stats `statistics` (lo_tax)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_tax")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "9.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "0")
- assertEquals(result[0][8], "8")
-
- result = sql """show column stats `statistics` (lo_commitdate)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_commitdate")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "95.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "400.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "19920515")
- assertEquals(result[0][8], "19981016")
-
- result = sql """show column stats `statistics` (lo_shipmode)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_shipmode")
- assertEquals(result[0][2], "100.0")
- assertEquals(result[0][3], "7.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "421.0")
- assertEquals(result[0][6], "4.21")
- assertEquals(result[0][7], "'AIR'")
- assertEquals(result[0][8], "'TRUCK'")
-
- sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS
('row_count'='6001215')"""
- result = sql "show column stats `statistics` (lo_shipmode)"
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "lo_shipmode")
- assertEquals(result[0][2], "6001215.0")
-
- sql """drop stats statistics"""
- result = sql """show column stats statistics"""
- assertEquals(result.size(), 0)
-
- sql """analyze database `statistics` with sync"""
- result = sql """show table stats statistics"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][2], "100")
-
- result = sql """show table cached stats statistics"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][2], "100")
-
- sql """drop stats statistics"""
- result = sql """show column cached stats statistics"""
- assertEquals(result.size(), 0)
-
- sql """use multi_catalog"""
- sql """analyze table logs1_parquet (log_time) with sync"""
- def ctlId
- def dbId
- def tblId
- result = sql """show catalogs"""
-
- for (int i = 0; i < result.size(); i++) {
- if (result[i][1] == catalog_name) {
- ctlId = result[i][0]
- }
- }
- result = sql """show proc '/catalogs/$ctlId'"""
- for (int i = 0; i < result.size(); i++) {
- if (result[i][1] == 'multi_catalog') {
- dbId = result[i][0]
- }
- }
- result = sql """show proc '/catalogs/$ctlId/$dbId'"""
- for (int i = 0; i < result.size(); i++) {
- if (result[i][1] == 'logs1_parquet') {
- tblId = result[i][0]
- }
- }
-
- result = sql """select * from
internal.__internal_schema.column_statistics where id =
'${tblId}--1-log_time'"""
- assertEquals(result.size(), 1)
- def id = result[0][0]
- def catalog_id = result[0][1]
- def db_id = result[0][2]
- def tbl_id = result[0][3]
- def idx_id = result[0][4]
- def col_id = result[0][5]
- def count = result[0][7]
- def ndv = result[0][8]
- def null_count = result[0][9]
- def data_size_in_bytes = result[0][12]
- def update_time = result[0][13]
-
- sql """insert into internal.__internal_schema.column_statistics values
('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count,
$ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
-
- result = sql """show column stats logs1_parquet (log_time)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][7], "N/A")
- assertEquals(result[0][8], "N/A")
-
- sql """use tpch1_parquet;"""
- sql """drop stats region"""
- sql """alter table region modify column r_comment set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0',
'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special
accounts cajole carefully blithely close requests. carefully final asymptotes
haggle furiousl');"""
- sql """alter table region modify column r_name set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0',
'min_value'='AFRICA', 'max_value'='MIDDLE EAST');"""
- sql """alter table region modify column r_regionkey set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0',
'min_value'='0', 'max_value'='4');"""
- result = sql """show column stats region(r_regionkey)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "r_regionkey")
- assertEquals(result[0][2], "5.0")
- assertEquals(result[0][3], "5.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "20.0")
- assertEquals(result[0][6], "4.0")
- assertEquals(result[0][7], "0")
- assertEquals(result[0][8], "4")
-
- result = sql """show column stats region(r_comment)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "r_comment")
- assertEquals(result[0][2], "5.0")
- assertEquals(result[0][3], "5.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "330.0")
- assertEquals(result[0][6], "66.0")
- assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'")
- assertEquals(result[0][8], "\'uickly special accounts cajole carefully
blithely close requests. carefully final asymptotes haggle furiousl\'")
-
- result = sql """show column stats region(r_name)"""
- assertEquals(result.size(), 1)
- assertEquals(result[0][0], "r_name")
- assertEquals(result[0][2], "5.0")
- assertEquals(result[0][3], "5.0")
- assertEquals(result[0][4], "0.0")
- assertEquals(result[0][5], "34.0")
- assertEquals(result[0][6], "6.8")
- assertEquals(result[0][7], "\'AFRICA\'")
- assertEquals(result[0][8], "\'MIDDLE EAST\'")
-
- sql """drop catalog ${catalog_name}"""
- }
-}
-
diff --git a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
deleted file mode 100644
index a3ea6a3bcc2..00000000000
--- a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_text_garbled_file",
"p2,external,hive,external_remote,external_remote_hive") {
- //test hive garbled files , prevent be hanged
-
- String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
- String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
- String catalog_name = "test_text_garbled_file"
- sql """drop catalog if exists ${catalog_name};"""
- sql """
- create catalog if not exists ${catalog_name} properties (
- 'type'='hms',
- 'hadoop.username' = 'hadoop',
- 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
- );
- """
- logger.info("catalog " + catalog_name + " created")
- sql """switch ${catalog_name};"""
- logger.info("switched to catalog " + catalog_name)
-
-
- order_qt_garbled_file """
- select * from ${catalog_name}.multi_catalog.test_csv_format_error;
- """
-
-
- }
-}
-
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]