This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0c3e7183ea1df2e20a1d6a45b7dfa6cce20145d6 Author: Ashin Gau <[email protected]> AuthorDate: Sat Aug 12 15:12:00 2023 +0800 [fix](test) load data inpath will remove the data in hdfs (#22908) In Hive, `load data inpath` from HDFS moves the source directory into the table's location directory, which leads to errors like "Can not get first file, please check uri" in the TVF test. --- .../hive/scripts/create_preinstalled_table.hql | 13 ++++++++++--- .../hive/test_different_parquet_types.groovy | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index 9f34c121df..9dad454bf4 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -167,9 +167,16 @@ CREATE TABLE `delta_encoding_required_column`( c_email_address string, c_last_review_date string ) -STORED AS parquet; - -load data inpath '/user/doris/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet' into table default.delta_encoding_required_column; +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '/user/doris/preinstalled_data/different_types_parquet/delta_encoding_required_column/' +TBLPROPERTIES ( + 'transient_lastDdlTime'='1661955829'); msck repair table delta_encoding_required_column; diff --git a/regression-test/suites/external_table_p0/hive/test_different_parquet_types.groovy b/regression-test/suites/external_table_p0/hive/test_different_parquet_types.groovy index e26864e66c..beb3cd3e0c 100644 --- 
a/regression-test/suites/external_table_p0/hive/test_different_parquet_types.groovy +++ b/regression-test/suites/external_table_p0/hive/test_different_parquet_types.groovy @@ -20,6 +20,7 @@ suite("test_different_parquet_types", "p0") { String hms_port = context.config.otherConfigs.get("hms_port") String hdfs_port = context.config.otherConfigs.get("hdfs_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") // problem 01 :in hive execute "select * from delta_byte_array limit 10" ,there will be some valid data return,but doris query return nothing def q01 = { @@ -113,7 +114,7 @@ suite("test_different_parquet_types", "p0") { logger.info("record res" + res6_2.toString()) def res6_3 = sql """ - select * from hdfs(\"uri" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/datapage_v1_snappy_compressed_checksum/datapage_v1_snappy_compressed_checksum.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10 + select * from hdfs(\"uri" = \"hdfs://${externalEnvIp}:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/datapage_v1-snappy-compressed-checksum/datapage_v1-snappy-compressed-checksum.parquet\",\"fs.defaultFS\" = \"hdfs://${externalEnvIp}:${hdfs_port}\",\"format\" = \"parquet\") limit 10 """ logger.info("record res" + res6_3.toString()) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
