This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 53ee740b88b [enhance](hive) Add regression-test cases for hive text
ddl and hive text insert and fix reading null string bug #42200 (#42272)
53ee740b88b is described below
commit 53ee740b88b374bb98bd3ff23b87d22e2606225f
Author: Rayner Chen <[email protected]>
AuthorDate: Tue Oct 22 23:42:49 2024 +0800
[enhance](hive) Add regression-test cases for hive text ddl and hive text
insert and fix reading null string bug #42200 (#42272)
cherry pick from #42200
Co-authored-by: Socrates <[email protected]>
---
be/src/vec/exec/format/csv/csv_reader.cpp | 2 +-
.../scripts/create_preinstalled_scripts/run63.hql | 18 ++-
.../hive/ddl/test_hive_ddl_text_format.out | 57 +++++++
.../hive/ddl/test_hive_ddl_text_format.groovy | 177 +++++++++++++++------
4 files changed, 200 insertions(+), 54 deletions(-)
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 0583b74d735..bf0e543d650 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -622,7 +622,7 @@ template <bool from_json>
Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) {
auto& null_column = assert_cast<ColumnNullable&>(column);
if (!(from_json && _options.converted_from_string &&
slice.trim_double_quotes())) {
- if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
+ if (slice.compare(Slice(_options.null_format, _options.null_len)) ==
0) {
null_column.insert_data(nullptr, 0);
return Status::OK();
}
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
index aebd7522959..c287595278f 100755
---
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
@@ -560,7 +560,14 @@ CREATE TABLE `all_types_text`(
`t_array_string_all_nulls` array<string>,
`dt` int)
stored as textfile
-TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
+TBLPROPERTIES(
+ 'field.delim'='\t',
+ 'line.delim'='\n',
+ 'collection.delim'=',',
+ 'mapkey.delim'=':',
+ 'escape.delim'='|',
+ 'serialization.null.format'='null'
+);
CREATE TABLE all_types_par_text(
`boolean_col` boolean,
@@ -628,4 +635,11 @@ CREATE TABLE all_types_par_text(
PARTITIONED BY (
`dt` int)
stored as textfile
-TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
+TBLPROPERTIES(
+ 'field.delim'='\t',
+ 'line.delim'='\n',
+ 'collection.delim'=',',
+ 'mapkey.delim'=':',
+ 'escape.delim'='|',
+ 'serialization.null.format'='null'
+);
diff --git
a/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out
b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out
new file mode 100644
index 00000000000..faf343ce09b
--- /dev/null
+++
b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !default_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_default_properties --
+1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC","keyD":"valueD"}
+
+-- !standard_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_standard_properties --
+1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC","keyD":"valueD"}
+
+-- !different_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_different_properties --
+1 Alice ["tag1,tag2"] {"key1":"value1,key2:value2\\u00042"}
+
+-- !default_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_default_properties --
+1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC","keyD":"valueD"}
+
+-- !standard_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_standard_properties --
+1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC","keyD":"valueD"}
+
+-- !different_properties --
+1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
+2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
+3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_different_properties --
+1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2\\u00042"}
+
diff --git
a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
index aaa5b198e69..730db1247cd 100644
---
a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
+++
b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
@@ -17,62 +17,137 @@
suite("test_hive_ddl_text_format",
"p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
- if (enabled != null && enabled.equalsIgnoreCase("true")) {
- String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
- String hms_port = context.config.otherConfigs.get("hive3HmsPort")
- String hdfs_port = context.config.otherConfigs.get("hive3HdfsPort")
- String catalog_name = "test_hive_ddl_text_format"
- String table_name = "table_with_pars";
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ setHivePrefix(hivePrefix)
+ try{
+ String externalEnvIp =
context.config.otherConfigs.get("externalEnvIp")
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String hdfs_port = context.config.otherConfigs.get(hivePrefix +
"HdfsPort")
+ String catalog_name = "test_hive_ddl_text_format"
+ String table_name = "table_with_pars";
- sql """drop catalog if exists ${catalog_name};"""
+ sql """drop catalog if exists ${catalog_name};"""
- sql """
- create catalog if not exists ${catalog_name} properties (
- 'type'='hms',
- 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
- 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
- 'use_meta_cache' = 'true'
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
+ 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+ 'use_meta_cache' = 'true'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """use `default`;"""
+
+ sql """ drop table if exists text_table_default_properties """
+ sql """
+ create table text_table_default_properties (
+ id int,
+ `name` string,
+ tags array<string>,
+ attributes map<string, string>
+ ) PROPERTIES (
+ 'file_format'='text'
);
- """
- logger.info("catalog " + catalog_name + " created")
- sql """switch ${catalog_name};"""
- logger.info("switched to catalog " + catalog_name)
- sql """use `default`;"""
+ """
+ sql """
+ INSERT INTO text_table_default_properties VALUES
+ (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1',
'key2', 'value2')),
+ (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA',
'keyB', 'valueB')),
+ (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+ """
+ order_qt_default_properties """ select * from
text_table_default_properties """
+
+ order_qt_hive_docker_default_properties""" select * from
text_table_default_properties """
- sql """ drop table if exists tb_text """
- sql """
- create table tb_text (
- id int,
- `name` string
- ) PROPERTIES (
- 'compression'='gzip',
- 'file_format'='text',
- 'field.delim'='\t',
- 'line.delim'='\n',
- 'collection.delim'=';',
- 'mapkey.delim'=':',
- 'serialization.null.format'='\\N'
- );
- """
+ sql """ drop table if exists text_table_standard_properties """
+ // Escape characters need to be considered in groovy scripts
+ sql """
+ create table text_table_standard_properties (
+ id int,
+ `name` string,
+ tags array<string>,
+ attributes map<string, string>
+ ) PROPERTIES (
+ 'compression'='plain',
+ 'file_format'='text',
+ 'field.delim'='\\1',
+ 'line.delim'='\\n',
+ 'collection.delim'='\\2',
+ 'mapkey.delim'='\\3',
+ 'escape.delim'= '\\\\',
+ 'serialization.null.format'='\\\\N'
+ );
+ """
+ sql """
+ INSERT INTO text_table_standard_properties VALUES
+ (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1',
'key2', 'value2')),
+ (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA',
'keyB', 'valueB')),
+ (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+ """
+ order_qt_standard_properties """ select * from
text_table_standard_properties """
+ order_qt_hive_docker_standard_properties """ select * from
text_table_standard_properties order by id; """
+
+ sql """ drop table if exists text_table_different_properties """
+ sql """
+ create table text_table_different_properties (
+ id int,
+ `name` string,
+ tags array<string>,
+ attributes map<string, string>
+ ) PROPERTIES (
+ 'compression'='gzip',
+ 'file_format'='text',
+ 'field.delim'='A',
+ 'line.delim'='\\4',
+ 'collection.delim'=',',
+ 'mapkey.delim'=':',
+ 'escape.delim'='|',
+ 'serialization.null.format'='null'
+ );
+ """
+ sql """
+ INSERT INTO text_table_different_properties VALUES
+ (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1',
'key2', 'value2')),
+ (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA',
'keyB', 'valueB')),
+ (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+ """
+ order_qt_different_properties """ select * from
text_table_different_properties """
+ order_qt_hive_docker_different_properties """ select * from
text_table_different_properties order by id; """
- String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
- String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
- String output_format =
"'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
- String doris_fileformat = "'doris.file_format'='text'"
- String filed_delim = "'field.delim'"
- String line_delim = "'line.delim'"
- String mapkey_delim = "'mapkey.delim'"
+ String serde =
"'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
+ String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
+ String output_format =
"'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
+ String doris_fileformat = "'doris.file_format'='text'"
+ String filed_delim = "'field.delim'"
+ String line_delim = "'line.delim'"
+ String mapkey_delim = "'mapkey.delim'"
+ String collection_delim = "'collection.delim'"
+ String escape_delim = "'escape.delim'"
+ String serialization_null_format = "'serialization.null.format'"
- def create_tbl_res = sql """ show create table tb_text """
- String res = create_tbl_res.toString()
- logger.info("${res}")
- assertTrue(res.containsIgnoreCase("${serde}"))
- assertTrue(res.containsIgnoreCase("${input_format}"))
- assertTrue(res.containsIgnoreCase("${output_format}"))
- assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
- assertTrue(res.containsIgnoreCase("${filed_delim}"))
- assertTrue(res.containsIgnoreCase("${filed_delim}"))
- assertTrue(res.containsIgnoreCase("${line_delim}"))
- assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
+ def create_tbl_res = sql """ show create table
text_table_standard_properties """
+ String res = create_tbl_res.toString()
+ logger.info("${res}")
+ assertTrue(res.containsIgnoreCase("${serde}"))
+ assertTrue(res.containsIgnoreCase("${input_format}"))
+ assertTrue(res.containsIgnoreCase("${output_format}"))
+ assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
+ assertTrue(res.containsIgnoreCase("${filed_delim}"))
+ assertTrue(res.containsIgnoreCase("${filed_delim}"))
+ assertTrue(res.containsIgnoreCase("${line_delim}"))
+ assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
+ assertTrue(res.containsIgnoreCase("${collection_delim}"))
+ assertTrue(res.containsIgnoreCase("${escape_delim}"))
+ assertTrue(res.containsIgnoreCase("${serialization_null_format}"))
+ } finally {
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]