This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e92dc19f7f6 [fix](hive) add support for quoteChar and seperatorChar for hive (branch-2.0) (#28703)#28703
e92dc19f7f6 is described below

commit e92dc19f7f6823b9d6d0cb622aa8de6309c0d515
Author: wuwenchi <wuwenchi...@hotmail.com>
AuthorDate: Thu Dec 21 19:03:13 2023 +0800

    [fix](hive) add support for quoteChar and seperatorChar for hive (branch-2.0) (#28703)#28703

    bp #28613
---
 .../hive/scripts/create_preinstalled_table.hql     |  8 +++++
 .../doris/planner/external/HiveScanNode.java       | 17 +++++++++-
 .../hive/test_hive_serde_prop.out                  |  4 +++
 .../hive/test_hive_serde_prop.groovy               | 36 ++++++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 4e80d7466d2..e798ecd7f2b 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1798,3 +1798,11 @@ create table stats_test2 (id INT, value STRING) STORED AS PARQUET;
 insert into stats_test1 values (1, 'name1'), (2, 'name2'), (3, 'name3');
 INSERT INTO stats_test2 VALUES (1, ';'), (2, '\*');
+
+create table employee_gz(name string,salary string)
+row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+with serdeproperties
+('quoteChar'='\"'
+,'seperatorChar'=',');
+
+insert into employee_gz values ('a', '1.1'), ('b', '2.2');
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
index 943d30017e7..58b93112477 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
@@ -76,6 +76,9 @@ public class HiveScanNode extends
FileQueryScanNode {
     public static final String DEFAULT_FIELD_DELIMITER = "\1"; // "\x01"
     public static final String PROP_LINE_DELIMITER = "line.delim";
     public static final String DEFAULT_LINE_DELIMITER = "\n";
+    public static final String PROP_SEPERATOR_CHAR = "seperatorChar";
+    public static final String PROP_QUOTA_CHAR = "quoteChar";
+
     public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
     public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
@@ -362,7 +365,16 @@ public class HiveScanNode extends FileQueryScanNode {
     protected TFileAttributes getFileAttributes() throws UserException {
         TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
         java.util.Map<String, String> delimiter = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
-        textParams.setColumnSeparator(delimiter.getOrDefault(PROP_FIELD_DELIMITER, DEFAULT_FIELD_DELIMITER));
+        if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
+            textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
+        } else if (delimiter.containsKey(PROP_SEPERATOR_CHAR)) {
+            textParams.setColumnSeparator(delimiter.get(PROP_SEPERATOR_CHAR));
+        } else {
+            textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
+        }
+        if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
+            textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
+        }
         textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER, DEFAULT_LINE_DELIMITER));
         textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER, DEFAULT_MAP_KV_DELIMITER));
@@ -377,6 +389,9 @@ public class HiveScanNode extends FileQueryScanNode {
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
         fileAttributes.setHeaderType("");
+        if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
+            fileAttributes.setTrimDoubleQuotes(true);
+        }
         return fileAttributes;
     }
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
new file mode 100644
index 00000000000..1cde2baec27
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -0,0 +1,4 @@
+test_hive_serde_prop.out -- This file is automatically generated. You should know what you did if you want to edit this
+-- !1 --
+a 1.1
+b 2.2
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
new file mode 100644
index 00000000000..41d7056d208
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String catalog_name = "test_hive_serde_prop"
+        String ex_db_name = "`stats_test`"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hms_port = context.config.otherConfigs.get("hms_port")
+
+        sql """drop catalog if exists ${catalog_name} """
+
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+            'type'='hms',
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hadoop.username' = 'hive'
+        );"""
+
+        qt_1 """select * from ${catalog_name}.${ex_db_name}.employee_gz order by name;"""
+    }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org