This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 3613413a54b [fix](hive) support find serde info from both tbl
properties and serde properties (#37043) (#37188)
3613413a54b is described below
commit 3613413a54b63eb8ec51ab29c676fed778e90d59
Author: Mingyu Chen <[email protected]>
AuthorDate: Thu Jul 4 13:55:38 2024 +0800
[fix](hive) support find serde info from both tbl properties and serde
properties (#37043) (#37188)
bp #37043
---
.../hive/scripts/data/regression/serde_prop/run.sh | 9 ++++
.../regression/serde_prop/some_serde_table.hql | 34 +++++++++++++
.../datasource/hive/HiveMetaStoreClientHelper.java | 26 ++++++++++
.../doris/datasource/hive/source/HiveScanNode.java | 58 ++++++++++++----------
.../hive/test_hive_serde_prop.out | 16 ++++++
.../hive/test_hive_serde_prop.groovy | 4 ++
6 files changed, 121 insertions(+), 26 deletions(-)
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh
new file mode 100755
index 00000000000..ef6538563d5
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+# create table
+hive -f "${CUR_DIR}"/some_serde_table.hql
+
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
new file mode 100644
index 00000000000..fa6ad791118
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -0,0 +1,34 @@
+create database if not exists regression;
+use regression;
+-- serde_test1: 'field.delim' is declared only in SERDEPROPERTIES.
+CREATE TABLE `serde_test1`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'='',
+ 'serialization.format'='')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+-- serde_test2: same SERDEPROPERTIES, plus 'field.delim'='|' in TBLPROPERTIES.
+CREATE TABLE `serde_test2`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim'='',
+ 'serialization.format'='')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+TBLPROPERTIES (
+ 'field.delim'='|'
+);
+-- Load the same two rows into each table.
+insert into serde_test1 values(1, "abc"),(2, "def");
+insert into serde_test2 values(1, "abc"),(2, "def");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 2e7693619b8..7ad7621f7cc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -79,6 +80,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@@ -847,4 +849,28 @@ public class HiveMetaStoreClientHelper {
}
return conf;
}
+
+    /**
+     * Looks up a serde-related property of a Hive table.
+     *
+     * <p>The key is searched in both the table parameters (TBLPROPERTIES) and the
+     * serde parameters (SERDEPROPERTIES). When both hold a non-empty value, the
+     * table parameter wins.
+     *
+     * @param table the Hive metastore table to inspect
+     * @param key the property name, e.g. {@code field.delim}
+     * @return the first non-null, non-empty value, or an empty Optional if neither
+     *         parameter map provides one
+     */
+    public static Optional<String> getSerdeProperty(Table table, String key) {
+        String valueFromSd = table.getSd().getSerdeInfo().getParameters().get(key);
+        String valueFromTbl = table.getParameters().get(key);
+        // Table-level value is passed first, so it takes precedence over the serde-level one.
+        return firstNonNullable(valueFromTbl, valueFromSd);
+    }
+
+    /**
+     * Returns the first argument that is neither null nor the empty string.
+     *
+     * @param values candidates, in priority order
+     * @return the first usable value, or an empty Optional when every candidate
+     *         is null or empty
+     */
+    private static Optional<String> firstNonNullable(String... values) {
+        for (String candidate : values) {
+            if (Strings.isNullOrEmpty(candidate)) {
+                // An empty string counts as "not set", the same as null.
+                continue;
+            }
+            return Optional.of(candidate);
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Returns the value of the first present Optional, or a default.
+     *
+     * @param defaultValue value returned when no candidate is present
+     * @param values candidates, in priority order
+     * @return the first present value, otherwise {@code defaultValue}
+     */
+    @SafeVarargs // the generic varargs array is only read, never stored or exposed
+    public static String firstPresentOrDefault(String defaultValue, Optional<String>... values) {
+        for (Optional<String> value : values) {
+            if (value.isPresent()) {
+                return value.get();
+            }
+        }
+        return defaultValue;
+    }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 1970a48f2d4..0214ecc4642 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -85,7 +85,7 @@ public class HiveScanNode extends FileQueryScanNode {
public static final String PROP_LINE_DELIMITER = "line.delim";
public static final String DEFAULT_LINE_DELIMITER = "\n";
public static final String PROP_SEPARATOR_CHAR = "separatorChar";
- public static final String PROP_QUOTA_CHAR = "quoteChar";
+ public static final String PROP_QUOTE_CHAR = "quoteChar";
public static final String PROP_COLLECTION_DELIMITER_HIVE2 =
"colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 =
"collection.delim";
@@ -445,32 +445,37 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
protected TFileAttributes getFileAttributes() throws UserException {
TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
- java.util.Map<String, String> delimiter =
hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
- if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
- if (delimiter.get(PROP_FIELD_DELIMITER).length() == 0) {
- textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
- } else {
-
textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
- }
- } else if (delimiter.containsKey(PROP_SEPARATOR_CHAR)) {
- textParams.setColumnSeparator(delimiter.get(PROP_SEPARATOR_CHAR));
- } else {
- textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
- }
- if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
-
textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
- }
-
textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER,
DEFAULT_LINE_DELIMITER));
-
textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER,
DEFAULT_MAP_KV_DELIMITER));
-
- // textParams.collection_delimiter field is map, array and struct
delimiter;
- if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2) != null) {
-
textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2));
- } else if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3) != null) {
-
textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3));
- } else {
- textParams.setCollectionDelimiter(DEFAULT_COLLECTION_DELIMITER);
+
+ // 1. set column separator
+ Optional<String> fieldDelim =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_FIELD_DELIMITER);
+ Optional<String> columnSeparator =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_SEPARATOR_CHAR);
+
textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator));
+ // 2. set line delimiter
+ Optional<String> lineDelim =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_LINE_DELIMITER);
+
textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_LINE_DELIMITER, lineDelim));
+ // 3. set mapkv delimiter
+ Optional<String> mapkvDelim =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_MAP_KV_DELIMITER);
+
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+ // 4. set collection delimiter
+ Optional<String> collectionDelimHive2 =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE2);
+ Optional<String> collectionDelimHive3 =
+
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE3);
+
textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2,
collectionDelimHive3));
+ // 5. set quote char
+ Map<String, String> serdeParams =
hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
+ if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
+
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
}
+
TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
fileAttributes.setHeaderType("");
@@ -502,3 +507,4 @@ public class HiveScanNode extends FileQueryScanNode {
return compressType;
}
}
+
diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index 818db069d50..b00eebec49d 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -3,7 +3,23 @@
a 1.1
b 2.2
+-- !2 --
+1 abc
+2 def
+
+-- !2 --
+1 abc
+2 def
+
-- !1 --
a 1.1
b 2.2
+-- !2 --
+1 abc
+2 def
+
+-- !2 --
+1 abc
+2 def
+
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 7ac366748b6..3ae6b21bbba 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -37,6 +37,10 @@ suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,exte
);"""
qt_1 """select * from ${catalog_name}.${ex_db_name}.employee_gz
order by name;"""
+
+
+ qt_2 """select * from ${catalog_name}.regression.serde_test1 order by
id;"""
+ qt_2 """select * from ${catalog_name}.regression.serde_test2 order by
id;"""
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]