This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new f7068b56587 [cherry-pick](branch-2.1) Make doris read hive text table
parameters and behavior consistent with hive (#37840)
f7068b56587 is described below
commit f7068b56587172581d8e248532daec95102e01dc
Author: 苏小刚 <[email protected]>
AuthorDate: Tue Jul 16 22:24:50 2024 +0800
[cherry-pick](branch-2.1) Make doris read hive text table parameters and
behavior consistent with hive (#37840)
## Proposed changes
pick from master https://github.com/apache/doris/pull/37638
<!--Describe your changes.-->
---
.../regression/serde_prop/some_serde_table.hql | 57 ++++++++++++++++++++++
.../datasource/hive/HiveMetaStoreClientHelper.java | 17 +++++++
.../doris/datasource/hive/source/HiveScanNode.java | 44 +++++++++--------
.../hive/test_hive_serde_prop.out | 36 +++++++++++++-
.../hive/test_hive_serde_prop.groovy | 6 ++-
5 files changed, 137 insertions(+), 23 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index fa6ad791118..13e7cb86e03 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -30,5 +30,62 @@ TBLPROPERTIES (
'field.delim'='|'
);
+CREATE TABLE `serde_test3`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'serialization.format'='g')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+
+CREATE TABLE `serde_test4`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim' = 'gg',
+ "line.delim" = "hh")
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test5`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim' = '16',
+ "line.delim" = "21")
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test6`(
+ `id` int,
+ `name` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'field.delim' = '\16',
+ "line.delim" = "\21")
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+
insert into serde_test1 values(1, "abc"),(2, "def");
insert into serde_test2 values(1, "abc"),(2, "def");
+insert into serde_test3 values(1, "abc"),(2, "def");
+insert into serde_test4 values(1, "abc"),(2, "def");
+insert into serde_test5 values(1, "abc"),(2, "def");
+insert into serde_test6 values(1, "abc"),(2, "def");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 22bf13755a2..c086172f1f9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -873,4 +873,21 @@ public class HiveMetaStoreClientHelper {
}
return defaultValue;
}
+
+ /**
+ * Converts a delimiter property value into a one-character string.
+ *
+ * <p>If {@code altValue} parses as a decimal {@code byte} (-128..127, per
+ * {@link Byte#parseByte(String)}), the result is the single character whose
+ * code is that value; negative values are wrapped into 128..255 by the
+ * {@code (+ 256) % 256} step. For example {@code "16"} yields the character
+ * U+0010 and {@code "-1"} yields U+00FF.
+ *
+ * <p>If parsing fails (non-numeric text, or a number outside the byte range
+ * such as {@code "200"}), the first character of {@code altValue} is
+ * returned unchanged, so {@code "gg"} yields {@code "g"}.
+ *
+ * <p>NOTE(review): this presumably mirrors how Hive's text SerDe interprets
+ * delimiter properties - confirm against the Hive implementation.
+ *
+ * @param altValue
+ * The string containing a number or a literal delimiter; may be null.
+ * @return a string of length one, or {@code null} when {@code altValue} is
+ * null or empty.
+ */
+ public static String getByte(String altValue) {
+ if (altValue != null && altValue.length() > 0) {
+ try {
+ return String.valueOf((char) ((Byte.parseByte(altValue) + 256)
% 256));
+ } catch (NumberFormatException e) {
+ return altValue.substring(0, 1);
+ }
+ }
+ return null;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 0214ecc4642..abb8cc8dda3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -86,6 +86,7 @@ public class HiveScanNode extends FileQueryScanNode {
public static final String DEFAULT_LINE_DELIMITER = "\n";
public static final String PROP_SEPARATOR_CHAR = "separatorChar";
public static final String PROP_QUOTE_CHAR = "quoteChar";
+ public static final String PROP_SERIALIZATION_FORMAT =
"serialization.format";
public static final String PROP_COLLECTION_DELIMITER_HIVE2 =
"colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 =
"collection.delim";
@@ -447,29 +448,32 @@ public class HiveScanNode extends FileQueryScanNode {
TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
// 1. set column separator
- Optional<String> fieldDelim =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_FIELD_DELIMITER);
- Optional<String> columnSeparator =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_SEPARATOR_CHAR);
-
textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator));
+ Optional<String> fieldDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_FIELD_DELIMITER);
+ Optional<String> serFormat =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_SERIALIZATION_FORMAT);
+ Optional<String> columnSeparator =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_SEPARATOR_CHAR);
+
textParams.setColumnSeparator(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator,
serFormat)));
// 2. set line delimiter
- Optional<String> lineDelim =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_LINE_DELIMITER);
-
textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_LINE_DELIMITER, lineDelim));
+ Optional<String> lineDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_LINE_DELIMITER);
+
textParams.setLineDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_LINE_DELIMITER, lineDelim)));
// 3. set mapkv delimiter
- Optional<String> mapkvDelim =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_MAP_KV_DELIMITER);
-
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+ Optional<String> mapkvDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_MAP_KV_DELIMITER);
+
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_MAP_KV_DELIMITER, mapkvDelim)));
// 4. set collection delimiter
- Optional<String> collectionDelimHive2 =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE2);
- Optional<String> collectionDelimHive3 =
-
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE3);
-
textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2,
collectionDelimHive3));
+ Optional<String> collectionDelimHive2 =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_COLLECTION_DELIMITER_HIVE2);
+ Optional<String> collectionDelimHive3 =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ PROP_COLLECTION_DELIMITER_HIVE3);
+ textParams.setCollectionDelimiter(
+
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2,
collectionDelimHive3)));
// 5. set quote char
Map<String, String> serdeParams =
hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index b00eebec49d..38918c3fc6f 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -7,7 +7,23 @@ b 2.2
1 abc
2 def
--- !2 --
+-- !3 --
+1 abc
+2 def
+
+-- !4 --
+1 abc
+2 def
+
+-- !5 --
+1 abc
+2 def
+
+-- !6 --
+1 abc
+2 def
+
+-- !7 --
1 abc
2 def
@@ -19,7 +35,23 @@ b 2.2
1 abc
2 def
--- !2 --
+-- !3 --
+1 abc
+2 def
+
+-- !4 --
+1 abc
+2 def
+
+-- !5 --
+1 abc
+2 def
+
+-- !6 --
+1 abc
+2 def
+
+-- !7 --
1 abc
2 def
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 3ae6b21bbba..0da2eb3160a 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -40,7 +40,11 @@ suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,exte
qt_2 """select * from ${catalog_name}.regression.serde_test1 order by
id;"""
- qt_2 """select * from ${catalog_name}.regression.serde_test2 order by
id;"""
+ qt_3 """select * from ${catalog_name}.regression.serde_test2 order by
id;"""
+ qt_4 """select * from ${catalog_name}.regression.serde_test3 order by
id;"""
+ qt_5 """select * from ${catalog_name}.regression.serde_test4 order by
id;"""
+ qt_6 """select * from ${catalog_name}.regression.serde_test5 order by
id;"""
+ qt_7 """select * from ${catalog_name}.regression.serde_test6 order by
id;"""
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]