This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 93cb6f9d19f branch-3.1: [fix](hive)fix querying hive text table with
NULL DEFINED AS '' #55626 (#55661)
93cb6f9d19f is described below
commit 93cb6f9d19f6c343f289bc34003f5569bed06bd8
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Sep 5 18:21:47 2025 +0800
branch-3.1: [fix](hive)fix querying hive text table with NULL DEFINED AS ''
#55626 (#55661)
Cherry-picked from #55626
Co-authored-by: Socrates <[email protected]>
---
be/src/vec/exec/format/text/text_reader.cpp | 8 +++---
.../regression/serde_prop/some_serde_table.hql | 30 +++++++++++++++++++++
.../datasource/hive/HiveMetaStoreClientHelper.java | 8 +++---
.../doris/datasource/hive/HiveProperties.java | 17 +++++-------
.../hive/test_hive_serde_prop.out | Bin 1534 -> 2092 bytes
.../hive/test_hive_serde_prop.groovy | 8 ++++++
6 files changed, 52 insertions(+), 19 deletions(-)
diff --git a/be/src/vec/exec/format/text/text_reader.cpp
b/be/src/vec/exec/format/text/text_reader.cpp
index 7913a9bdb2b..ab7f92f489f 100644
--- a/be/src/vec/exec/format/text/text_reader.cpp
+++ b/be/src/vec/exec/format/text/text_reader.cpp
@@ -165,11 +165,9 @@ Status TextReader::_validate_line(const Slice& line, bool*
success) {
Status TextReader::_deserialize_nullable_string(IColumn& column, Slice& slice)
{
auto& null_column = assert_cast<ColumnNullable&>(column);
- if (_options.null_len > 0) {
- if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
- null_column.insert_data(nullptr, 0);
- return Status::OK();
- }
+ if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
+ null_column.insert_data(nullptr, 0);
+ return Status::OK();
}
static DataTypeStringSerDe stringSerDe;
auto st =
stringSerDe.deserialize_one_cell_from_hive_text(null_column.get_nested_column(),
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index 0368547f8be..81bdf03da8e 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -170,3 +170,33 @@ INSERT INTO TABLE test_open_csv_standard_prop VALUES
INSERT INTO TABLE test_open_csv_custom_prop VALUES
(1, 'John Doe', 28, 50000.75, true, '2022-01-15', '2023-10-21 14:30:00', 4.5,
'Senior Developer'),
(2, 'Jane,Smith', NULL, NULL, false, '2020-05-20', NULL, NULL, '\"Project
Manager\"');
+
+CREATE TABLE test_empty_null_format_text (
+ id INT,
+ name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+TBLPROPERTIES (
+ "serialization.null.format"=""
+);
+
+INSERT INTO TABLE test_empty_null_format_text VALUES
+ (1, 'Alice'),
+ (2, NULL),
+ (3, '');
+
+CREATE TABLE test_empty_null_defined_text (
+ id INT,
+ name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+NULL DEFINED AS ''
+STORED AS TEXTFILE;
+
+INSERT INTO TABLE test_empty_null_defined_text VALUES
+ (1, 'Alice'),
+ (2, NULL),
+ (3, '');
\ No newline at end of file
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 9874c18c5b3..cf3f02a25f3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -863,7 +863,7 @@ public class HiveMetaStoreClientHelper {
private static Optional<String> firstNonNullable(String... values) {
for (String value : values) {
- if (!Strings.isNullOrEmpty(value)) {
+ if (value != null) {
return Optional.of(value);
}
}
@@ -884,8 +884,10 @@ public class HiveMetaStoreClientHelper {
*
* @param altValue
* The string containing a number.
+ * @param defValue
+ * The default value to return if altValue is invalid.
*/
- public static String getByte(String altValue) {
+ public static String getByte(String altValue, String defValue) {
if (altValue != null && altValue.length() > 0) {
try {
return Character.toString((char) ((Byte.parseByte(altValue) +
256) % 256));
@@ -893,6 +895,6 @@ public class HiveMetaStoreClientHelper {
return altValue.substring(0, 1);
}
}
- return null;
+ return defValue;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
index 1be78e41b89..36c147da142 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
@@ -89,8 +89,8 @@ public class HiveProperties {
Optional<String> fieldDelim =
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER);
Optional<String> serFormat =
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT);
String delimiter = HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat);
- return supportMultiChar ? delimiter : HiveMetaStoreClientHelper.getByte(delimiter);
+ "", fieldDelim, serFormat);
+ return supportMultiChar ? delimiter : HiveMetaStoreClientHelper.getByte(delimiter, DEFAULT_FIELD_DELIMITER);
}
public static String getSeparatorChar(Table table) {
@@ -102,13 +102,13 @@ public class HiveProperties {
public static String getLineDelimiter(Table table) {
Optional<String> lineDelim =
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_LINE_DELIMITER);
return
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_LINE_DELIMITER, lineDelim));
+ "", lineDelim), DEFAULT_LINE_DELIMITER);
}
public static String getMapKvDelimiter(Table table) {
Optional<String> mapkvDelim =
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_MAP_KV_DELIMITER);
return
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+ "", mapkvDelim), DEFAULT_MAP_KV_DELIMITER);
}
public static String getCollectionDelimiter(Table table) {
@@ -117,18 +117,13 @@ public class HiveProperties {
Optional<String> collectionDelimHive3 =
HiveMetaStoreClientHelper.getSerdeProperty(table,
PROP_COLLECTION_DELIMITER_HIVE3);
return
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3));
+ "", collectionDelimHive2, collectionDelimHive3), DEFAULT_COLLECTION_DELIMITER);
}
public static Optional<String> getEscapeDelimiter(Table table) {
Optional<String> escapeDelim =
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_ESCAPE_DELIMITER);
if (escapeDelim.isPresent()) {
- String escape = HiveMetaStoreClientHelper.getByte(escapeDelim.get());
- if (escape != null) {
- return Optional.of(escape);
- } else {
- return Optional.of(DEFAULT_ESCAPE_DELIMIER);
- }
+ return Optional.of(HiveMetaStoreClientHelper.getByte(escapeDelim.get(), DEFAULT_ESCAPE_DELIMIER));
}
return Optional.empty();
}
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index c2415c058f1..cda92c0519a 100644
Binary files
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out and
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out differ
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 52cdd25eb07..d4bb051214d 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -55,6 +55,14 @@ suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,exte
qt_test_open_csv_default_prop """select * from
${catalog_name}.regression.test_open_csv_default_prop order by id;"""
qt_test_open_csv_standard_prop """select * from
${catalog_name}.regression.test_open_csv_standard_prop order by id;"""
qt_test_open_csv_custom_prop """select * from
${catalog_name}.regression.test_open_csv_custom_prop order by id;"""
+
+ qt_test_empty_null_format_text """select * from ${catalog_name}.regression.test_empty_null_format_text order by id;"""
+ qt_test_empty_null_format_text2 """select * from ${catalog_name}.regression.test_empty_null_format_text where name is null order by id;"""
+ qt_test_empty_null_format_text3 """select * from ${catalog_name}.regression.test_empty_null_format_text where name = '' order by id;"""
+
+ qt_test_empty_null_defined_text """select * from ${catalog_name}.regression.test_empty_null_defined_text order by id;"""
+ qt_test_empty_null_defined_text2 """select * from ${catalog_name}.regression.test_empty_null_defined_text where name is null order by id;"""
+ qt_test_empty_null_defined_text3 """select * from ${catalog_name}.regression.test_empty_null_defined_text where name = '' order by id;"""
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]