This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 93cb6f9d19f branch-3.1: [fix](hive)fix querying hive text table with NULL DEFINED AS '' #55626 (#55661)
93cb6f9d19f is described below

commit 93cb6f9d19f6c343f289bc34003f5569bed06bd8
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Sep 5 18:21:47 2025 +0800

    branch-3.1: [fix](hive)fix querying hive text table with NULL DEFINED AS '' #55626 (#55661)
    
    Cherry-picked from #55626
    
    Co-authored-by: Socrates <[email protected]>
---
 be/src/vec/exec/format/text/text_reader.cpp        |   8 +++---
 .../regression/serde_prop/some_serde_table.hql     |  30 +++++++++++++++++++++
 .../datasource/hive/HiveMetaStoreClientHelper.java |   8 +++---
 .../doris/datasource/hive/HiveProperties.java      |  17 +++++-------
 .../hive/test_hive_serde_prop.out                  | Bin 1534 -> 2092 bytes
 .../hive/test_hive_serde_prop.groovy               |   8 ++++++
 6 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/be/src/vec/exec/format/text/text_reader.cpp 
b/be/src/vec/exec/format/text/text_reader.cpp
index 7913a9bdb2b..ab7f92f489f 100644
--- a/be/src/vec/exec/format/text/text_reader.cpp
+++ b/be/src/vec/exec/format/text/text_reader.cpp
@@ -165,11 +165,9 @@ Status TextReader::_validate_line(const Slice& line, bool* success) {
 
 Status TextReader::_deserialize_nullable_string(IColumn& column, Slice& slice) {
     auto& null_column = assert_cast<ColumnNullable&>(column);
-    if (_options.null_len > 0) {
-        if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
-            null_column.insert_data(nullptr, 0);
-            return Status::OK();
-        }
+    if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
+        null_column.insert_data(nullptr, 0);
+        return Status::OK();
     }
     static DataTypeStringSerDe stringSerDe;
     auto st = stringSerDe.deserialize_one_cell_from_hive_text(null_column.get_nested_column(),
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index 0368547f8be..81bdf03da8e 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -170,3 +170,33 @@ INSERT INTO TABLE test_open_csv_standard_prop VALUES
 INSERT INTO TABLE test_open_csv_custom_prop VALUES 
 (1, 'John Doe', 28, 50000.75, true, '2022-01-15', '2023-10-21 14:30:00', 4.5, 
'Senior Developer'),
 (2, 'Jane,Smith', NULL, NULL, false, '2020-05-20', NULL, NULL, '\"Project 
Manager\"');
+
+CREATE TABLE test_empty_null_format_text (
+  id INT,
+  name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+TBLPROPERTIES (
+  "serialization.null.format"=""
+);
+
+INSERT INTO TABLE test_empty_null_format_text VALUES
+  (1, 'Alice'),
+  (2, NULL),
+  (3, '');
+
+CREATE TABLE test_empty_null_defined_text (
+  id INT,
+  name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+NULL DEFINED AS ''
+STORED AS TEXTFILE;
+
+INSERT INTO TABLE test_empty_null_defined_text VALUES
+  (1, 'Alice'),
+  (2, NULL),
+  (3, '');
\ No newline at end of file
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 9874c18c5b3..cf3f02a25f3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -863,7 +863,7 @@ public class HiveMetaStoreClientHelper {
 
     private static Optional<String> firstNonNullable(String... values) {
         for (String value : values) {
-            if (!Strings.isNullOrEmpty(value)) {
+            if (value != null) {
                 return Optional.of(value);
             }
         }
@@ -884,8 +884,10 @@ public class HiveMetaStoreClientHelper {
      *
      * @param altValue
      *                 The string containing a number.
+     * @param defValue
+     *                 The default value to return if altValue is invalid.
      */
-    public static String getByte(String altValue) {
+    public static String getByte(String altValue, String defValue) {
         if (altValue != null && altValue.length() > 0) {
             try {
                 return Character.toString((char) ((Byte.parseByte(altValue) + 
256) % 256));
@@ -893,6 +895,6 @@ public class HiveMetaStoreClientHelper {
                 return altValue.substring(0, 1);
             }
         }
-        return null;
+        return defValue;
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
index 1be78e41b89..36c147da142 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
@@ -89,8 +89,8 @@ public class HiveProperties {
         Optional<String> fieldDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER);
         Optional<String> serFormat = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT);
         String delimiter = HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat);
-        return supportMultiChar ? delimiter : 
HiveMetaStoreClientHelper.getByte(delimiter);
+                "", fieldDelim, serFormat);
+        return supportMultiChar ? delimiter : 
HiveMetaStoreClientHelper.getByte(delimiter, DEFAULT_FIELD_DELIMITER);
     }
 
     public static String getSeparatorChar(Table table) {
@@ -102,13 +102,13 @@ public class HiveProperties {
     public static String getLineDelimiter(Table table) {
         Optional<String> lineDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_LINE_DELIMITER);
         return 
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_LINE_DELIMITER, lineDelim));
+                "", lineDelim), DEFAULT_LINE_DELIMITER);
     }
 
     public static String getMapKvDelimiter(Table table) {
         Optional<String> mapkvDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_MAP_KV_DELIMITER);
         return 
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+                "", mapkvDelim), DEFAULT_MAP_KV_DELIMITER);
     }
 
     public static String getCollectionDelimiter(Table table) {
@@ -117,18 +117,13 @@ public class HiveProperties {
         Optional<String> collectionDelimHive3 = 
HiveMetaStoreClientHelper.getSerdeProperty(table,
                 PROP_COLLECTION_DELIMITER_HIVE3);
         return 
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, 
collectionDelimHive3));
+                "", collectionDelimHive2, collectionDelimHive3), 
DEFAULT_COLLECTION_DELIMITER);
     }
 
     public static Optional<String> getEscapeDelimiter(Table table) {
         Optional<String> escapeDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_ESCAPE_DELIMITER);
         if (escapeDelim.isPresent()) {
-            String escape = 
HiveMetaStoreClientHelper.getByte(escapeDelim.get());
-            if (escape != null) {
-                return Optional.of(escape);
-            } else {
-                return Optional.of(DEFAULT_ESCAPE_DELIMIER);
-            }
+            return 
Optional.of(HiveMetaStoreClientHelper.getByte(escapeDelim.get(), 
DEFAULT_ESCAPE_DELIMIER));
         }
         return Optional.empty();
     }
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out 
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index c2415c058f1..cda92c0519a 100644
Binary files 
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out and 
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out differ
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 52cdd25eb07..d4bb051214d 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -55,6 +55,14 @@ suite("test_hive_serde_prop", 
"external_docker,hive,external_docker_hive,p0,exte
         qt_test_open_csv_default_prop """select * from 
${catalog_name}.regression.test_open_csv_default_prop order by id;"""
         qt_test_open_csv_standard_prop """select * from 
${catalog_name}.regression.test_open_csv_standard_prop order by id;"""
         qt_test_open_csv_custom_prop """select * from 
${catalog_name}.regression.test_open_csv_custom_prop order by id;"""
+
+        qt_test_empty_null_format_text """select * from 
${catalog_name}.regression.test_empty_null_format_text order by id;"""
+        qt_test_empty_null_format_text2 """select * from 
${catalog_name}.regression.test_empty_null_format_text where name is null order 
by id;"""
+        qt_test_empty_null_format_text3 """select * from 
${catalog_name}.regression.test_empty_null_format_text where name = '' order by 
id;"""
+
+        qt_test_empty_null_defined_text """select * from 
${catalog_name}.regression.test_empty_null_defined_text order by id;"""
+        qt_test_empty_null_defined_text2 """select * from 
${catalog_name}.regression.test_empty_null_defined_text where name is null 
order by id;"""
+        qt_test_empty_null_defined_text3 """select * from 
${catalog_name}.regression.test_empty_null_defined_text where name = '' order 
by id;"""
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to