This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ebe2d222d78 [fix](hudi) fix querying hudi table with timestamp key 
(#53791)
ebe2d222d78 is described below

commit ebe2d222d7834804019c81e650b2f4228cc66984
Author: Socrates <[email protected]>
AuthorDate: Mon Jul 28 11:49:13 2025 +0800

    [fix](hudi) fix querying hudi table with timestamp key (#53791)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    When querying a Hudi table with timestamp type as the partition key in
    Doris, an error will occur:
    <img width="1280" height="455" alt="image"
    
src="https://github.com/user-attachments/assets/1ea05e75-9713-46b0-99e4-95584309a1a2"
    />
    Issue Details:
    - Hudi tables with timestamp partition columns (e.g., 2023-12-01T08:00)
    store partition paths in HMS with URL encoding
    - Special characters like : in timestamps get encoded as %3A, and in
    some cases may be double-encoded as %253A
    
    **The Solution:** The changes add proper URL unescaping using
    `FileUtils.unescapePathName()` from Hadoop Hive common library to handle
    encoded partition paths correctly.
---
 .../hudi/source/HudiCachedPartitionProcessor.java     |   6 ++++++
 .../hudi/source/HudiPartitionProcessor.java           |   9 +++++----
 .../hudi/test_hudi_partition_prune.out                | Bin 5389 -> 5549 bytes
 .../hudi/test_hudi_partition_prune.groovy             |  10 +++++-----
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 1db39c230a1..6356698c067 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -31,6 +31,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable;
 import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -154,6 +155,11 @@ public class HudiCachedPartitionProcessor extends 
HudiPartitionProcessor {
                     // we can still obtain the partition information through 
the HMS API.
                     partitionNames = catalog.getClient()
                             .listPartitionNames(table.getRemoteDbName(), 
table.getRemoteName());
+                    // HMS stored Hudi partition paths may have double 
encoding issue (e.g., %3A
+                    // becomes %253A), need to unescape first here.
+                    partitionNames = partitionNames.stream()
+                            .map(FileUtils::unescapePathName)
+                            .collect(Collectors.toList());
                     if (partitionNames.size() == 0) {
                         LOG.warn("Failed to get partitions from hms api, 
switch it from hudi api.");
                         partitionNames = getAllPartitionNames(tableMetaClient);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
index cb5e2993a56..b1e5bd4a82d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
@@ -19,6 +19,7 @@ package org.apache.doris.datasource.hudi.source;
 
 import org.apache.doris.datasource.ExternalTable;
 
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -98,8 +99,8 @@ public abstract class HudiPartitionProcessor {
                 } else {
                     partitionValue = partitionPath;
                 }
-                // TODO: In hive, the specific characters like '=', '/' will 
be url encoded
-                return Collections.singletonList(partitionValue);
+                // In hive, the specific characters like '=', '/' will be url 
encoded
+                return 
Collections.singletonList(FileUtils.unescapePathName(partitionValue));
             } else {
                 // If the partition column size is not equal to the partition 
fragments size
                 // and the partition column size > 1, we do not know how to 
map the partition
@@ -119,9 +120,9 @@ public abstract class HudiPartitionProcessor {
             for (int i = 0; i < partitionFragments.length; i++) {
                 String prefix = partitionColumns.get(i) + "=";
                 if (partitionFragments[i].startsWith(prefix)) {
-                    
partitionValues.add(partitionFragments[i].substring(prefix.length()));
+                    
partitionValues.add(FileUtils.unescapePathName(partitionFragments[i].substring(prefix.length())));
                 } else {
-                    partitionValues.add(partitionFragments[i]);
+                    
partitionValues.add(FileUtils.unescapePathName(partitionFragments[i]));
                 }
             }
             return partitionValues;
diff --git 
a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out 
b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out
index fd3eafa0255..d3d4600a0e6 100644
Binary files 
a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out and 
b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out 
differ
diff --git 
a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
 
b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
index 063439d9a87..629923da306 100644
--- 
a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
+++ 
b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
@@ -320,11 +320,11 @@ suite("test_hudi_partition_prune", 
"p2,external,hudi,external_remote,external_re
             sql("${one_partition_date}")
             contains "partition=1/2"
         }
-        // qt_one_partition_timestamp one_partition_timestamp
-        // explain {
-        //     sql("${one_partition_timestamp}")
-        //     contains "partition=1/2"
-        // }
+        qt_one_partition_timestamp one_partition_timestamp
+        explain {
+            sql("${one_partition_timestamp}")
+            contains "partition=1/2"
+        }
 
         sql """drop catalog if exists ${catalog_name};"""
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to