This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new ad4345d2ce8 branch-3.1: [fix](hudi) fix querying hudi table with timestamp key #53791 (#53961)
ad4345d2ce8 is described below

commit ad4345d2ce85e97c47b78d577f0d60f421159d41
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jul 28 15:43:13 2025 +0800

    branch-3.1: [fix](hudi) fix querying hudi table with timestamp key #53791 (#53961)
    
    Cherry-picked from #53791
    
    Co-authored-by: Socrates <[email protected]>
---
 .../hudi/source/HudiCachedPartitionProcessor.java     |   6 ++++++
 .../hudi/source/HudiPartitionProcessor.java           |   9 +++++----
 .../hudi/test_hudi_partition_prune.out                | Bin 5389 -> 5549 bytes
 .../hudi/test_hudi_partition_prune.groovy             |  10 +++++-----
 4 files changed, 16 insertions(+), 9 deletions(-)
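
The underlying issue, as the new comments in the patch describe: Hudi escapes special characters such as ':' in a timestamp when building partition paths, and HMS escapes the stored partition name once more, so listPartitionNames can return a double-encoded value (%3A becomes %253A). Below is a minimal standalone sketch of the resulting two-step decode; the "ts" partition column and the partition name are hypothetical, and only FileUtils.unescapePathName is taken from the patch itself.

    import org.apache.hadoop.hive.common.FileUtils;

    public class UnescapeSketch {
        public static void main(String[] args) {
            // Hypothetical partition name as HMS listPartitionNames might return it for a
            // timestamp partition key (':' escaped by Hudi, then escaped again by HMS).
            String fromHms = "ts=2021-01-01 00%253A00%253A00";

            // First unescape (the new step in HudiCachedPartitionProcessor):
            // recovers the Hudi partition path form "ts=2021-01-01 00%3A00%3A00".
            String hudiPath = FileUtils.unescapePathName(fromHms);

            // Second unescape (the unescape added in HudiPartitionProcessor by this patch):
            // yields the actual value "2021-01-01 00:00:00".
            String value = FileUtils.unescapePathName(hudiPath.substring("ts=".length()));

            System.out.println(hudiPath);
            System.out.println(value);
        }
    }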

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 1db39c230a1..6356698c067 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -31,6 +31,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable;
 import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -154,6 +155,11 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
                     // we can still obtain the partition information through the HMS API.
                     partitionNames = catalog.getClient()
                             .listPartitionNames(table.getRemoteDbName(), table.getRemoteName());
+                    // HMS stored Hudi partition paths may have double encoding issue (e.g., %3A
+                    // becomes %253A), need to unescape first here.
+                    partitionNames = partitionNames.stream()
+                            .map(FileUtils::unescapePathName)
+                            .collect(Collectors.toList());
                     if (partitionNames.size() == 0) {
                         LOG.warn("Failed to get partitions from hms api, switch it from hudi api.");
                         partitionNames = getAllPartitionNames(tableMetaClient);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
index cb5e2993a56..b1e5bd4a82d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
@@ -19,6 +19,7 @@ package org.apache.doris.datasource.hudi.source;
 
 import org.apache.doris.datasource.ExternalTable;
 
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -98,8 +99,8 @@ public abstract class HudiPartitionProcessor {
                 } else {
                     partitionValue = partitionPath;
                 }
-                // TODO: In hive, the specific characters like '=', '/' will be url encoded
-                return Collections.singletonList(partitionValue);
+                // In hive, the specific characters like '=', '/' will be url encoded
+                return Collections.singletonList(FileUtils.unescapePathName(partitionValue));
             } else {
                 // If the partition column size is not equal to the partition fragments size
                 // and the partition column size > 1, we do not know how to map the partition
@@ -119,9 +120,9 @@ public abstract class HudiPartitionProcessor {
             for (int i = 0; i < partitionFragments.length; i++) {
                 String prefix = partitionColumns.get(i) + "=";
                 if (partitionFragments[i].startsWith(prefix)) {
-                    partitionValues.add(partitionFragments[i].substring(prefix.length()));
+                    partitionValues.add(FileUtils.unescapePathName(partitionFragments[i].substring(prefix.length())));
                 } else {
-                    partitionValues.add(partitionFragments[i]);
+                    partitionValues.add(FileUtils.unescapePathName(partitionFragments[i]));
                 }
             }
             return partitionValues;
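
For the HudiPartitionProcessor hunk above: the patched loop strips the "column=" prefix from each path fragment when present and then unescapes the remainder, so a timestamp written into the path as "2021-01-01 00%3A00%3A00" is returned as "2021-01-01 00:00:00". A rough standalone sketch of that behavior follows; the class and method names here are illustrative, not the ones used in Doris.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.common.FileUtils;

    class PartitionValueSketch {
        // Mirrors the patched loop: drop the "col=" prefix if present, then unescape the value.
        static List<String> parse(String partitionPath, List<String> partitionColumns) {
            String[] fragments = partitionPath.split("/");
            List<String> values = new ArrayList<>();
            for (int i = 0; i < fragments.length; i++) {
                String prefix = partitionColumns.get(i) + "=";
                String raw = fragments[i].startsWith(prefix)
                        ? fragments[i].substring(prefix.length())
                        : fragments[i];
                values.add(FileUtils.unescapePathName(raw));
            }
            return values;
        }

        public static void main(String[] args) {
            List<String> values = parse("dt=2021-01-01/ts=2021-01-01 00%3A00%3A00",
                    Arrays.asList("dt", "ts"));
            System.out.println(values); // [2021-01-01, 2021-01-01 00:00:00]
        }
    }
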
diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out
index fd3eafa0255..d3d4600a0e6 100644
Binary files a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out and b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out differ
diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
index 063439d9a87..629923da306 100644
--- a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
+++ b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
@@ -320,11 +320,11 @@ suite("test_hudi_partition_prune", "p2,external,hudi,external_remote,external_re
             sql("${one_partition_date}")
             contains "partition=1/2"
         }
-        // qt_one_partition_timestamp one_partition_timestamp
-        // explain {
-        //     sql("${one_partition_timestamp}")
-        //     contains "partition=1/2"
-        // }
+        qt_one_partition_timestamp one_partition_timestamp
+        explain {
+            sql("${one_partition_timestamp}")
+            contains "partition=1/2"
+        }
 
         sql """drop catalog if exists ${catalog_name};"""
 

