This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new ad4345d2ce8 branch-3.1: [fix](hudi) fix querying hudi table with timestamp key #53791 (#53961)
ad4345d2ce8 is described below
commit ad4345d2ce85e97c47b78d577f0d60f421159d41
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jul 28 15:43:13 2025 +0800
    branch-3.1: [fix](hudi) fix querying hudi table with timestamp key #53791 (#53961)
Cherry-picked from #53791
Co-authored-by: Socrates <[email protected]>
---
.../hudi/source/HudiCachedPartitionProcessor.java | 6 ++++++
.../hudi/source/HudiPartitionProcessor.java | 9 +++++----
.../hudi/test_hudi_partition_prune.out | Bin 5389 -> 5549 bytes
.../hudi/test_hudi_partition_prune.groovy | 10 +++++-----
4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 1db39c230a1..6356698c067 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -31,6 +31,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable;
 import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -154,6 +155,11 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
                     // we can still obtain the partition information through the HMS API.
                     partitionNames = catalog.getClient()
                             .listPartitionNames(table.getRemoteDbName(), table.getRemoteName());
+                    // Hudi partition paths stored in HMS may be double-encoded (e.g., %3A
+                    // becomes %253A), so unescape them here first.
+                    partitionNames = partitionNames.stream()
+                            .map(FileUtils::unescapePathName)
+                            .collect(Collectors.toList());
                     if (partitionNames.size() == 0) {
                         LOG.warn("Failed to get partitions from hms api, switch it from hudi api.");
                         partitionNames = getAllPartitionNames(tableMetaClient);
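For context, a minimal, self-contained sketch of the double-encoding issue the
added lines work around. It is not part of the patch: unescapeOnce only mimics
the %XX-decoding behavior of FileUtils.unescapePathName from hive-common, and
the sample partition name is hypothetical.

    // Shows why a partition name returned by the HMS API needs one extra
    // unescape pass before it matches the path Hudi writes on storage.
    public class DoubleEncodingSketch {
        // Decode %XX escape sequences a single time (rough stand-in for
        // org.apache.hadoop.hive.common.FileUtils.unescapePathName).
        static String unescapeOnce(String path) {
            StringBuilder sb = new StringBuilder(path.length());
            int i = 0;
            while (i < path.length()) {
                char c = path.charAt(i);
                if (c == '%' && i + 2 < path.length()) {
                    sb.append((char) Integer.parseInt(path.substring(i + 1, i + 3), 16));
                    i += 3;
                } else {
                    sb.append(c);
                    i++;
                }
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            // Hypothetical timestamp partition: ':' was escaped to %3A on
            // storage, then '%' was escaped again to %25 when stored in HMS.
            String fromHms = "ts=2025-07-28 15%253A43%253A13";
            String onStorage = unescapeOnce(fromHms); // ts=2025-07-28 15%3A43%3A13
            String value = unescapeOnce(onStorage);   // ts=2025-07-28 15:43:13
            System.out.println(onStorage + " -> " + value);
        }
    }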
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
index cb5e2993a56..b1e5bd4a82d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiPartitionProcessor.java
@@ -19,6 +19,7 @@ package org.apache.doris.datasource.hudi.source;
 import org.apache.doris.datasource.ExternalTable;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -98,8 +99,8 @@ public abstract class HudiPartitionProcessor {
             } else {
                 partitionValue = partitionPath;
             }
-            // TODO: In hive, the specific characters like '=', '/' will be url encoded
-            return Collections.singletonList(partitionValue);
+            // In Hive, special characters like '=' and '/' are URL-encoded
+            return Collections.singletonList(FileUtils.unescapePathName(partitionValue));
         } else {
             // If the partition column size is not equal to the partition fragments size
             // and the partition column size > 1, we do not know how to map the partition
@@ -119,9 +120,9 @@ public abstract class HudiPartitionProcessor {
             for (int i = 0; i < partitionFragments.length; i++) {
                 String prefix = partitionColumns.get(i) + "=";
                 if (partitionFragments[i].startsWith(prefix)) {
-                    partitionValues.add(partitionFragments[i].substring(prefix.length()));
+                    partitionValues.add(FileUtils.unescapePathName(partitionFragments[i].substring(prefix.length())));
                 } else {
-                    partitionValues.add(partitionFragments[i]);
+                    partitionValues.add(FileUtils.unescapePathName(partitionFragments[i]));
                 }
             }
             return partitionValues;
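For completeness, a hedged sketch of what the patched parsePartitionValues
logic achieves for Hive-style "col=value" paths. The class and sample path are
illustrative only, and unescapeOnce is the helper from the sketch above; the
real code uses FileUtils.unescapePathName.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class PartitionParseSketch {
        // Split a partition path into fragments, strip each "col=" prefix
        // when present, and unescape the remainder, mirroring the loop above.
        static List<String> parsePartitionValues(List<String> columns, String path) {
            String[] fragments = path.split("/");
            List<String> values = new ArrayList<>(fragments.length);
            for (int i = 0; i < fragments.length; i++) {
                String prefix = columns.get(i) + "=";
                String raw = fragments[i].startsWith(prefix)
                        ? fragments[i].substring(prefix.length())
                        : fragments[i];
                values.add(DoubleEncodingSketch.unescapeOnce(raw));
            }
            return values;
        }

        public static void main(String[] args) {
            // Prints [2025-07-28, 2025-07-28 15:43:13]
            System.out.println(parsePartitionValues(
                    Arrays.asList("dt", "ts"),
                    "dt=2025-07-28/ts=2025-07-28 15%3A43%3A13"));
        }
    }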
diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out
index fd3eafa0255..d3d4600a0e6 100644
Binary files a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out and b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out differ
diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
index 063439d9a87..629923da306 100644
--- a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
+++ b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy
@@ -320,11 +320,11 @@ suite("test_hudi_partition_prune", "p2,external,hudi,external_remote,external_re
             sql("${one_partition_date}")
             contains "partition=1/2"
         }
-        // qt_one_partition_timestamp one_partition_timestamp
-        // explain {
-        //     sql("${one_partition_timestamp}")
-        //     contains "partition=1/2"
-        // }
+        qt_one_partition_timestamp one_partition_timestamp
+        explain {
+            sql("${one_partition_timestamp}")
+            contains "partition=1/2"
+        }
         sql """drop catalog if exists ${catalog_name};"""