This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 51f48ed632a branch-3.1: [fix](iceberg) only enable dynamic partition 
pruning for identity partitions in Iceberg #58033 (#58181)
51f48ed632a is described below

commit 51f48ed632a6eeeb8f6fbf3aaaf652e0ae1dccb0
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 25 14:17:01 2025 +0800

    branch-3.1: [fix](iceberg) only enable dynamic partition pruning for 
identity partitions in Iceberg #58033 (#58181)
    
    Cherry-picked from #58033
    
    Co-authored-by: Socrates <[email protected]>
---
 .../create_preinstalled_scripts/iceberg/run19.sql  |  87 +++++++++++++++++
 .../doris/datasource/iceberg/IcebergUtils.java     |  38 +++++++-
 .../datasource/iceberg/source/IcebergScanNode.java |  18 +++-
 ..._runtime_filter_partition_pruning_transform.out | 108 +++++++++++++++++++++
 ...ntime_filter_partition_pruning_transform.groovy |  98 +++++++++++++++++++
 .../test_iceberg_write_transform_partitions.groovy |   6 ++
 6 files changed, 350 insertions(+), 5 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
index 42451e6c964..d019b89223c 100644
--- 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
+++ 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
@@ -364,6 +364,93 @@ VALUES (1, 'z', 0.00),
     (9, 'big', 9999999.99),
     (10, 'null', NULL);
 
+-- =============================================
+-- Time transform partition coverage (day/year/month/hour)
+-- =============================================
+
+-- Day transform by TIMESTAMP
+CREATE TABLE day_partitioned (
+    id BIGINT,
+    name STRING,
+    ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (day(ts));
+
+INSERT INTO day_partitioned VALUES
+    (1, 'd1', TIMESTAMP '2024-01-15 08:00:00'),
+    (2, 'd2', TIMESTAMP '2024-01-15 12:30:00'),
+    (3, 'd3', TIMESTAMP '2024-01-15 23:59:59'),
+    (4, 'd4', TIMESTAMP '2024-01-16 00:00:00'),
+    (5, 'd5', TIMESTAMP '2024-01-16 10:00:00'),
+    (6, 'd6', TIMESTAMP '2024-02-29 12:00:00'),
+    (7, 'd7', TIMESTAMP '2024-12-31 23:59:59'),
+    (8, 'null', NULL);
+
+-- Year transform by TIMESTAMP
+CREATE TABLE year_partitioned (
+    id BIGINT,
+    name STRING,
+    ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (year(ts));
+
+INSERT INTO year_partitioned VALUES
+    (1, 'y1', TIMESTAMP '2023-01-01 00:00:00'),
+    (2, 'y2', TIMESTAMP '2023-06-15 12:00:00'),
+    (3, 'y3', TIMESTAMP '2023-12-31 23:59:59'),
+    (4, 'y4', TIMESTAMP '2024-01-01 00:00:00'),
+    (5, 'y5', TIMESTAMP '2024-06-15 12:00:00'),
+    (6, 'y6', TIMESTAMP '2024-12-31 23:59:59'),
+    (7, 'y7', TIMESTAMP '2025-01-01 00:00:00'),
+    (8, 'null', NULL);
+
+-- Month transform by TIMESTAMP
+CREATE TABLE month_partitioned (
+    id BIGINT,
+    name STRING,
+    ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (month(ts));
+
+INSERT INTO month_partitioned VALUES
+    (1, 'm1', TIMESTAMP '2024-01-01 00:00:00'),
+    (2, 'm2', TIMESTAMP '2024-01-15 12:00:00'),
+    (3, 'm3', TIMESTAMP '2024-01-31 23:59:59'),
+    (4, 'm4', TIMESTAMP '2024-02-01 00:00:00'),
+    (5, 'm5', TIMESTAMP '2024-02-15 12:00:00'),
+    (6, 'm6', TIMESTAMP '2024-02-29 23:59:59'),
+    (7, 'm7', TIMESTAMP '2024-12-01 00:00:00'),
+    (8, 'm8', TIMESTAMP '2024-12-31 23:59:59'),
+    (9, 'null', NULL);
+
+-- Hour transform by TIMESTAMP
+CREATE TABLE hour_partitioned (
+    id BIGINT,
+    name STRING,
+    ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (hour(ts));
+
+INSERT INTO hour_partitioned VALUES
+    (1, 'h1', TIMESTAMP '2024-01-15 10:00:00'),
+    (2, 'h2', TIMESTAMP '2024-01-15 10:30:00'),
+    (3, 'h3', TIMESTAMP '2024-01-15 10:59:59'),
+    (4, 'h4', TIMESTAMP '2024-01-15 11:00:00'),
+    (5, 'h5', TIMESTAMP '2024-01-15 11:30:00'),
+    (6, 'h6', TIMESTAMP '2024-01-15 23:00:00'),
+    (7, 'h7', TIMESTAMP '2024-01-15 23:59:59'),
+    (8, 'h8', TIMESTAMP '2024-01-16 00:00:00'),
+    (9, 'null', NULL);
+
+-- Create _copy tables for write testing
+CREATE TABLE day_partitioned_copy AS
+SELECT * FROM day_partitioned;
+
+CREATE TABLE year_partitioned_copy AS
+SELECT * FROM year_partitioned;
+
+CREATE TABLE month_partitioned_copy AS
+SELECT * FROM month_partitioned;
+
+CREATE TABLE hour_partitioned_copy AS
+SELECT * FROM hour_partitioned;
+
 -- create table for testing query partitions with snapshot has been expired
 
 CREATE TABLE test_partitions_with_expired_snapshot (
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 767b7cf68a2..698469db93b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -613,11 +613,47 @@ public class IcebergUtils {
         }
     }
 
-    public static Map<String, String> getPartitionInfoMap(PartitionData 
partitionData, String timeZone) {
+    /**
+     * Get partition info map for identity partitions only, considering 
partition
+     * evolution.
+     * If any partition field uses a non-identity transform (e.g., day, bucket,
+     * truncate), this method returns null so that the caller skips dynamic
+     * partition pruning.
+     *
+     * @param partitionData The partition data from the file
+     * @param partitionSpec The partition spec corresponding to the file's 
specId
+     *                      (required)
+     * @param timeZone      The time zone for timestamp serialization
+     * @return Map of partition field name to partition value string, or null 
if
+     *         there are non-identity partitions
+     */
+    public static Map<String, String> getPartitionInfoMap(PartitionData 
partitionData, PartitionSpec partitionSpec,
+            String timeZone) {
         Map<String, String> partitionInfoMap = new HashMap<>();
         List<NestedField> fields = 
partitionData.getPartitionType().asNestedType().fields();
+
+        // Every partition field must use the identity transform.
+        // If any field does not, return null to skip dynamic partition pruning.
+        List<PartitionField> partitionFields = partitionSpec.fields();
+        Preconditions.checkArgument(fields.size() == partitionFields.size(),
+                "PartitionData fields size does not match PartitionSpec fields 
size");
+
         for (int i = 0; i < fields.size(); i++) {
             NestedField field = fields.get(i);
+            PartitionField partitionField = partitionFields.get(i);
+
+            // Only process identity transform partitions
+            // For other transforms (day, bucket, truncate, etc.), skip 
dynamic partition
+            // pruning
+            if (!partitionField.transform().isIdentity()) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug(
+                            "Skip dynamic partition pruning for non-identity 
partition field: {} with transform: {}",
+                            field.name(), 
partitionField.transform().toString());
+                }
+                return null;
+            }
+
             Object value = partitionData.get(i);
             try {
                 String partitionString = serializePartitionValue(field.type(), 
value, timeZone);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index ce2e057ab49..d0415c72285 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -66,6 +66,7 @@ import org.apache.iceberg.FileScanTask;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.MetadataColumns;
 import org.apache.iceberg.PartitionData;
+import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableScan;
@@ -398,10 +399,19 @@ public class IcebergScanNode extends FileQueryScanNode {
         if (isPartitionedTable) {
             PartitionData partitionData = (PartitionData) 
fileScanTask.file().partition();
             if (sessionVariable.isEnableRuntimeFilterPartitionPrune()) {
-                // If the partition data is not in the map, we need to 
calculate the partition
-                Map<String, String> partitionInfoMap = 
partitionMapInfos.computeIfAbsent(partitionData, k -> {
-                    return IcebergUtils.getPartitionInfoMap(partitionData, 
sessionVariable.getTimeZone());
-                });
+                // Get specId and corresponding PartitionSpec to handle 
partition evolution
+                int specId = fileScanTask.file().specId();
+                PartitionSpec partitionSpec = icebergTable.specs().get(specId);
+
+                Preconditions.checkNotNull(partitionSpec, "Partition spec with 
specId %s not found for table %s",
+                        specId, icebergTable.name());
+                Map<String, String> partitionInfoMap = 
partitionMapInfos.computeIfAbsent(
+                        partitionData, k -> {
+                            return 
IcebergUtils.getPartitionInfoMap(partitionData, partitionSpec,
+                                    sessionVariable.getTimeZone());
+                        });
+                // Only set partition values if all partitions are identity 
transform
+                // For non-identity partitions, getPartitionInfoMap returns 
null to skip dynamic partition pruning
                 if (partitionInfoMap != null) {
                     split.setIcebergPartitionValues(partitionInfoMap);
                 }
diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
index 1c26361afa5..fb18cc11885 100644
--- 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
+++ 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
@@ -71,6 +71,60 @@
 -- !trunc_decimal_in --
 2
 
+-- !day_eq --
+1
+
+-- !day_in --
+2
+
+-- !day_range_ge --
+7
+
+-- !day_range_lt --
+3
+
+-- !day_range_between --
+3
+
+-- !day_range_multi --
+5
+
+-- !year_eq --
+1
+
+-- !year_in --
+2
+
+-- !year_range --
+3
+
+-- !year_range_cross --
+3
+
+-- !month_eq --
+1
+
+-- !month_in --
+2
+
+-- !month_range --
+3
+
+-- !month_range_multi --
+6
+
+-- !hour_eq --
+1
+
+-- !hour_in --
+2
+
+-- !hour_range --
+3
+
+-- !hour_range_multi --
+5
+
 -- !bucket_int_eq --
 1
 
@@ -143,3 +197,57 @@
 -- !trunc_decimal_in --
 2
 
+-- !day_eq --
+1
+
+-- !day_in --
+2
+
+-- !day_range_ge --
+7
+
+-- !day_range_lt --
+3
+
+-- !day_range_between --
+3
+
+-- !day_range_multi --
+5
+
+-- !year_eq --
+1
+
+-- !year_in --
+2
+
+-- !year_range --
+3
+
+-- !year_range_cross --
+3
+
+-- !month_eq --
+1
+
+-- !month_in --
+2
+
+-- !month_range --
+3
+
+-- !month_range_multi --
+6
+
+-- !hour_eq --
+1
+
+-- !hour_in --
+2
+
+-- !hour_range --
+3
+
+-- !hour_range_multi --
+5
+
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
index 63a723ebe9e..7a2dbcfab92 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
@@ -201,6 +201,104 @@ 
suite("test_iceberg_runtime_filter_partition_pruning_transform", "p0,external,do
                  group by partition_key having count(*) > 0
                  order by partition_key desc limit 2);
         """
+
+        // Time transform partitions (day/year/month/hour)
+        // Note: runtime filter partition pruning is only applied to identity partitions.
+        // Day/Year/Month/Hour (like Bucket/Truncate) tables must still return correct results.
+        
+        // Day transform tests
+        qt_day_eq """
+            select count(*) from day_partitioned where ts =
+                (select ts from day_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 1);
+        """
+        qt_day_in """
+            select count(*) from day_partitioned where ts in
+                (select ts from day_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 2);
+        """
+        qt_day_range_ge """
+            select count(*) from day_partitioned where ts >= '2024-01-15 
00:00:00';
+        """
+        qt_day_range_lt """
+            select count(*) from day_partitioned where ts < '2024-01-16 
00:00:00';
+        """
+        qt_day_range_between """
+            select count(*) from day_partitioned where ts >= '2024-01-15 
00:00:00' 
+                and ts < '2024-01-16 00:00:00';
+        """
+        qt_day_range_multi """
+            select count(*) from day_partitioned where ts >= '2024-01-15 
00:00:00' 
+                and ts < '2024-01-17 00:00:00';
+        """
+
+        // Year transform tests
+        qt_year_eq """
+            select count(*) from year_partitioned where ts =
+                (select ts from year_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 1);
+        """
+        qt_year_in """
+            select count(*) from year_partitioned where ts in
+                (select ts from year_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 2);
+        """
+        qt_year_range """
+            select count(*) from year_partitioned where ts >= '2024-01-01 
00:00:00' 
+                and ts < '2025-01-01 00:00:00';
+        """
+        qt_year_range_cross """
+            select count(*) from year_partitioned where ts >= '2023-06-01 
00:00:00' 
+                and ts < '2024-06-01 00:00:00';
+        """
+
+        // Month transform tests
+        qt_month_eq """
+            select count(*) from month_partitioned where ts =
+                (select ts from month_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 1);
+        """
+        qt_month_in """
+            select count(*) from month_partitioned where ts in
+                (select ts from month_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 2);
+        """
+        qt_month_range """
+            select count(*) from month_partitioned where ts >= '2024-01-01 
00:00:00' 
+                and ts < '2024-02-01 00:00:00';
+        """
+        qt_month_range_multi """
+            select count(*) from month_partitioned where ts >= '2024-01-01 
00:00:00' 
+                and ts < '2024-03-01 00:00:00';
+        """
+
+        // Hour transform tests
+        qt_hour_eq """
+            select count(*) from hour_partitioned where ts =
+                (select ts from hour_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 1);
+        """
+        qt_hour_in """
+            select count(*) from hour_partitioned where ts in
+                (select ts from hour_partitioned
+                 group by ts having count(*) > 0
+                 order by ts desc limit 2);
+        """
+        qt_hour_range """
+            select count(*) from hour_partitioned where ts >= '2024-01-15 
10:00:00' 
+                and ts < '2024-01-15 11:00:00';
+        """
+        qt_hour_range_multi """
+            select count(*) from hour_partitioned where ts >= '2024-01-15 
10:00:00' 
+                and ts < '2024-01-15 12:00:00';
+        """
     }
     try {
         sql """ set time_zone = 'Asia/Shanghai'; """
diff --git 
a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
 
b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
index 2662de4dfda..2d8e265222e 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
@@ -64,6 +64,12 @@ suite("test_iceberg_write_transform_partitions", 
"p0,external,iceberg,external_d
         test_write_transform_partitions("truncate_int_10");
         test_write_transform_partitions("truncate_bigint_100");
         test_write_transform_partitions("truncate_decimal_10");
+        
+        // Time transform partitions (day/year/month/hour)
+        test_write_transform_partitions("day_partitioned");
+        test_write_transform_partitions("year_partitioned");
+        test_write_transform_partitions("month_partitioned");
+        test_write_transform_partitions("hour_partitioned");
     } finally {
         sql """ unset variable time_zone; """
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to