This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 51f48ed632a branch-3.1: [fix](iceberg) only enable dynamic partition
pruning for identity partitions in Iceberg #58033 (#58181)
51f48ed632a is described below
commit 51f48ed632a6eeeb8f6fbf3aaaf652e0ae1dccb0
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 25 14:17:01 2025 +0800
branch-3.1: [fix](iceberg) only enable dynamic partition pruning for
identity partitions in Iceberg #58033 (#58181)
Cherry-picked from #58033
Co-authored-by: Socrates <[email protected]>
---
.../create_preinstalled_scripts/iceberg/run19.sql | 87 +++++++++++++++++
.../doris/datasource/iceberg/IcebergUtils.java | 38 +++++++-
.../datasource/iceberg/source/IcebergScanNode.java | 18 +++-
..._runtime_filter_partition_pruning_transform.out | 108 +++++++++++++++++++++
...ntime_filter_partition_pruning_transform.groovy | 98 +++++++++++++++++++
.../test_iceberg_write_transform_partitions.groovy | 6 ++
6 files changed, 350 insertions(+), 5 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
index 42451e6c964..d019b89223c 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
@@ -364,6 +364,93 @@ VALUES (1, 'z', 0.00),
(9, 'big', 9999999.99),
(10, 'null', NULL);
+-- =============================================
+-- Time transform partition coverage (day/year/month/hour)
+-- =============================================
+
+-- Day transform by TIMESTAMP
+CREATE TABLE day_partitioned (
+ id BIGINT,
+ name STRING,
+ ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (day(ts));
+
+INSERT INTO day_partitioned VALUES
+ (1, 'd1', TIMESTAMP '2024-01-15 08:00:00'),
+ (2, 'd2', TIMESTAMP '2024-01-15 12:30:00'),
+ (3, 'd3', TIMESTAMP '2024-01-15 23:59:59'),
+ (4, 'd4', TIMESTAMP '2024-01-16 00:00:00'),
+ (5, 'd5', TIMESTAMP '2024-01-16 10:00:00'),
+ (6, 'd6', TIMESTAMP '2024-02-29 12:00:00'),
+ (7, 'd7', TIMESTAMP '2024-12-31 23:59:59'),
+ (8, 'null', NULL);
+
+-- Year transform by TIMESTAMP
+CREATE TABLE year_partitioned (
+ id BIGINT,
+ name STRING,
+ ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (year(ts));
+
+INSERT INTO year_partitioned VALUES
+ (1, 'y1', TIMESTAMP '2023-01-01 00:00:00'),
+ (2, 'y2', TIMESTAMP '2023-06-15 12:00:00'),
+ (3, 'y3', TIMESTAMP '2023-12-31 23:59:59'),
+ (4, 'y4', TIMESTAMP '2024-01-01 00:00:00'),
+ (5, 'y5', TIMESTAMP '2024-06-15 12:00:00'),
+ (6, 'y6', TIMESTAMP '2024-12-31 23:59:59'),
+ (7, 'y7', TIMESTAMP '2025-01-01 00:00:00'),
+ (8, 'null', NULL);
+
+-- Month transform by TIMESTAMP
+CREATE TABLE month_partitioned (
+ id BIGINT,
+ name STRING,
+ ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (month(ts));
+
+INSERT INTO month_partitioned VALUES
+ (1, 'm1', TIMESTAMP '2024-01-01 00:00:00'),
+ (2, 'm2', TIMESTAMP '2024-01-15 12:00:00'),
+ (3, 'm3', TIMESTAMP '2024-01-31 23:59:59'),
+ (4, 'm4', TIMESTAMP '2024-02-01 00:00:00'),
+ (5, 'm5', TIMESTAMP '2024-02-15 12:00:00'),
+ (6, 'm6', TIMESTAMP '2024-02-29 23:59:59'),
+ (7, 'm7', TIMESTAMP '2024-12-01 00:00:00'),
+ (8, 'm8', TIMESTAMP '2024-12-31 23:59:59'),
+ (9, 'null', NULL);
+
+-- Hour transform by TIMESTAMP
+CREATE TABLE hour_partitioned (
+ id BIGINT,
+ name STRING,
+ ts TIMESTAMP
+) USING ICEBERG PARTITIONED BY (hour(ts));
+
+INSERT INTO hour_partitioned VALUES
+ (1, 'h1', TIMESTAMP '2024-01-15 10:00:00'),
+ (2, 'h2', TIMESTAMP '2024-01-15 10:30:00'),
+ (3, 'h3', TIMESTAMP '2024-01-15 10:59:59'),
+ (4, 'h4', TIMESTAMP '2024-01-15 11:00:00'),
+ (5, 'h5', TIMESTAMP '2024-01-15 11:30:00'),
+ (6, 'h6', TIMESTAMP '2024-01-15 23:00:00'),
+ (7, 'h7', TIMESTAMP '2024-01-15 23:59:59'),
+ (8, 'h8', TIMESTAMP '2024-01-16 00:00:00'),
+ (9, 'null', NULL);
+
+-- Create _copy tables for write testing
+CREATE TABLE day_partitioned_copy AS
+SELECT * FROM day_partitioned;
+
+CREATE TABLE year_partitioned_copy AS
+SELECT * FROM year_partitioned;
+
+CREATE TABLE month_partitioned_copy AS
+SELECT * FROM month_partitioned;
+
+CREATE TABLE hour_partitioned_copy AS
+SELECT * FROM hour_partitioned;
+
-- create table for testing query partitions with snapshot has been expired
CREATE TABLE test_partitions_with_expired_snapshot (
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 767b7cf68a2..698469db93b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -613,11 +613,47 @@ public class IcebergUtils {
}
}
- public static Map<String, String> getPartitionInfoMap(PartitionData
partitionData, String timeZone) {
+ /**
+ * Get partition info map for identity partitions only, considering
partition
+ * evolution.
+ * For non-identity partitions (e.g., day, bucket, truncate), returns null
to
+ * skip
+ * dynamic partition pruning.
+ *
+ * @param partitionData The partition data from the file
+ * @param partitionSpec The partition spec corresponding to the file's
specId
+ * (required)
+ * @param timeZone The time zone for timestamp serialization
+ * @return Map of partition field name to partition value string, or null
if
+ * there are non-identity partitions
+ */
+ public static Map<String, String> getPartitionInfoMap(PartitionData
partitionData, PartitionSpec partitionSpec,
+ String timeZone) {
Map<String, String> partitionInfoMap = new HashMap<>();
List<NestedField> fields =
partitionData.getPartitionType().asNestedType().fields();
+
+ // Check if all partition fields are identity transform
+ // If any field is not identity, return null to skip dynamic partition
pruning
+ List<PartitionField> partitionFields = partitionSpec.fields();
+ Preconditions.checkArgument(fields.size() == partitionFields.size(),
+ "PartitionData fields size does not match PartitionSpec fields
size");
+
for (int i = 0; i < fields.size(); i++) {
NestedField field = fields.get(i);
+ PartitionField partitionField = partitionFields.get(i);
+
+ // Only process identity transform partitions
+ // For other transforms (day, bucket, truncate, etc.), skip
dynamic partition
+ // pruning
+ if (!partitionField.transform().isIdentity()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Skip dynamic partition pruning for non-identity
partition field: {} with transform: {}",
+ field.name(),
partitionField.transform().toString());
+ }
+ return null;
+ }
+
Object value = partitionData.get(i);
try {
String partitionString = serializePartitionValue(field.type(),
value, timeZone);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index ce2e057ab49..d0415c72285 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -66,6 +66,7 @@ import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionData;
+import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
@@ -398,10 +399,19 @@ public class IcebergScanNode extends FileQueryScanNode {
if (isPartitionedTable) {
PartitionData partitionData = (PartitionData)
fileScanTask.file().partition();
if (sessionVariable.isEnableRuntimeFilterPartitionPrune()) {
- // If the partition data is not in the map, we need to
calculate the partition
- Map<String, String> partitionInfoMap =
partitionMapInfos.computeIfAbsent(partitionData, k -> {
- return IcebergUtils.getPartitionInfoMap(partitionData,
sessionVariable.getTimeZone());
- });
+ // Get specId and corresponding PartitionSpec to handle
partition evolution
+ int specId = fileScanTask.file().specId();
+ PartitionSpec partitionSpec = icebergTable.specs().get(specId);
+
+ Preconditions.checkNotNull(partitionSpec, "Partition spec with
specId %s not found for table %s",
+ specId, icebergTable.name());
+ Map<String, String> partitionInfoMap =
partitionMapInfos.computeIfAbsent(
+ partitionData, k -> {
+ return
IcebergUtils.getPartitionInfoMap(partitionData, partitionSpec,
+ sessionVariable.getTimeZone());
+ });
+ // Only set partition values when every partition field uses the identity transform.
+ // For non-identity partitions, getPartitionInfoMap returns null to skip dynamic partition pruning.
if (partitionInfoMap != null) {
split.setIcebergPartitionValues(partitionInfoMap);
}
diff --git
a/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
index 1c26361afa5..fb18cc11885 100644
---
a/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
+++
b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
@@ -71,6 +71,60 @@
-- !trunc_decimal_in --
2
+-- !day_eq --
+1
+
+-- !day_in --
+2
+
+-- !day_range_ge --
+7
+
+-- !day_range_lt --
+3
+
+-- !day_range_between --
+3
+
+-- !day_range_multi --
+5
+
+-- !year_eq --
+1
+
+-- !year_in --
+2
+
+-- !year_range --
+3
+
+-- !year_range_cross --
+3
+
+-- !month_eq --
+1
+
+-- !month_in --
+2
+
+-- !month_range --
+3
+
+-- !month_range_multi --
+6
+
+-- !hour_eq --
+1
+
+-- !hour_in --
+2
+
+-- !hour_range --
+3
+
+-- !hour_range_multi --
+5
+
-- !bucket_int_eq --
1
@@ -143,3 +197,57 @@
-- !trunc_decimal_in --
2
+-- !day_eq --
+1
+
+-- !day_in --
+2
+
+-- !day_range_ge --
+7
+
+-- !day_range_lt --
+3
+
+-- !day_range_between --
+3
+
+-- !day_range_multi --
+5
+
+-- !year_eq --
+1
+
+-- !year_in --
+2
+
+-- !year_range --
+3
+
+-- !year_range_cross --
+3
+
+-- !month_eq --
+1
+
+-- !month_in --
+2
+
+-- !month_range --
+3
+
+-- !month_range_multi --
+6
+
+-- !hour_eq --
+1
+
+-- !hour_in --
+2
+
+-- !hour_range --
+3
+
+-- !hour_range_multi --
+5
+
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
index 63a723ebe9e..7a2dbcfab92 100644
---
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
@@ -201,6 +201,104 @@
suite("test_iceberg_runtime_filter_partition_pruning_transform", "p0,external,do
group by partition_key having count(*) > 0
order by partition_key desc limit 2);
"""
+
+ // Time transform partitions (day/year/month/hour)
+ // Note: runtime filter partition pruning is only enabled for identity partitions; Bucket, Truncate,
+ // and Day/Year/Month/Hour transforms all skip it, so these cases check that results stay correct.
+
+ // Day transform tests
+ qt_day_eq """
+ select count(*) from day_partitioned where ts =
+ (select ts from day_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 1);
+ """
+ qt_day_in """
+ select count(*) from day_partitioned where ts in
+ (select ts from day_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 2);
+ """
+ qt_day_range_ge """
+ select count(*) from day_partitioned where ts >= '2024-01-15
00:00:00';
+ """
+ qt_day_range_lt """
+ select count(*) from day_partitioned where ts < '2024-01-16
00:00:00';
+ """
+ qt_day_range_between """
+ select count(*) from day_partitioned where ts >= '2024-01-15
00:00:00'
+ and ts < '2024-01-16 00:00:00';
+ """
+ qt_day_range_multi """
+ select count(*) from day_partitioned where ts >= '2024-01-15
00:00:00'
+ and ts < '2024-01-17 00:00:00';
+ """
+
+ // Year transform tests
+ qt_year_eq """
+ select count(*) from year_partitioned where ts =
+ (select ts from year_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 1);
+ """
+ qt_year_in """
+ select count(*) from year_partitioned where ts in
+ (select ts from year_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 2);
+ """
+ qt_year_range """
+ select count(*) from year_partitioned where ts >= '2024-01-01
00:00:00'
+ and ts < '2025-01-01 00:00:00';
+ """
+ qt_year_range_cross """
+ select count(*) from year_partitioned where ts >= '2023-06-01
00:00:00'
+ and ts < '2024-06-01 00:00:00';
+ """
+
+ // Month transform tests
+ qt_month_eq """
+ select count(*) from month_partitioned where ts =
+ (select ts from month_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 1);
+ """
+ qt_month_in """
+ select count(*) from month_partitioned where ts in
+ (select ts from month_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 2);
+ """
+ qt_month_range """
+ select count(*) from month_partitioned where ts >= '2024-01-01
00:00:00'
+ and ts < '2024-02-01 00:00:00';
+ """
+ qt_month_range_multi """
+ select count(*) from month_partitioned where ts >= '2024-01-01
00:00:00'
+ and ts < '2024-03-01 00:00:00';
+ """
+
+ // Hour transform tests
+ qt_hour_eq """
+ select count(*) from hour_partitioned where ts =
+ (select ts from hour_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 1);
+ """
+ qt_hour_in """
+ select count(*) from hour_partitioned where ts in
+ (select ts from hour_partitioned
+ group by ts having count(*) > 0
+ order by ts desc limit 2);
+ """
+ qt_hour_range """
+ select count(*) from hour_partitioned where ts >= '2024-01-15
10:00:00'
+ and ts < '2024-01-15 11:00:00';
+ """
+ qt_hour_range_multi """
+ select count(*) from hour_partitioned where ts >= '2024-01-15
10:00:00'
+ and ts < '2024-01-15 12:00:00';
+ """
}
try {
sql """ set time_zone = 'Asia/Shanghai'; """
diff --git
a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
index 2662de4dfda..2d8e265222e 100644
---
a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
@@ -64,6 +64,12 @@ suite("test_iceberg_write_transform_partitions",
"p0,external,iceberg,external_d
test_write_transform_partitions("truncate_int_10");
test_write_transform_partitions("truncate_bigint_100");
test_write_transform_partitions("truncate_decimal_10");
+
+ // Time transform partitions (day/year/month/hour)
+ test_write_transform_partitions("day_partitioned");
+ test_write_transform_partitions("year_partitioned");
+ test_write_transform_partitions("month_partitioned");
+ test_write_transform_partitions("hour_partitioned");
} finally {
sql """ unset variable time_zone; """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]