This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new e4c0030d93c branch-4.0: [fix](paimon) Fix Paimon time-travel tag reads
for expired snapshots #60166 (#60222)
e4c0030d93c is described below
commit e4c0030d93c3af893d581c9a20c94f10f92f64a0
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jan 26 17:09:33 2026 +0800
branch-4.0: [fix](paimon) Fix Paimon time-travel tag reads for expired snapshots #60166 (#60222)
Cherry-picked from #60166
Co-authored-by: Socrates <[email protected]>
---
.../create_preinstalled_scripts/paimon/run09.sql | 34 ++++++++++++++++
.../datasource/paimon/PaimonExternalTable.java | 32 +++++++++++++--
.../apache/doris/datasource/paimon/PaimonUtil.java | 46 ++++------------------
.../paimon/paimon_time_travel.out | 4 +-
.../paimon/paimon_incr_read.groovy | 2 +-
.../paimon/paimon_time_travel.groovy | 9 ++++-
6 files changed, 82 insertions(+), 45 deletions(-)
diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
index 87b290e1a55..2c3998c4d90 100644
--- a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
+++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
@@ -108,3 +108,37 @@ INSERT INTO test_paimon_time_travel_db.tbl_time_travel
VALUES
(6006, 9006, '2024-02-16', 123.75, '987 Advanced Court, Silicon Valley, CA
94301', 'CANCELLED', true, 600);
CALL sys.create_tag(table => 'test_paimon_time_travel_db.tbl_time_travel', tag
=> 't_6', snapshot => 6);
+
+-- table for expired snapshot tag time travel
+drop table if exists test_paimon_time_travel_db.tbl_time_travel_expired_tag;
+CREATE TABLE test_paimon_time_travel_db.tbl_time_travel_expired_tag (
+ id INT NOT NULL,
+ name STRING
+)
+USING paimon
+TBLPROPERTIES (
+ 'bucket' = '1',
+ 'primary-key' = 'id',
+ 'file.format' = 'parquet'
+);
+
+-- snapshot 1
+INSERT INTO test_paimon_time_travel_db.tbl_time_travel_expired_tag VALUES
+(1, 'alpha'),
+(2, 'beta');
+CALL sys.create_tag(table => 'test_paimon_time_travel_db.tbl_time_travel_expired_tag', tag => 't_exp_1', snapshot => 1);
+
+-- snapshot 2
+INSERT INTO test_paimon_time_travel_db.tbl_time_travel_expired_tag VALUES
+(3, 'gamma'),
+(4, 'delta');
+CALL sys.create_tag(table => 'test_paimon_time_travel_db.tbl_time_travel_expired_tag', tag => 't_exp_2', snapshot => 2);
+
+-- snapshot 3
+INSERT INTO test_paimon_time_travel_db.tbl_time_travel_expired_tag VALUES
+(5, 'epsilon'),
+(6, 'zeta');
+CALL sys.create_tag(table => 'test_paimon_time_travel_db.tbl_time_travel_expired_tag', tag => 't_exp_3', snapshot => 3);
+
+-- expire snapshots so tag points to expired snapshot file
+CALL sys.expire_snapshots(table => 'test_paimon_time_travel_db.tbl_time_travel_expired_tag', retain_max => 1);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
index d50f9580e10..174bfa64a2d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
@@ -109,11 +109,35 @@ public class PaimonExternalTable extends ExternalTable
implements MTMVRelatedTab
// use the specified snapshot and the corresponding schema(not the
latest
// schema).
try {
- Snapshot snapshot = PaimonUtil.getPaimonSnapshot(paimonTable,
tableSnapshot, scanParams);
- Table dataTable = paimonTable.copy(
-
Collections.singletonMap(CoreOptions.SCAN_SNAPSHOT_ID.key(),
String.valueOf(snapshot.id())));
+ DataTable dataTable = (DataTable) paimonTable;
+ Snapshot snapshot;
+ Map<String, String> scanOptions = new HashMap<>();
+
+ if (tableSnapshot.isPresent()) {
+ TableSnapshot snapshotOpt = tableSnapshot.get();
+ String value = snapshotOpt.getValue();
+ if (snapshotOpt.getType() ==
TableSnapshot.VersionType.TIME) {
+ snapshot = PaimonUtil.getPaimonSnapshotByTimestamp(
+ dataTable, value,
PaimonUtil.isDigitalString(value));
+ scanOptions.put(CoreOptions.SCAN_SNAPSHOT_ID.key(),
String.valueOf(snapshot.id()));
+ } else {
+ if (PaimonUtil.isDigitalString(value)) {
+ snapshot =
PaimonUtil.getPaimonSnapshotBySnapshotId(dataTable, value);
+
scanOptions.put(CoreOptions.SCAN_SNAPSHOT_ID.key(),
String.valueOf(snapshot.id()));
+ } else {
+ snapshot =
PaimonUtil.getPaimonSnapshotByTag(dataTable, value);
+ scanOptions.put(CoreOptions.SCAN_TAG_NAME.key(),
value);
+ }
+ }
+ } else {
+ String tagName =
PaimonUtil.extractBranchOrTagName(scanParams.get());
+ snapshot = PaimonUtil.getPaimonSnapshotByTag(dataTable,
tagName);
+ scanOptions.put(CoreOptions.SCAN_TAG_NAME.key(), tagName);
+ }
+
+ Table scanTable = paimonTable.copy(scanOptions);
return new PaimonSnapshotCacheValue(PaimonPartitionInfo.EMPTY,
- new PaimonSnapshot(snapshot.id(), snapshot.schemaId(),
dataTable));
+ new PaimonSnapshot(snapshot.id(), snapshot.schemaId(),
scanTable));
} catch (Exception e) {
LOG.warn("Failed to get Paimon snapshot for table {}",
paimonTable.name(), e);
throw new RuntimeException(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
index b9285b4a757..497f06883a7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
@@ -19,7 +19,6 @@ package org.apache.doris.datasource.paimon;
import org.apache.doris.analysis.PartitionValue;
import org.apache.doris.analysis.TableScanParams;
-import org.apache.doris.analysis.TableSnapshot;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.PartitionItem;
@@ -99,6 +98,10 @@ public class PaimonUtil {
private static final Base64.Encoder BASE64_ENCODER =
java.util.Base64.getUrlEncoder().withoutPadding();
private static final Pattern DIGITAL_REGEX = Pattern.compile("\\d+");
+ public static boolean isDigitalString(String value) {
+ return value != null && DIGITAL_REGEX.matcher(value).matches();
+ }
+
public static List<InternalRow> read(
Table table, @Nullable int[] projection, @Nullable Predicate
predicate,
Pair<ConfigOption<?>, String>... dynamicOptions)
@@ -532,41 +535,7 @@ public class PaimonUtil {
}
}
- // get snapshot info from query like 'for version/time as of' or '@tag'
- public static Snapshot getPaimonSnapshot(Table table,
Optional<TableSnapshot> querySnapshot,
- Optional<TableScanParams> scanParams) throws UserException {
- Preconditions.checkArgument(querySnapshot.isPresent() ||
(scanParams.isPresent() && scanParams.get().isTag()),
- "should spec version or time or tag");
- Preconditions.checkArgument(!(querySnapshot.isPresent() &&
scanParams.isPresent()),
- "should not spec both snapshot and scan params");
-
- DataTable dataTable = (DataTable) table;
- if (querySnapshot.isPresent()) {
- return getPaimonSnapshotByTableSnapshot(dataTable,
querySnapshot.get());
- } else if (scanParams.isPresent() && scanParams.get().isTag()) {
- return getPaimonSnapshotByTag(dataTable,
extractBranchOrTagName(scanParams.get()));
- } else {
- throw new UserException("should spec version or time or tag");
- }
- }
-
- private static Snapshot getPaimonSnapshotByTableSnapshot(DataTable table,
TableSnapshot tableSnapshot)
- throws UserException {
- final String value = tableSnapshot.getValue();
- final TableSnapshot.VersionType type = tableSnapshot.getType();
- final boolean isDigital = DIGITAL_REGEX.matcher(value).matches();
-
- switch (type) {
- case TIME:
- return getPaimonSnapshotByTimestamp(table, value, isDigital);
- case VERSION:
- return isDigital ? getPaimonSnapshotBySnapshotId(table, value)
: getPaimonSnapshotByTag(table, value);
- default:
- throw new UserException("Unsupported snapshot type: " + type);
- }
- }
-
- private static Snapshot getPaimonSnapshotByTimestamp(DataTable table,
String timestamp, boolean isDigital)
+ static Snapshot getPaimonSnapshotByTimestamp(DataTable table, String
timestamp, boolean isDigital)
throws UserException {
long timestampMillis = 0;
if (isDigital) {
@@ -594,7 +563,7 @@ public class PaimonUtil {
return snapshot;
}
- private static Snapshot getPaimonSnapshotBySnapshotId(DataTable table,
String snapshotString)
+ static Snapshot getPaimonSnapshotBySnapshotId(DataTable table, String
snapshotString)
throws UserException {
long snapshotId = Long.parseLong(snapshotString);
try {
@@ -605,12 +574,13 @@ public class PaimonUtil {
}
}
- private static Snapshot getPaimonSnapshotByTag(DataTable table, String
tagName)
+ static Snapshot getPaimonSnapshotByTag(DataTable table, String tagName)
throws UserException {
Optional<Tag> tag = table.tagManager().get(tagName);
return tag.orElseThrow(() -> new UserException("can't find snapshot by
tag: " + tagName));
}
+
public static String resolvePaimonBranch(TableScanParams tableScanParams,
Table baseTable)
throws UserException {
String branchName = extractBranchOrTagName(tableScanParams);
diff --git a/regression-test/data/external_table_p0/paimon/paimon_time_travel.out b/regression-test/data/external_table_p0/paimon/paimon_time_travel.out
index 93a7c42e165..effd42e0b15 100644
--- a/regression-test/data/external_table_p0/paimon/paimon_time_travel.out
+++ b/regression-test/data/external_table_p0/paimon/paimon_time_travel.out
@@ -535,6 +535,9 @@ true 389.20 5003 6003
false 5
true 7
+-- !expired_tag_count --
+2
+
-- !branch_1_count_list --
10
@@ -1098,4 +1101,3 @@ true 7
6004 9004 2024-02-14 199.99 321 Future Lane, Innovation, WA 98001
PENDING true 400
6005 9005 2024-02-15 567.25 654 Progress Drive, Tech City, OR 97201
COMPLETED false 500
6006 9006 2024-02-16 123.75 987 Advanced Court, Silicon Valley, CA
94301 CANCELLED true 600
-
diff --git a/regression-test/suites/external_table_p0/paimon/paimon_incr_read.groovy b/regression-test/suites/external_table_p0/paimon/paimon_incr_read.groovy
index 3f5b2accf10..824d38f1cda 100644
--- a/regression-test/suites/external_table_p0/paimon/paimon_incr_read.groovy
+++ b/regression-test/suites/external_table_p0/paimon/paimon_incr_read.groovy
@@ -93,7 +93,7 @@ suite("test_paimon_incr_read",
"p0,external,doris,external_docker,external_docke
}
test {
sql """select * from paimon_incr@incr('startSnapshotId'=1,
'endSnapshotId'=2) for version as of 1"""
- exception "should not spec both snapshot and scan params"
+ exception "Can not specify scan params and table snapshot at
same time"
}
test {
sql """select * from paimon_incr@incr('startSnapshotId'=-1)"""
diff --git a/regression-test/suites/external_table_p0/paimon/paimon_time_travel.groovy b/regression-test/suites/external_table_p0/paimon/paimon_time_travel.groovy
index 96e6551aa88..d91cc75abe5 100644
--- a/regression-test/suites/external_table_p0/paimon/paimon_time_travel.groovy
+++ b/regression-test/suites/external_table_p0/paimon/paimon_time_travel.groovy
@@ -153,6 +153,9 @@ suite("paimon_time_travel",
"p0,external,doris,external_docker,external_docker_d
}
}
+ // tag on expired snapshot should still be readable
+ qt_expired_tag_count """select count(*) from ${tableName}_expired_tag
FOR VERSION AS OF 't_exp_1';"""
+
List<List<Object>> branchesResult = sql """ select branch_name from
${tableName}\$branches order by branch_name;"""
logger.info("Query result from ${tableName}\$branches:
${branchesResult}")
assertTrue(branchesResult.size()==2)
@@ -340,6 +343,10 @@ suite("paimon_time_travel",
"p0,external,doris,external_docker,external_docker_d
sql """ select * from ${tableName} for version as of
'not_exists_tag'; """
exception "can't find snapshot by tag: not_exists_tag"
}
+ test {
+ sql """ select * from ${tableName}_expired_tag for version as of
1; """
+ exception "can't find snapshot by id: 1"
+ }
// Use branch function to query tags
test {
@@ -359,4 +366,4 @@ suite("paimon_time_travel",
"p0,external,doris,external_docker,external_docker_d
} finally {
// sql """drop catalog if exists ${catalog_name}"""
}
-}
\ No newline at end of file
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]