This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 5d0b24991a3 branch-4.1: [fix](fe) set cloud version_cache_ttl to 0 temporarily if retry a query with -230 #63721 (#65048)
5d0b24991a3 is described below

commit 5d0b24991a36946b0f7459c99343775f392049f3
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jul 1 18:44:03 2026 +0800

    branch-4.1: [fix](fe) set cloud version_cache_ttl to 0 temporarily if retry a query with -230 #63721 (#65048)
    
    Cherry-picked from #63721
    
    Co-authored-by: meiyi <[email protected]>
---
 .../java/org/apache/doris/qe/StmtExecutor.java     | 36 +++++++++++++-
 .../java/org/apache/doris/qe/StmtExecutorTest.java | 48 +++++++++++++++++++
 .../cloud_p0/query_retry/test_retry_e-230.groovy   | 55 ++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index 6d98e97af42..4c75df883a8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -512,6 +512,7 @@ public class StmtExecutor {
         UUID uuid;
         int retryTime = Config.max_query_retry_time;
         retryTime = retryTime <= 0 ? 1 : retryTime + 1;
+        boolean disableCloudVersionCacheOnRetry = false;
         // If the query is an `outfile` statement,
         // we execute it only once to avoid exporting redundant data.
         if (parsedStmt instanceof Queriable) {
@@ -519,7 +520,11 @@ public class StmtExecutor {
         }
         for (int i = 1; i <= retryTime; i++) {
             try {
-                execute(queryId);
+                if (disableCloudVersionCacheOnRetry) {
+                    executeWithVersionCacheDisabled(queryId);
+                } else {
+                    execute(queryId);
+                }
                 return;
             } catch (UserException e) {
                 if (!SystemInfoService.needRetryWithReplan(e.getMessage()) || 
i == retryTime) {
@@ -531,6 +536,7 @@ public class StmtExecutor {
                 if (this.coord != null && this.coord.isQueryCancelled()) {
                     throw e;
                 }
+                disableCloudVersionCacheOnRetry = 
shouldDisableCloudVersionCacheOnRetry(e.getMessage());
                 TUniqueId lastQueryId = queryId;
                 uuid = UUID.randomUUID();
                 queryId = new TUniqueId(uuid.getMostSignificantBits(), 
uuid.getLeastSignificantBits());
@@ -552,6 +558,34 @@ public class StmtExecutor {
         }
     }
 
+    // Temporarily disable the cloud version cache for this single attempt so 
that the retry
+    // re-fetches the visible version from meta-service, then restore the 
user-set TTLs.
+    private void executeWithVersionCacheDisabled(TUniqueId queryId) throws 
Exception {
+        SessionVariable sessionVariable = context.getSessionVariable();
+        long oldPartitionTtl = sessionVariable.cloudPartitionVersionCacheTtlMs;
+        long oldTableTtl = sessionVariable.cloudTableVersionCacheTtlMs;
+        try {
+            sessionVariable.cloudPartitionVersionCacheTtlMs = 0;
+            sessionVariable.cloudTableVersionCacheTtlMs = 0;
+            LOG.info("temporarily set {} from {} to 0 and {} from {} to 0 
before retry. {}",
+                    SessionVariable.CLOUD_PARTITION_VERSION_CACHE_TTL_MS, 
oldPartitionTtl,
+                    SessionVariable.CLOUD_TABLE_VERSION_CACHE_TTL_MS, 
oldTableTtl,
+                    context.getQueryIdentifier());
+            execute(queryId);
+        } finally {
+            sessionVariable.cloudPartitionVersionCacheTtlMs = oldPartitionTtl;
+            sessionVariable.cloudTableVersionCacheTtlMs = oldTableTtl;
+        }
+    }
+
+    boolean shouldDisableCloudVersionCacheOnRetry(String errorMessage) {
+        return Config.isCloudMode()
+                && errorMessage != null
+                && errorMessage.contains(SystemInfoService.ERROR_E230)
+                && 
(context.getSessionVariable().cloudPartitionVersionCacheTtlMs != 0
+                || context.getSessionVariable().cloudTableVersionCacheTtlMs != 
0);
+    }
+
     public void execute(TUniqueId queryId) throws Exception {
         SessionVariable sessionVariable = context.getSessionVariable();
         if (context.getConnectType() == ConnectType.ARROW_FLIGHT_SQL) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java
index c778c965f6d..8d21e40282b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java
@@ -339,4 +339,52 @@ public class StmtExecutorTest extends TestWithFeService {
                 "ParsedStatement should be a LogicalPlanAdapter after 
parseByNereids(), but was: "
                         + (parsedStatement == null ? "null" : 
parsedStatement.getClass().getName()));
     }
+
+    @Test
+    public void testShouldDisableCloudVersionCacheOnRetryForE230() {
+        String originalCloudUniqueId = Config.cloud_unique_id;
+        String originalDeployMode = Config.deploy_mode;
+        long originalPartitionTtl = 
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs;
+        long originalTableTtl = 
connectContext.getSessionVariable().cloudTableVersionCacheTtlMs;
+        try {
+            Config.cloud_unique_id = "test-cloud-id";
+            StmtExecutor executor = new StmtExecutor(connectContext, "select 
1");
+
+            
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs = 1000L;
+            connectContext.getSessionVariable().cloudTableVersionCacheTtlMs = 
1000L;
+            
Assertions.assertTrue(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = E-230 versions are already 
compacted"));
+            
Assertions.assertFalse(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = some other error"));
+            // null error message must not trigger the disable.
+            
Assertions.assertFalse(executor.shouldDisableCloudVersionCacheOnRetry(null));
+
+            // Non-cloud mode must never disable the version cache, even on 
E-230.
+            Config.cloud_unique_id = "";
+            Config.deploy_mode = "";
+            
Assertions.assertFalse(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = E-230 versions are already 
compacted"));
+            Config.cloud_unique_id = "test-cloud-id";
+
+            
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs = 0L;
+            connectContext.getSessionVariable().cloudTableVersionCacheTtlMs = 
1000L;
+            
Assertions.assertTrue(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = E-230 versions are already 
compacted"));
+
+            
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs = 1000L;
+            connectContext.getSessionVariable().cloudTableVersionCacheTtlMs = 
0L;
+            
Assertions.assertTrue(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = E-230 versions are already 
compacted"));
+
+            
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs = 0L;
+            connectContext.getSessionVariable().cloudTableVersionCacheTtlMs = 
0L;
+            
Assertions.assertFalse(executor.shouldDisableCloudVersionCacheOnRetry(
+                    "errCode = 2, detailMessage = E-230 versions are already 
compacted"));
+        } finally {
+            Config.cloud_unique_id = originalCloudUniqueId;
+            Config.deploy_mode = originalDeployMode;
+            
connectContext.getSessionVariable().cloudPartitionVersionCacheTtlMs = 
originalPartitionTtl;
+            connectContext.getSessionVariable().cloudTableVersionCacheTtlMs = 
originalTableTtl;
+        }
+    }
 }
diff --git a/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy b/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy
index b2ad94dd2a2..8d87400e253 100644
--- a/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy
+++ b/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy
@@ -162,7 +162,62 @@ suite("test_retry_e-230", 'docker') {
                 // fe StmtExecutor retry time, at most 25 * 1.5s + 25 * 2.5s
                 assertTrue(cost > 4000 && cost < 100000)
 
+                def restoreTbl = 'test_retry_e_230_restore_tbl'
+                sql """ DROP TABLE IF EXISTS ${restoreTbl} """
+                sql """
+                    CREATE TABLE ${restoreTbl} (
+                    `k1` int(11) NULL,
+                    `k2` int(11) NULL
+                    )
+                    DUPLICATE KEY(`k1`, `k2`)
+                    COMMENT 'OLAP'
+                    DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+                    PROPERTIES (
+                    "replication_num"="1"
+                    );
+                    """
+                for (def i = 1; i <= 3; i++) {
+                    insert_sql """INSERT INTO ${restoreTbl} VALUES (${i}, 
${100 * i})""", 1
+                }
+
+                sql """ set cloud_partition_version_cache_ttl_ms = 3600000 """
+                sql """ set cloud_table_version_cache_ttl_ms = 3600000 """
+                def row_cnt = sql """select count() from ${restoreTbl}"""
+                assertEquals(row_cnt[0][0], 3)
+
+                cluster.injectDebugPoints(NodeType.BE, 
['CloudTablet.capture_rs_readers.return.e-230' : null])
+                cluster.injectDebugPoints(NodeType.FE, 
['StmtExecutor.retry.longtime' : null])
+                insert_sql """INSERT INTO ${restoreTbl} VALUES (4, 400)""", 1
+
+                def futrue5 = thread {
+                    Thread.sleep(4000)
+                    cluster.clearBackendDebugPoints()
+                }
+
+                begin = System.currentTimeMillis();
+                def futrue6 = thread {
+                    def result = sql """select * from ${restoreTbl} order by 
k1"""
+                    log.info("select result: {}", result)
+                }
+
+                futrue6.get()
+                cost = System.currentTimeMillis() - begin;
+                log.info("time cost restore check : {}", cost)
+                futrue5.get()
+                assertTrue(cost > 4000 && cost < 100000)
+
+                def ttlRows = sql_return_maparray """
+                    select
+                        @@session.cloud_partition_version_cache_ttl_ms as 
partition_ttl,
+                        @@session.cloud_table_version_cache_ttl_ms as table_ttl
+                """
+                assertEquals("3600000", ttlRows[0].partition_ttl.toString())
+                assertEquals("3600000", ttlRows[0].table_ttl.toString())
+                row_cnt = sql """select count() from ${restoreTbl}"""
+                assertEquals(row_cnt[0][0], 4)
             } finally {
+                sql """ set cloud_partition_version_cache_ttl_ms = DEFAULT """
+                sql """ set cloud_table_version_cache_ttl_ms = DEFAULT """
                 cluster.clearFrontendDebugPoints()
                 cluster.clearBackendDebugPoints()   
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to