This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new de1e1701f06 branch-3.1: [fix](metacache) Fix query hang caused by 
Caffeine cache deadlock in external table queries #57856 (#58065)
de1e1701f06 is described below

commit de1e1701f067306a6aa38edac0b769097a0cf065
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 25 11:11:40 2025 +0800

    branch-3.1: [fix](metacache) Fix query hang caused by Caffeine cache 
deadlock in external table queries #57856 (#58065)
    
    Cherry-picked from #57856
    
    Co-authored-by: zy-kkk <[email protected]>
---
 .../java/org/apache/doris/common/CacheFactory.java |  22 +++-
 .../doris/datasource/ExternalSchemaCache.java      |   2 +-
 .../doris/datasource/hive/HiveMetaStoreCache.java  |   6 +-
 .../hudi/source/HudiCachedFsViewProcessor.java     |   2 +-
 .../hudi/source/HudiCachedMetaClientProcessor.java |   1 -
 .../hudi/source/HudiCachedPartitionProcessor.java  |   2 +-
 .../datasource/iceberg/IcebergMetadataCache.java   |   8 +-
 .../doris/datasource/metacache/MetaCache.java      |   6 +-
 .../datasource/paimon/PaimonMetadataCache.java     |   2 +-
 .../org/apache/doris/common/CacheFactoryTest.java  |   6 +-
 .../metacache/MetaCacheDeadlockTest.java           | 114 +++++++++++++++++++++
 11 files changed, 153 insertions(+), 18 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
index 2b5e6968d17..674bf0aa39c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
@@ -76,7 +76,27 @@ public class CacheFactory {
 
     // Build a loading cache, with executor, it will use given executor for 
refresh
     public <K, V> LoadingCache<K, V> buildCache(CacheLoader<K, V> cacheLoader,
-            RemovalListener<K, V> removalListener, ExecutorService executor) {
+            ExecutorService executor) {
+        Caffeine<Object, Object> builder = buildWithParams();
+        builder.executor(executor);
+        return builder.build(cacheLoader);
+    }
+
+    // Build cache with sync removal listener to prevent deadlock when 
listener calls invalidateAll()
+    public <K, V> LoadingCache<K, V> 
buildCacheWithSyncRemovalListener(CacheLoader<K, V> cacheLoader,
+            RemovalListener<K, V> removalListener) {
+        Caffeine<Object, Object> builder = buildWithParams();
+        if (removalListener != null) {
+            builder.removalListener(removalListener);
+        }
+        builder.executor(Runnable::run);  // Sync execution to avoid thread 
pool deadlock
+        return builder.build(cacheLoader);
+    }
+
+    // Build cache with async removal listener. Use with caution if listener 
may trigger nested operations
+    public <K, V> LoadingCache<K, V> 
buildCacheWithAsyncRemovalListener(CacheLoader<K, V> cacheLoader,
+            RemovalListener<K, V> removalListener,
+            ExecutorService executor) {
         Caffeine<Object, Object> builder = buildWithParams();
         builder.executor(executor);
         if (removalListener != null) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
index 85e6fe4e41f..a1c0236eeb4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
@@ -58,7 +58,7 @@ public class ExternalSchemaCache {
                 Config.max_external_schema_cache_num,
                 false,
                 null);
-        schemaCache = schemaCacheFactory.buildCache(this::loadSchema, null, 
executor);
+        schemaCache = schemaCacheFactory.buildCache(this::loadSchema, 
executor);
     }
 
     private void initMetrics() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 0c011c386ee..072de0cda94 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -148,7 +148,7 @@ public class HiveMetaStoreCache {
                 Config.max_hive_partition_table_cache_num,
                 true,
                 null);
-        partitionValuesCache = 
partitionValuesCacheFactory.buildCache(this::loadPartitionValues, null,
+        partitionValuesCache = 
partitionValuesCacheFactory.buildCache(this::loadPartitionValues,
                 refreshExecutor);
 
         CacheFactory partitionCacheFactory = new CacheFactory(
@@ -167,7 +167,7 @@ public class HiveMetaStoreCache {
             public Map<PartitionCacheKey, HivePartition> loadAll(Iterable<? 
extends PartitionCacheKey> keys) {
                 return loadPartitions(keys);
             }
-        }, null, refreshExecutor);
+        }, refreshExecutor);
 
         setNewFileCache();
     }
@@ -205,7 +205,7 @@ public class HiveMetaStoreCache {
 
         LoadingCache<FileCacheKey, FileCacheValue> oldFileCache = 
fileCacheRef.get();
 
-        fileCacheRef.set(fileCacheFactory.buildCache(loader, null, 
this.refreshExecutor));
+        fileCacheRef.set(fileCacheFactory.buildCache(loader, 
this.refreshExecutor));
         if (Objects.nonNull(oldFileCache)) {
             oldFileCache.invalidateAll();
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
index b906c6a1dd2..84dccb38574 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
@@ -48,7 +48,7 @@ public class HudiCachedFsViewProcessor {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.fsViewCache = 
partitionCacheFactory.buildCache(this::createFsView, null, executor);
+        this.fsViewCache = 
partitionCacheFactory.buildCache(this::createFsView, executor);
     }
 
     private HoodieTableFileSystemView createFsView(FsViewKey key) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
index 140eed0d5eb..bb9351d4121 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
@@ -51,7 +51,6 @@ public class HudiCachedMetaClientProcessor {
 
         this.hudiTableMetaClientCache = partitionCacheFactory.buildCache(
                 this::createHoodieTableMetaClient,
-                null,
                 executor);
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 40db60eb78b..7a2824e7b06 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -63,7 +63,7 @@ public class HudiCachedPartitionProcessor extends 
HudiPartitionProcessor {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.partitionCache = partitionCacheFactory.buildCache(key -> new 
TablePartitionValues(), null, executor);
+        this.partitionCache = partitionCacheFactory.buildCache(key -> new 
TablePartitionValues(), executor);
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
index d10c2d1548b..ba9374ac285 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
@@ -64,7 +64,7 @@ public class IcebergMetadataCache {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.snapshotListCache = snapshotListCacheFactory.buildCache(key -> 
loadSnapshots(key), null, executor);
+        this.snapshotListCache = snapshotListCacheFactory.buildCache(key -> 
loadSnapshots(key), executor);
 
         CacheFactory tableCacheFactory = new CacheFactory(
                 
OptionalLong.of(Config.external_cache_expire_time_seconds_after_access),
@@ -72,7 +72,7 @@ public class IcebergMetadataCache {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.tableCache = tableCacheFactory.buildCache(key -> loadTable(key), 
null, executor);
+        this.tableCache = tableCacheFactory.buildCache(key -> loadTable(key), 
executor);
 
         CacheFactory snapshotCacheFactory = new CacheFactory(
                 
OptionalLong.of(Config.external_cache_expire_time_seconds_after_access),
@@ -80,8 +80,8 @@ public class IcebergMetadataCache {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.snapshotCache = snapshotCacheFactory.buildCache(key -> 
loadSnapshot(key), null, executor);
-        this.viewCache = tableCacheFactory.buildCache(key -> loadView(key), 
null, executor);
+        this.snapshotCache = snapshotCacheFactory.buildCache(key -> 
loadSnapshot(key), executor);
+        this.viewCache = tableCacheFactory.buildCache(key -> loadView(key), 
executor);
     }
 
     public Table getIcebergTable(ExternalTable dorisTable) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
index a98dbc132e6..d5dfa0c8bb9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
@@ -74,8 +74,10 @@ public class MetaCache<T> {
                 maxSize,
                 true,
                 null);
-        namesCache = namesCacheFactory.buildCache(namesCacheLoader, null, 
executor);
-        metaObjCache = objCacheFactory.buildCache(metaObjCacheLoader, 
removalListener, executor);
+        namesCache = namesCacheFactory.buildCache(namesCacheLoader, executor);
+        // Use sync removal listener to prevent deadlock (removal listener 
calls invalidateAll)
+        // NOTE: This cache should NOT use refreshAfterWrite, as it would 
become synchronous
+        metaObjCache = 
objCacheFactory.buildCacheWithSyncRemovalListener(metaObjCacheLoader, 
removalListener);
     }
 
     public List<String> listNames() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
index dc387e54816..45b3e4f9bb2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
@@ -60,7 +60,7 @@ public class PaimonMetadataCache {
                 Config.max_external_table_cache_num,
                 true,
                 null);
-        this.snapshotCache = snapshotCacheFactory.buildCache(key -> 
loadSnapshot(key), null, executor);
+        this.snapshotCache = snapshotCacheFactory.buildCache(key -> 
loadSnapshot(key), executor);
     }
 
     @NotNull
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
index 78ec20edb87..090dad2978e 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
@@ -124,7 +124,7 @@ public class CacheFactoryTest {
                 false,
                 ticker::read);
         LoadingCache<Integer, CacheValue> loadingCache = 
cacheFactory.buildCache(
-                key -> CacheValue.createValue("value" + key, counter), null, 
executor);
+                key -> CacheValue.createValue("value" + key, counter), 
executor);
         CacheValue value = loadingCache.get(1);
         Assertions.assertEquals("value1", value.getValue());
         Assertions.assertEquals(1, counter.get());
@@ -155,7 +155,7 @@ public class CacheFactoryTest {
                 false,
                 ticker::read);
         LoadingCache<Integer, CacheValue> loadingCache = 
cacheFactory.buildCache(
-                key -> CacheValue.createValue("value" + key, counter), null, 
executor);
+                key -> CacheValue.createValue("value" + key, counter), 
executor);
         CacheValue value = loadingCache.get(1);
         Assertions.assertEquals("value1", value.getValue());
         Assertions.assertEquals(1, counter.get());
@@ -193,7 +193,7 @@ public class CacheFactoryTest {
                 false,
                 ticker::read);
         LoadingCache<Integer, CacheValue> loadingCache = 
cacheFactory.buildCache(
-                key -> CacheValue.createValue("value" + key, counter), null, 
executor);
+                key -> CacheValue.createValue("value" + key, counter), 
executor);
         CacheValue value = loadingCache.get(1);
         Assertions.assertEquals("value1", value.getValue());
         Assertions.assertEquals(1, counter.get());
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
new file mode 100644
index 00000000000..26cce0ee929
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.metacache;
+
+import org.apache.doris.common.Pair;
+import org.apache.doris.common.ThreadPoolManager;
+
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.google.common.collect.Lists;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.OptionalLong;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Test to verify MetaCache does not deadlock when removal listener calls 
invalidateAll().
+ */
+public class MetaCacheDeadlockTest {
+
+    @Test
+    public void testNestedInvalidateAllWithBoundedExecutor() throws 
InterruptedException {
+        ExecutorService executor = new ThreadPoolExecutor(
+                1, 1,
+                0L, TimeUnit.MILLISECONDS,
+                new LinkedBlockingQueue<>(1),
+                new ThreadPoolManager.BlockedPolicy("TestExecutor", 200)
+        );
+
+        CacheLoader<String, List<Pair<String, String>>> namesCacheLoader = key 
-> Lists.newArrayList();
+        CacheLoader<String, Optional<String>> tableLoader = key -> 
Optional.of("table-" + key);
+
+        MetaCache<String> tableCache = new MetaCache<>(
+                "tableCache",
+                executor,
+                OptionalLong.empty(),
+                OptionalLong.empty(),
+                100,
+                namesCacheLoader,
+                tableLoader,
+                (key, value, cause) -> { });
+
+        for (int i = 0; i < 100; i++) {
+            tableCache.updateCache("table" + i, "table" + i, "table-" + i, i);
+        }
+
+        CacheLoader<String, Optional<String>> dbLoader = key -> {
+            try {
+                Thread.sleep(10);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+            }
+            return Optional.of("db-" + key);
+        };
+
+        MetaCache<String> dbCache = new MetaCache<>(
+                "databaseCache",
+                executor,
+                OptionalLong.empty(),
+                OptionalLong.empty(),
+                1,
+                namesCacheLoader,
+                dbLoader,
+                (key, value, cause) -> tableCache.invalidateAll());
+
+        dbCache.updateCache("db0", "db0", "db-0", 0);
+
+        CountDownLatch startLatch = new CountDownLatch(1);
+        CountDownLatch doneLatch = new CountDownLatch(5);
+        for (int i = 1; i <= 5; i++) {
+            final int idx = i;
+            new Thread(() -> {
+                try {
+                    startLatch.await();
+                    dbCache.updateCache("db" + idx, "db" + idx, "db-" + idx, 
idx);
+                } catch (Exception e) {
+                    // Ignore
+                } finally {
+                    doneLatch.countDown();
+                }
+            }).start();
+        }
+
+        startLatch.countDown();
+        boolean completed = doneLatch.await(2, TimeUnit.SECONDS);
+
+        executor.shutdown();
+        boolean terminated = executor.awaitTermination(1, TimeUnit.SECONDS);
+
+        Assert.assertTrue("MetaCache deadlock detected. Ensure metaObjCache 
uses buildCacheWithSyncRemovalListener.",
+                completed && terminated);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to