This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new de1e1701f06 branch-3.1: [fix](metacache) Fix query hang caused by
Caffeine cache deadlock in external table queries #57856 (#58065)
de1e1701f06 is described below
commit de1e1701f067306a6aa38edac0b769097a0cf065
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 25 11:11:40 2025 +0800
branch-3.1: [fix](metacache) Fix query hang caused by Caffeine cache
deadlock in external table queries #57856 (#58065)
Cherry-picked from #57856
Co-authored-by: zy-kkk <[email protected]>
---
.../java/org/apache/doris/common/CacheFactory.java | 22 +++-
.../doris/datasource/ExternalSchemaCache.java | 2 +-
.../doris/datasource/hive/HiveMetaStoreCache.java | 6 +-
.../hudi/source/HudiCachedFsViewProcessor.java | 2 +-
.../hudi/source/HudiCachedMetaClientProcessor.java | 1 -
.../hudi/source/HudiCachedPartitionProcessor.java | 2 +-
.../datasource/iceberg/IcebergMetadataCache.java | 8 +-
.../doris/datasource/metacache/MetaCache.java | 6 +-
.../datasource/paimon/PaimonMetadataCache.java | 2 +-
.../org/apache/doris/common/CacheFactoryTest.java | 6 +-
.../metacache/MetaCacheDeadlockTest.java | 114 +++++++++++++++++++++
11 files changed, 153 insertions(+), 18 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
index 2b5e6968d17..674bf0aa39c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
@@ -76,7 +76,27 @@ public class CacheFactory {
// Build a loading cache, with executor, it will use given executor for
refresh
public <K, V> LoadingCache<K, V> buildCache(CacheLoader<K, V> cacheLoader,
- RemovalListener<K, V> removalListener, ExecutorService executor) {
+ ExecutorService executor) {
+ Caffeine<Object, Object> builder = buildWithParams();
+ builder.executor(executor);
+ return builder.build(cacheLoader);
+ }
+
+ // Build cache with sync removal listener to prevent deadlock when
listener calls invalidateAll()
+ public <K, V> LoadingCache<K, V>
buildCacheWithSyncRemovalListener(CacheLoader<K, V> cacheLoader,
+ RemovalListener<K, V> removalListener) {
+ Caffeine<Object, Object> builder = buildWithParams();
+ if (removalListener != null) {
+ builder.removalListener(removalListener);
+ }
+ builder.executor(Runnable::run); // Sync execution to avoid thread
pool deadlock
+ return builder.build(cacheLoader);
+ }
+
+ // Build cache with async removal listener. Use with caution if listener
may trigger nested operations
+ public <K, V> LoadingCache<K, V>
buildCacheWithAsyncRemovalListener(CacheLoader<K, V> cacheLoader,
+ RemovalListener<K, V> removalListener,
+ ExecutorService executor) {
Caffeine<Object, Object> builder = buildWithParams();
builder.executor(executor);
if (removalListener != null) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
index 85e6fe4e41f..a1c0236eeb4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
@@ -58,7 +58,7 @@ public class ExternalSchemaCache {
Config.max_external_schema_cache_num,
false,
null);
- schemaCache = schemaCacheFactory.buildCache(this::loadSchema, null,
executor);
+ schemaCache = schemaCacheFactory.buildCache(this::loadSchema,
executor);
}
private void initMetrics() {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 0c011c386ee..072de0cda94 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -148,7 +148,7 @@ public class HiveMetaStoreCache {
Config.max_hive_partition_table_cache_num,
true,
null);
- partitionValuesCache =
partitionValuesCacheFactory.buildCache(this::loadPartitionValues, null,
+ partitionValuesCache =
partitionValuesCacheFactory.buildCache(this::loadPartitionValues,
refreshExecutor);
CacheFactory partitionCacheFactory = new CacheFactory(
@@ -167,7 +167,7 @@ public class HiveMetaStoreCache {
public Map<PartitionCacheKey, HivePartition> loadAll(Iterable<?
extends PartitionCacheKey> keys) {
return loadPartitions(keys);
}
- }, null, refreshExecutor);
+ }, refreshExecutor);
setNewFileCache();
}
@@ -205,7 +205,7 @@ public class HiveMetaStoreCache {
LoadingCache<FileCacheKey, FileCacheValue> oldFileCache =
fileCacheRef.get();
- fileCacheRef.set(fileCacheFactory.buildCache(loader, null,
this.refreshExecutor));
+ fileCacheRef.set(fileCacheFactory.buildCache(loader,
this.refreshExecutor));
if (Objects.nonNull(oldFileCache)) {
oldFileCache.invalidateAll();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
index b906c6a1dd2..84dccb38574 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedFsViewProcessor.java
@@ -48,7 +48,7 @@ public class HudiCachedFsViewProcessor {
Config.max_external_table_cache_num,
true,
null);
- this.fsViewCache =
partitionCacheFactory.buildCache(this::createFsView, null, executor);
+ this.fsViewCache =
partitionCacheFactory.buildCache(this::createFsView, executor);
}
private HoodieTableFileSystemView createFsView(FsViewKey key) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
index 140eed0d5eb..bb9351d4121 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedMetaClientProcessor.java
@@ -51,7 +51,6 @@ public class HudiCachedMetaClientProcessor {
this.hudiTableMetaClientCache = partitionCacheFactory.buildCache(
this::createHoodieTableMetaClient,
- null,
executor);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 40db60eb78b..7a2824e7b06 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -63,7 +63,7 @@ public class HudiCachedPartitionProcessor extends
HudiPartitionProcessor {
Config.max_external_table_cache_num,
true,
null);
- this.partitionCache = partitionCacheFactory.buildCache(key -> new
TablePartitionValues(), null, executor);
+ this.partitionCache = partitionCacheFactory.buildCache(key -> new
TablePartitionValues(), executor);
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
index d10c2d1548b..ba9374ac285 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
@@ -64,7 +64,7 @@ public class IcebergMetadataCache {
Config.max_external_table_cache_num,
true,
null);
- this.snapshotListCache = snapshotListCacheFactory.buildCache(key ->
loadSnapshots(key), null, executor);
+ this.snapshotListCache = snapshotListCacheFactory.buildCache(key ->
loadSnapshots(key), executor);
CacheFactory tableCacheFactory = new CacheFactory(
OptionalLong.of(Config.external_cache_expire_time_seconds_after_access),
@@ -72,7 +72,7 @@ public class IcebergMetadataCache {
Config.max_external_table_cache_num,
true,
null);
- this.tableCache = tableCacheFactory.buildCache(key -> loadTable(key),
null, executor);
+ this.tableCache = tableCacheFactory.buildCache(key -> loadTable(key),
executor);
CacheFactory snapshotCacheFactory = new CacheFactory(
OptionalLong.of(Config.external_cache_expire_time_seconds_after_access),
@@ -80,8 +80,8 @@ public class IcebergMetadataCache {
Config.max_external_table_cache_num,
true,
null);
- this.snapshotCache = snapshotCacheFactory.buildCache(key ->
loadSnapshot(key), null, executor);
- this.viewCache = tableCacheFactory.buildCache(key -> loadView(key),
null, executor);
+ this.snapshotCache = snapshotCacheFactory.buildCache(key ->
loadSnapshot(key), executor);
+ this.viewCache = tableCacheFactory.buildCache(key -> loadView(key),
executor);
}
public Table getIcebergTable(ExternalTable dorisTable) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
index a98dbc132e6..d5dfa0c8bb9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java
@@ -74,8 +74,10 @@ public class MetaCache<T> {
maxSize,
true,
null);
- namesCache = namesCacheFactory.buildCache(namesCacheLoader, null,
executor);
- metaObjCache = objCacheFactory.buildCache(metaObjCacheLoader,
removalListener, executor);
+ namesCache = namesCacheFactory.buildCache(namesCacheLoader, executor);
+ // Use sync removal listener to prevent deadlock (removal listener
calls invalidateAll)
+ // NOTE: This cache should NOT use refreshAfterWrite, as it would
become synchronous
+ metaObjCache =
objCacheFactory.buildCacheWithSyncRemovalListener(metaObjCacheLoader,
removalListener);
}
public List<String> listNames() {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
index dc387e54816..45b3e4f9bb2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java
@@ -60,7 +60,7 @@ public class PaimonMetadataCache {
Config.max_external_table_cache_num,
true,
null);
- this.snapshotCache = snapshotCacheFactory.buildCache(key ->
loadSnapshot(key), null, executor);
+ this.snapshotCache = snapshotCacheFactory.buildCache(key ->
loadSnapshot(key), executor);
}
@NotNull
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
index 78ec20edb87..090dad2978e 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/CacheFactoryTest.java
@@ -124,7 +124,7 @@ public class CacheFactoryTest {
false,
ticker::read);
LoadingCache<Integer, CacheValue> loadingCache =
cacheFactory.buildCache(
- key -> CacheValue.createValue("value" + key, counter), null,
executor);
+ key -> CacheValue.createValue("value" + key, counter),
executor);
CacheValue value = loadingCache.get(1);
Assertions.assertEquals("value1", value.getValue());
Assertions.assertEquals(1, counter.get());
@@ -155,7 +155,7 @@ public class CacheFactoryTest {
false,
ticker::read);
LoadingCache<Integer, CacheValue> loadingCache =
cacheFactory.buildCache(
- key -> CacheValue.createValue("value" + key, counter), null,
executor);
+ key -> CacheValue.createValue("value" + key, counter),
executor);
CacheValue value = loadingCache.get(1);
Assertions.assertEquals("value1", value.getValue());
Assertions.assertEquals(1, counter.get());
@@ -193,7 +193,7 @@ public class CacheFactoryTest {
false,
ticker::read);
LoadingCache<Integer, CacheValue> loadingCache =
cacheFactory.buildCache(
- key -> CacheValue.createValue("value" + key, counter), null,
executor);
+ key -> CacheValue.createValue("value" + key, counter),
executor);
CacheValue value = loadingCache.get(1);
Assertions.assertEquals("value1", value.getValue());
Assertions.assertEquals(1, counter.get());
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
new file mode 100644
index 00000000000..26cce0ee929
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/metacache/MetaCacheDeadlockTest.java
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.metacache;
+
+import org.apache.doris.common.Pair;
+import org.apache.doris.common.ThreadPoolManager;
+
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.google.common.collect.Lists;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.OptionalLong;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Test to verify MetaCache does not deadlock when removal listener calls
invalidateAll().
+ */
+public class MetaCacheDeadlockTest {
+
+ @Test
+ public void testNestedInvalidateAllWithBoundedExecutor() throws
InterruptedException {
+ ExecutorService executor = new ThreadPoolExecutor(
+ 1, 1,
+ 0L, TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<>(1),
+ new ThreadPoolManager.BlockedPolicy("TestExecutor", 200)
+ );
+
+ CacheLoader<String, List<Pair<String, String>>> namesCacheLoader = key
-> Lists.newArrayList();
+ CacheLoader<String, Optional<String>> tableLoader = key ->
Optional.of("table-" + key);
+
+ MetaCache<String> tableCache = new MetaCache<>(
+ "tableCache",
+ executor,
+ OptionalLong.empty(),
+ OptionalLong.empty(),
+ 100,
+ namesCacheLoader,
+ tableLoader,
+ (key, value, cause) -> { });
+
+ for (int i = 0; i < 100; i++) {
+ tableCache.updateCache("table" + i, "table" + i, "table-" + i, i);
+ }
+
+ CacheLoader<String, Optional<String>> dbLoader = key -> {
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ }
+ return Optional.of("db-" + key);
+ };
+
+ MetaCache<String> dbCache = new MetaCache<>(
+ "databaseCache",
+ executor,
+ OptionalLong.empty(),
+ OptionalLong.empty(),
+ 1,
+ namesCacheLoader,
+ dbLoader,
+ (key, value, cause) -> tableCache.invalidateAll());
+
+ dbCache.updateCache("db0", "db0", "db-0", 0);
+
+ CountDownLatch startLatch = new CountDownLatch(1);
+ CountDownLatch doneLatch = new CountDownLatch(5);
+ for (int i = 1; i <= 5; i++) {
+ final int idx = i;
+ new Thread(() -> {
+ try {
+ startLatch.await();
+ dbCache.updateCache("db" + idx, "db" + idx, "db-" + idx,
idx);
+ } catch (Exception e) {
+ // Ignore
+ } finally {
+ doneLatch.countDown();
+ }
+ }).start();
+ }
+
+ startLatch.countDown();
+ boolean completed = doneLatch.await(2, TimeUnit.SECONDS);
+
+ executor.shutdown();
+ boolean terminated = executor.awaitTermination(1, TimeUnit.SECONDS);
+
+ Assert.assertTrue("MetaCache deadlock detected. Ensure metaObjCache
uses buildCacheWithSyncRemovalListener.",
+ completed && terminated);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]