This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 55554c6399 [#9148] improvement(core): Clear useless data in the
reversed index for entity cache. (#9162)
55554c6399 is described below
commit 55554c6399aa238a2d62e854b8be36d8a47b2777
Author: Mini Yu <[email protected]>
AuthorDate: Tue Nov 25 20:47:22 2025 +0800
[#9148] improvement(core): Clear useless data in the reversed index for
entity cache. (#9162)
### What changes were proposed in this pull request?
Clear the entry in the reverse index that has been removed from cache
data.
### Why are the changes needed?
We need to clear all out-of-date data in the cache.
Fix: #9148
### Does this PR introduce _any_ user-facing change?
N/A.
### How was this patch tested?
UTs
---
.../gravitino/cache/CaffeineEntityCache.java | 7 ++-
.../apache/gravitino/cache/ReverseIndexCache.java | 52 ++++++++++++++++++----
.../gravitino/storage/TestEntityStorage.java | 45 +++++++++++++++++++
3 files changed, 94 insertions(+), 10 deletions(-)
diff --git
a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
index 1ecc27dc4f..316d4a3e01 100644
--- a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
@@ -94,6 +94,11 @@ public class CaffeineEntityCache extends BaseEntityCache {
private ScheduledExecutorService scheduler;
+ @VisibleForTesting
+ public ReverseIndexCache getReverseIndex() {
+ return reverseIndex;
+ }
+
private static final Set<SupportsRelationOperations.Type> RELATION_TYPES =
Sets.newHashSet(
SupportsRelationOperations.Type.METADATA_OBJECT_ROLE_REL,
@@ -476,7 +481,7 @@ public class CaffeineEntityCache extends BaseEntityCache {
k ->
reverseIndex
.getValuesForKeysStartingWith(k.toString())
- .forEach(rsk ->
reverseIndex.remove(rsk.toString())));
+ .forEach(rsk -> rsk.forEach(v ->
reverseIndex.remove(v))));
});
reverseIndex.remove(currentKeyToRemove);
diff --git
a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
index 16c0c87532..16870d0411 100644
--- a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
@@ -20,12 +20,14 @@ package org.apache.gravitino.cache;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.googlecode.concurrenttrees.radix.ConcurrentRadixTree;
import com.googlecode.concurrenttrees.radix.RadixTree;
import
com.googlecode.concurrenttrees.radix.node.concrete.DefaultCharArrayNodeFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.gravitino.Entity;
import org.apache.gravitino.HasIdentifier;
import org.apache.gravitino.NameIdentifier;
@@ -45,6 +47,29 @@ public class ReverseIndexCache {
/** Registers a reverse index processor for a specific entity class. */
private final Map<Class<? extends Entity>, ReverseIndexRule>
reverseIndexRules = new HashMap<>();
+ /**
+ * Map from data entity key to a list of entity cache relation keys. This is
used for reverse
+ * indexing.
+ *
+ * <p>For example, a role entity may be related to multiple securable
objects, so we need to
+ * maintain a mapping from the role entity key to the list of securable
object keys. That is
+ * entityToReverseIndexMap: roleEntityKey -> [securableObjectKey1,
securableObjectKey2, ...]
+ *
+ * <p>This map is used to quickly find all the related entity cache keys
when we need to
+ * invalidate in the reverse index if a role entity is updated. The
following is an example: a
+ * Role a has securable objects s1 and s2, so we have the following mapping:
<br>
+ * cacheData: role1 -> role entity </br> <br>
+ * reverseIndex: s1 -> [role1], s2 -> [role1] </br>
+ *
+ * <p>This map will be: <br>
+ * role1 -> [s1, s2] </br>
+ *
+ * <p>When we update role1, we need to invalidate s1 and s2 from the reverse
index via this map,
+ * or the data will be in the memory forever.
+ */
+ private final Map<EntityCacheKey, List<EntityCacheKey>>
entityToReverseIndexMap =
+ Maps.newHashMap();
+
public ReverseIndexCache() {
this.reverseIndex = new ConcurrentRadixTree<>(new
DefaultCharArrayNodeFactory());
@@ -57,20 +82,28 @@ public class ReverseIndexCache {
GenericEntity.class,
ReverseIndexRules.GENERIC_METADATA_OBJECT_REVERSE_RULE);
}
- public boolean remove(EntityCacheKey key) {
- return reverseIndex.remove(key.toString());
- }
-
public Iterable<List<EntityCacheKey>> getValuesForKeysStartingWith(String
keyPrefix) {
return reverseIndex.getValuesForKeysStartingWith(keyPrefix);
}
- public Iterable<CharSequence> getKeysStartingWith(String keyPrefix) {
- return reverseIndex.getKeysStartingWith(keyPrefix);
- }
+ public boolean remove(EntityCacheKey key) {
+ List<EntityCacheKey> relatedKeys = entityToReverseIndexMap.remove(key);
+ if (CollectionUtils.isNotEmpty(relatedKeys)) {
+ for (EntityCacheKey relatedKey : relatedKeys) {
+ List<EntityCacheKey> existingKeys =
reverseIndex.getValueForExactKey(relatedKey.toString());
+ if (existingKeys != null && existingKeys.contains(key)) {
+ List<EntityCacheKey> newValues = Lists.newArrayList(existingKeys);
+ newValues.remove(key);
+ if (newValues.isEmpty()) {
+ reverseIndex.remove(relatedKey.toString());
+ } else {
+ reverseIndex.put(relatedKey.toString(), newValues);
+ }
+ }
+ }
+ }
- public boolean remove(String key) {
- return reverseIndex.remove(key);
+ return reverseIndex.remove(key.toString());
}
public int size() {
@@ -80,6 +113,7 @@ public class ReverseIndexCache {
public void put(
NameIdentifier nameIdentifier, Entity.EntityType type,
EntityCacheRelationKey key) {
EntityCacheKey entityCacheKey = EntityCacheKey.of(nameIdentifier, type);
+ entityToReverseIndexMap.computeIfAbsent(key, k ->
Lists.newArrayList()).add(entityCacheKey);
List<EntityCacheKey> existingKeys =
reverseIndex.getValueForExactKey(entityCacheKey.toString());
if (existingKeys == null) {
diff --git
a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
index d9d0a23338..458a58f18c 100644
--- a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
+++ b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
@@ -53,6 +53,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
+import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.reflect.FieldUtils;
@@ -74,7 +75,9 @@ import org.apache.gravitino.authorization.Role;
import org.apache.gravitino.authorization.SecurableObject;
import org.apache.gravitino.authorization.SecurableObjects;
import org.apache.gravitino.cache.CaffeineEntityCache;
+import org.apache.gravitino.cache.EntityCacheKey;
import org.apache.gravitino.cache.EntityCacheRelationKey;
+import org.apache.gravitino.cache.ReverseIndexCache;
import org.apache.gravitino.exceptions.NoSuchEntityException;
import org.apache.gravitino.exceptions.NonEmptyEntityException;
import org.apache.gravitino.file.Fileset;
@@ -2718,6 +2721,48 @@ public class TestEntityStorage {
Condition.ALLOW,
loadedWriteRole.securableObjects().get(0).privileges().get(0).condition());
+ // Now try to drop and then recreate the role
+ store.delete(readRole.nameIdentifier(), Entity.EntityType.ROLE);
+
+ ReverseIndexCache reverseIndexCache =
+ ((CaffeineEntityCache) ((RelationalEntityStore)
store).getCache()).getReverseIndex();
+ List<EntityCacheKey> reverseIndexValue =
+ reverseIndexCache.get(
+ NameIdentifier.of("metalake", "newCatalogName", "schema",
"fileset"),
+ Entity.EntityType.FILESET);
+ // As read role is deleted, the reverse index cache should not have it
anymore.
+ Assertions.assertEquals(1, reverseIndexValue.size());
+ Assertions.assertEquals(writeRole.nameIdentifier(),
reverseIndexValue.get(0).identifier());
+
+ store.put(readRole, true);
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ reverseIndexValue =
+ reverseIndexCache.get(
+ NameIdentifier.of("metalake", "newCatalogName", "schema",
"fileset"),
+ Entity.EntityType.FILESET);
+ // As read role is recreated, the reverse index cache should have it
again.
+ Assertions.assertEquals(2, reverseIndexValue.size());
+ List<NameIdentifier> ids =
+
reverseIndexValue.stream().map(EntityCacheKey::identifier).collect(Collectors.toList());
+ Assertions.assertTrue(ids.contains(readRole.nameIdentifier()));
+ Assertions.assertTrue(ids.contains(writeRole.nameIdentifier()));
+
+ // Drop role1 and role2
+ store.delete(readRole.nameIdentifier(), Entity.EntityType.ROLE);
+ store.delete(writeRole.nameIdentifier(), Entity.EntityType.ROLE);
+
+ reverseIndexValue =
+ reverseIndexCache.get(
+ NameIdentifier.of("metalake", "newCatalogName", "schema",
"fileset"),
+ Entity.EntityType.FILESET);
+ // As both roles are deleted, the reverse index cache should not have
them anymore.
+ Assertions.assertNull(reverseIndexValue);
+
+ store.put(readRole, true);
+ store.put(writeRole, true);
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+
// first try to rename the fileset to fileset_new
store.update(
fileset.nameIdentifier(),