This is an automated email from the ASF dual-hosted git repository.

yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 55554c6399 [#9148] improvement(core): Clear useless data in the 
reversed index for entity cache. (#9162)
55554c6399 is described below

commit 55554c6399aa238a2d62e854b8be36d8a47b2777
Author: Mini Yu <[email protected]>
AuthorDate: Tue Nov 25 20:47:22 2025 +0800

    [#9148] improvement(core): Clear useless data in the reversed index for 
entity cache. (#9162)
    
    ### What changes were proposed in this pull request?
    
    Clear the entry in the reverse index that has been removed from cache
    data.
    
    ### Why are the changes needed?
    
    We need to clear all out-of-date data in the cache.
    
    Fix: #9148
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A.
    
    ### How was this patch tested?
    
    UTs
---
 .../gravitino/cache/CaffeineEntityCache.java       |  7 ++-
 .../apache/gravitino/cache/ReverseIndexCache.java  | 52 ++++++++++++++++++----
 .../gravitino/storage/TestEntityStorage.java       | 45 +++++++++++++++++++
 3 files changed, 94 insertions(+), 10 deletions(-)

diff --git 
a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java 
b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
index 1ecc27dc4f..316d4a3e01 100644
--- a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
@@ -94,6 +94,11 @@ public class CaffeineEntityCache extends BaseEntityCache {
 
   private ScheduledExecutorService scheduler;
 
+  @VisibleForTesting
+  public ReverseIndexCache getReverseIndex() {
+    return reverseIndex;
+  }
+
   private static final Set<SupportsRelationOperations.Type> RELATION_TYPES =
       Sets.newHashSet(
           SupportsRelationOperations.Type.METADATA_OBJECT_ROLE_REL,
@@ -476,7 +481,7 @@ public class CaffeineEntityCache extends BaseEntityCache {
                     k ->
                         reverseIndex
                             .getValuesForKeysStartingWith(k.toString())
-                            .forEach(rsk -> 
reverseIndex.remove(rsk.toString())));
+                            .forEach(rsk -> rsk.forEach(v -> 
reverseIndex.remove(v))));
           });
 
       reverseIndex.remove(currentKeyToRemove);
diff --git 
a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java 
b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
index 16c0c87532..16870d0411 100644
--- a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
@@ -20,12 +20,14 @@ package org.apache.gravitino.cache;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.googlecode.concurrenttrees.radix.ConcurrentRadixTree;
 import com.googlecode.concurrenttrees.radix.RadixTree;
 import 
com.googlecode.concurrenttrees.radix.node.concrete.DefaultCharArrayNodeFactory;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.gravitino.Entity;
 import org.apache.gravitino.HasIdentifier;
 import org.apache.gravitino.NameIdentifier;
@@ -45,6 +47,29 @@ public class ReverseIndexCache {
   /** Registers a reverse index processor for a specific entity class. */
   private final Map<Class<? extends Entity>, ReverseIndexRule> 
reverseIndexRules = new HashMap<>();
 
+  /**
+   * Map from data entity key to a list of entity cache relation keys. This is 
used for reverse
+   * indexing.
+   *
+   * <p>For example, a role entity may be related to multiple securable 
objects, so we need to
+   * maintain a mapping from the role entity key to the list of securable 
object keys. That is
+   * entityToReverseIndexMap: roleEntityKey -> [securableObjectKey1, 
securableObjectKey2, ...]
+   *
+   * <p>This map is used to quickly find all the related entity cache keys 
when we need to
+   * invalidate in the reverse index if a role entity is updated. The 
following is an example: a
+   * Role a has securable objects s1 and s2, so we have the following mapping: 
<br>
+   * cacheData: role1 -> role entity </br> <br>
+   * reverseIndex: s1 -> [role1], s2 -> [role1] </br>
+   *
+   * <p>This map will be: <br>
+   * role1 -> [s1, s2] </br>
+   *
+   * <p>When we update role1, we need to invalidate s1 and s2 from the reverse 
index via this map,
+   * or the data will be in the memory forever.
+   */
+  private final Map<EntityCacheKey, List<EntityCacheKey>> 
entityToReverseIndexMap =
+      Maps.newHashMap();
+
   public ReverseIndexCache() {
     this.reverseIndex = new ConcurrentRadixTree<>(new 
DefaultCharArrayNodeFactory());
 
@@ -57,20 +82,28 @@ public class ReverseIndexCache {
         GenericEntity.class, 
ReverseIndexRules.GENERIC_METADATA_OBJECT_REVERSE_RULE);
   }
 
-  public boolean remove(EntityCacheKey key) {
-    return reverseIndex.remove(key.toString());
-  }
-
   public Iterable<List<EntityCacheKey>> getValuesForKeysStartingWith(String 
keyPrefix) {
     return reverseIndex.getValuesForKeysStartingWith(keyPrefix);
   }
 
-  public Iterable<CharSequence> getKeysStartingWith(String keyPrefix) {
-    return reverseIndex.getKeysStartingWith(keyPrefix);
-  }
+  public boolean remove(EntityCacheKey key) {
+    List<EntityCacheKey> relatedKeys = entityToReverseIndexMap.remove(key);
+    if (CollectionUtils.isNotEmpty(relatedKeys)) {
+      for (EntityCacheKey relatedKey : relatedKeys) {
+        List<EntityCacheKey> existingKeys = 
reverseIndex.getValueForExactKey(relatedKey.toString());
+        if (existingKeys != null && existingKeys.contains(key)) {
+          List<EntityCacheKey> newValues = Lists.newArrayList(existingKeys);
+          newValues.remove(key);
+          if (newValues.isEmpty()) {
+            reverseIndex.remove(relatedKey.toString());
+          } else {
+            reverseIndex.put(relatedKey.toString(), newValues);
+          }
+        }
+      }
+    }
 
-  public boolean remove(String key) {
-    return reverseIndex.remove(key);
+    return reverseIndex.remove(key.toString());
   }
 
   public int size() {
@@ -80,6 +113,7 @@ public class ReverseIndexCache {
   public void put(
       NameIdentifier nameIdentifier, Entity.EntityType type, 
EntityCacheRelationKey key) {
     EntityCacheKey entityCacheKey = EntityCacheKey.of(nameIdentifier, type);
+    entityToReverseIndexMap.computeIfAbsent(key, k -> 
Lists.newArrayList()).add(entityCacheKey);
 
     List<EntityCacheKey> existingKeys = 
reverseIndex.getValueForExactKey(entityCacheKey.toString());
     if (existingKeys == null) {
diff --git 
a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java 
b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
index d9d0a23338..458a58f18c 100644
--- a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
+++ b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
@@ -53,6 +53,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
+import java.util.stream.Collectors;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.reflect.FieldUtils;
@@ -74,7 +75,9 @@ import org.apache.gravitino.authorization.Role;
 import org.apache.gravitino.authorization.SecurableObject;
 import org.apache.gravitino.authorization.SecurableObjects;
 import org.apache.gravitino.cache.CaffeineEntityCache;
+import org.apache.gravitino.cache.EntityCacheKey;
 import org.apache.gravitino.cache.EntityCacheRelationKey;
+import org.apache.gravitino.cache.ReverseIndexCache;
 import org.apache.gravitino.exceptions.NoSuchEntityException;
 import org.apache.gravitino.exceptions.NonEmptyEntityException;
 import org.apache.gravitino.file.Fileset;
@@ -2718,6 +2721,48 @@ public class TestEntityStorage {
           Condition.ALLOW,
           
loadedWriteRole.securableObjects().get(0).privileges().get(0).condition());
 
+      // Now try to drop and then recreate the role
+      store.delete(readRole.nameIdentifier(), Entity.EntityType.ROLE);
+
+      ReverseIndexCache reverseIndexCache =
+          ((CaffeineEntityCache) ((RelationalEntityStore) 
store).getCache()).getReverseIndex();
+      List<EntityCacheKey> reverseIndexValue =
+          reverseIndexCache.get(
+              NameIdentifier.of("metalake", "newCatalogName", "schema", 
"fileset"),
+              Entity.EntityType.FILESET);
+      // As read role is deleted, the reverse index cache should not have it 
anymore.
+      Assertions.assertEquals(1, reverseIndexValue.size());
+      Assertions.assertEquals(writeRole.nameIdentifier(), 
reverseIndexValue.get(0).identifier());
+
+      store.put(readRole, true);
+      store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE, 
RoleEntity.class);
+      reverseIndexValue =
+          reverseIndexCache.get(
+              NameIdentifier.of("metalake", "newCatalogName", "schema", 
"fileset"),
+              Entity.EntityType.FILESET);
+      // As read role is recreated, the reverse index cache should have it 
again.
+      Assertions.assertEquals(2, reverseIndexValue.size());
+      List<NameIdentifier> ids =
+          
reverseIndexValue.stream().map(EntityCacheKey::identifier).collect(Collectors.toList());
+      Assertions.assertTrue(ids.contains(readRole.nameIdentifier()));
+      Assertions.assertTrue(ids.contains(writeRole.nameIdentifier()));
+
+      // Drop role1 and role2
+      store.delete(readRole.nameIdentifier(), Entity.EntityType.ROLE);
+      store.delete(writeRole.nameIdentifier(), Entity.EntityType.ROLE);
+
+      reverseIndexValue =
+          reverseIndexCache.get(
+              NameIdentifier.of("metalake", "newCatalogName", "schema", 
"fileset"),
+              Entity.EntityType.FILESET);
+      // As both roles are deleted, the reverse index cache should not have 
them anymore.
+      Assertions.assertNull(reverseIndexValue);
+
+      store.put(readRole, true);
+      store.put(writeRole, true);
+      store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE, 
RoleEntity.class);
+      store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE, 
RoleEntity.class);
+
       // first try to rename the fileset to fileset_new
       store.update(
           fileset.nameIdentifier(),

Reply via email to