This is an automated email from the ASF dual-hosted git repository.
mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 4b4a1916c2 [#8824] fix(core): Fix bugs in the logic about reverse
index in entity store Caffeine cache (#9068)
4b4a1916c2 is described below
commit 4b4a1916c2a15e5702df88ffa0264bdeeb1b3ac4
Author: Mini Yu <[email protected]>
AuthorDate: Wed Nov 12 17:35:32 2025 +0800
[#8824] fix(core): Fix bugs in the logic about reverse index in entity
store Caffeine cache (#9068)
### What changes were proposed in this pull request?
Use List<Key> instead of a single Key as the value type of the reverse index.
### Why are the changes needed?
Currently, the reverse index for the Caffeine cache works as follows.
Assuming role1 has a securable object table1, then the data in the
reverse index will be
```text
KEY VALUE
table1 role1
```
When role2 is also granted the securable object table1, its index entry
will overwrite that of role1, and the final data will be
```text
KEY VALUE
table1 role2
```
When table1 is deleted, Gravitino will only invalidate the data of
role2, and role1 will still hold the out-of-date data.
Fix: #8824
Fix: #8817
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
UTs
---
.../gravitino/cache/CaffeineEntityCache.java | 37 +++--
.../apache/gravitino/cache/ReverseIndexCache.java | 36 ++++-
.../gravitino/storage/TestEntityStorage.java | 150 +++++++++++++++++++++
3 files changed, 208 insertions(+), 15 deletions(-)
diff --git
a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
index af014e1380..ceb1a3ef29 100644
--- a/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/CaffeineEntityCache.java
@@ -366,21 +366,30 @@ public class CaffeineEntityCache extends BaseEntityCache {
Optional<SupportsRelationOperations.Type> relTypeOpt) {
Queue<EntityCacheKey> queue = new ArrayDeque<>();
- EntityCacheKey valueForExactKey =
+ EntityCacheRelationKey valueForExactKey =
cacheIndex.getValueForExactKey(
relTypeOpt.isEmpty()
- ? EntityCacheKey.of(identifier, type).toString()
+ ? EntityCacheRelationKey.of(identifier, type).toString()
: EntityCacheRelationKey.of(identifier, type,
relTypeOpt.get()).toString());
if (valueForExactKey == null) {
- // No key to remove
- return false;
+ // It means the key does not exist in the cache. However, we still need
to handle some cases.
+ // For example, we have stored a role entity in the cache and entity to
role mapping in the
+ // reverse index. This is: cache data: role identifier -> role entity,
reverse index:
+ // the securable object -> role. When we update the securable object, we
need to invalidate
+ // the
+ // role entity from the cache though the securable object is not in the
cache data.
+ valueForExactKey = EntityCacheRelationKey.of(identifier, type,
relTypeOpt.orElse(null));
}
+ Set<EntityCacheKey> visited = Sets.newHashSet();
queue.offer(valueForExactKey);
-
while (!queue.isEmpty()) {
EntityCacheKey currentKeyToRemove = queue.poll();
+ if (visited.contains(currentKeyToRemove)) {
+ continue;
+ }
+ visited.add(currentKeyToRemove);
cacheData.invalidate(currentKeyToRemove);
cacheIndex.remove(currentKeyToRemove.toString());
@@ -392,7 +401,7 @@ public class CaffeineEntityCache extends BaseEntityCache {
queue.addAll(relatedEntityKeysToRemove);
// Look up from reverse index to go to next depth
- List<EntityCacheKey> reverseKeysToRemove =
+ List<List<EntityCacheKey>> reverseKeysToRemove =
Lists.newArrayList(
reverseIndex.getValuesForKeysStartingWith(
currentKeyToRemove.identifier().toString()));
@@ -400,15 +409,19 @@ public class CaffeineEntityCache extends BaseEntityCache {
key -> {
// Remove from reverse index
// Convert EntityCacheRelationKey to EntityCacheKey
- reverseIndex
- .getKeysStartingWith(key.toString())
+ key.stream()
.forEach(
- reverseIndexKey -> {
- reverseIndex.remove(reverseIndexKey.toString());
- });
+ k ->
+ reverseIndex
+ .getValuesForKeysStartingWith(k.toString())
+ .forEach(rsk ->
reverseIndex.remove(rsk.toString())));
});
- queue.addAll(reverseKeysToRemove);
+ reverseIndex.remove(currentKeyToRemove);
+ Set<EntityCacheKey> toAdd =
+ Sets.newHashSet(
+
reverseKeysToRemove.stream().flatMap(List::stream).collect(Collectors.toList()));
+ queue.addAll(toAdd);
}
return true;
diff --git
a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
index 471da0915b..57ca31c974 100644
--- a/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
+++ b/core/src/main/java/org/apache/gravitino/cache/ReverseIndexCache.java
@@ -19,10 +19,12 @@
package org.apache.gravitino.cache;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
import com.googlecode.concurrenttrees.radix.ConcurrentRadixTree;
import com.googlecode.concurrenttrees.radix.RadixTree;
import
com.googlecode.concurrenttrees.radix.node.concrete.DefaultCharArrayNodeFactory;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.gravitino.Entity;
import org.apache.gravitino.HasIdentifier;
@@ -36,7 +38,7 @@ import org.apache.gravitino.meta.UserEntity;
* efficiently store and retrieve relationships between entities based on
their keys.
*/
public class ReverseIndexCache {
- private RadixTree<EntityCacheKey> reverseIndex;
+ private RadixTree<List<EntityCacheKey>> reverseIndex;
/** Registers a reverse index processor for a specific entity class. */
private final Map<Class<? extends Entity>, ReverseIndexRule>
reverseIndexRules = new HashMap<>();
@@ -52,7 +54,7 @@ public class ReverseIndexCache {
return reverseIndex.remove(key.toString());
}
- public Iterable<EntityCacheKey> getValuesForKeysStartingWith(String
keyPrefix) {
+ public Iterable<List<EntityCacheKey>> getValuesForKeysStartingWith(String
keyPrefix) {
return reverseIndex.getValuesForKeysStartingWith(keyPrefix);
}
@@ -71,7 +73,23 @@ public class ReverseIndexCache {
public void put(
NameIdentifier nameIdentifier, Entity.EntityType type,
EntityCacheRelationKey key) {
EntityCacheKey entityCacheKey = EntityCacheKey.of(nameIdentifier, type);
- reverseIndex.put(entityCacheKey.toString(), key);
+ List<EntityCacheKey> existingKeys =
reverseIndex.getValueForExactKey(entityCacheKey.toString());
+ if (existingKeys == null) {
+ reverseIndex.put(entityCacheKey.toString(), List.of(key));
+ } else {
+ if (existingKeys.contains(key)) {
+ return;
+ }
+
+ List<EntityCacheKey> newValues = Lists.newArrayList(existingKeys);
+ newValues.add(key);
+ reverseIndex.put(entityCacheKey.toString(), newValues);
+ }
+ }
+
+ public List<EntityCacheKey> get(NameIdentifier nameIdentifier,
Entity.EntityType type) {
+ EntityCacheKey entityCacheKey = EntityCacheKey.of(nameIdentifier, type);
+ return reverseIndex.getValueForExactKey(entityCacheKey.toString());
}
public void put(Entity entity, EntityCacheRelationKey key) {
@@ -100,4 +118,16 @@ public class ReverseIndexCache {
interface ReverseIndexRule {
void indexEntity(Entity entity, EntityCacheRelationKey key,
ReverseIndexCache cache);
}
+
+ @Override
+ public String toString() {
+ Iterable<CharSequence> keys = reverseIndex.getKeysStartingWith("");
+ StringBuilder sb = new StringBuilder();
+ for (CharSequence key : keys) {
+ sb.append(key).append(" ->
").append(reverseIndex.getValueForExactKey(key.toString()));
+ sb.append("\n");
+ }
+
+ return sb.toString();
+ }
}
diff --git
a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
index 0e8298a2a0..143e979f69 100644
--- a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
+++ b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java
@@ -34,6 +34,7 @@ import static
org.apache.gravitino.Configs.STORE_DELETE_AFTER_TIME;
import static org.apache.gravitino.Configs.VERSION_RETENTION_COUNT;
import static org.apache.gravitino.file.Fileset.LOCATION_NAME_UNKNOWN;
import static
org.apache.gravitino.storage.relational.TestJDBCBackend.createRoleEntity;
+import static
org.apache.gravitino.storage.relational.TestJDBCBackend.createUserEntity;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@@ -65,6 +66,7 @@ import org.apache.gravitino.EntityStoreFactory;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.Namespace;
import org.apache.gravitino.authorization.AuthorizationUtils;
+import org.apache.gravitino.authorization.Privilege.Condition;
import org.apache.gravitino.authorization.Privileges;
import org.apache.gravitino.authorization.Role;
import org.apache.gravitino.authorization.SecurableObject;
@@ -2639,6 +2641,154 @@ public class TestEntityStorage {
List<SecurableObject> securableObjects = newRow.securableObjects();
Assertions.assertEquals(1, securableObjects.size());
Assertions.assertEquals("newCatalogName",
securableObjects.get(0).name());
+
+ // Now try to create a schema and a fileset under the updated catalog
+ SchemaEntity schema =
+ createSchemaEntity(
+ RandomIdGenerator.INSTANCE.nextId(),
+ Namespace.of("metalake", "newCatalogName"),
+ "schema",
+ auditInfo);
+
+ store.put(schema, false);
+ FilesetEntity fileset =
+ createFilesetEntity(
+ RandomIdGenerator.INSTANCE.nextId(),
+ Namespace.of("metalake", "newCatalogName", "schema"),
+ "fileset",
+ auditInfo);
+ store.put(fileset, false);
+
+ // Now try to create two roles: one can read_fileset, another can
write_fileset
+ SecurableObject catalogObject =
+ SecurableObjects.ofCatalog("newCatalogName", Lists.newArrayList());
+ SecurableObject schemaObject =
+ SecurableObjects.ofSchema(catalogObject, "schema",
Lists.newArrayList());
+ SecurableObject securableFileset01 =
+ SecurableObjects.ofFileset(
+ schemaObject, "fileset",
Lists.newArrayList(Privileges.ReadFileset.allow()));
+ SecurableObject securableFileset02 =
+ SecurableObjects.ofFileset(
+ schemaObject, "fileset",
Lists.newArrayList(Privileges.WriteFileset.allow()));
+
+ RoleEntity readRole =
+ RoleEntity.builder()
+ .withId(RandomIdGenerator.INSTANCE.nextId())
+ .withName("roleReadFileset")
+ .withNamespace(AuthorizationUtils.ofRoleNamespace("metalake"))
+ .withProperties(null)
+ .withAuditInfo(auditInfo)
+ .withSecurableObjects(Lists.newArrayList(securableFileset01))
+ .build();
+ store.put(readRole, false);
+
+ RoleEntity writeRole =
+ RoleEntity.builder()
+ .withId(RandomIdGenerator.INSTANCE.nextId())
+ .withName("roleWriteFileset")
+ .withNamespace(AuthorizationUtils.ofRoleNamespace("metalake"))
+ .withProperties(null)
+ .withAuditInfo(auditInfo)
+ .withSecurableObjects(Lists.newArrayList(securableFileset02))
+ .build();
+ store.put(writeRole, false);
+
+ // Load the two roles and verify their securable objects
+ Role loadedReadRole =
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ Role loadedWriteRole =
+ store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ Assertions.assertEquals(1, loadedReadRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema.fileset",
loadedReadRole.securableObjects().get(0).fullName());
+ Assertions.assertEquals(1, loadedWriteRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema.fileset",
loadedReadRole.securableObjects().get(0).fullName());
+ Assertions.assertEquals(
+ Condition.ALLOW,
+
loadedReadRole.securableObjects().get(0).privileges().get(0).condition());
+ Assertions.assertEquals(
+ Condition.ALLOW,
+
loadedWriteRole.securableObjects().get(0).privileges().get(0).condition());
+
+ // first try to rename the fileset to fileset_new
+ store.update(
+ fileset.nameIdentifier(),
+ FilesetEntity.class,
+ Entity.EntityType.FILESET,
+ e ->
+ createFilesetEntity(
+ fileset.id(), fileset.namespace(), "fileset_new",
fileset.auditInfo()));
+
+ // try to load the two roles again, the securable objects should reflect
the updated fileset
+ // name
+ loadedReadRole =
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ loadedWriteRole =
+ store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+
+ Assertions.assertEquals(1, loadedReadRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema.fileset_new",
loadedReadRole.securableObjects().get(0).fullName());
+ Assertions.assertEquals(1, loadedWriteRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema.fileset_new",
loadedReadRole.securableObjects().get(0).fullName());
+ Assertions.assertEquals(
+ Condition.ALLOW,
+
loadedReadRole.securableObjects().get(0).privileges().get(0).condition());
+ Assertions.assertEquals(
+ Condition.ALLOW,
+
loadedWriteRole.securableObjects().get(0).privileges().get(0).condition());
+
+ // Now try to rename schema to schema_new
+ store.update(
+ NameIdentifier.of("metalake", "newCatalogName", "schema"),
+ SchemaEntity.class,
+ Entity.EntityType.SCHEMA,
+ e ->
+ createSchemaEntity(
+ schema.id(),
+ Namespace.of("metalake", "newCatalogName"),
+ "schema_new",
+ schema.auditInfo()));
+ // try to load the two roles again, the securable objects should reflect
the updated schema
+ loadedReadRole =
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ loadedWriteRole =
+ store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ Assertions.assertEquals(1, loadedReadRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema_new.fileset_new",
+ loadedReadRole.securableObjects().get(0).fullName());
+ Assertions.assertEquals(1, loadedWriteRole.securableObjects().size());
+ Assertions.assertEquals(
+ "newCatalogName.schema_new.fileset_new",
+ loadedReadRole.securableObjects().get(0).fullName());
+
+ // now create a user1 and assign the readRole to the user
+ UserEntity user1 =
+ createUserEntity(
+ RandomIdGenerator.INSTANCE.nextId(),
+ AuthorizationUtils.ofUserNamespace("metalake"),
+ "user1",
+ auditInfo);
+ store.put(user1, false);
+
+ // Now try to drop the fileset
+ store.delete(
+ NameIdentifier.of("metalake", "newCatalogName", "schema_new",
"fileset_new"),
+ Entity.EntityType.FILESET);
+ Assertions.assertFalse(store.exists(fileset.nameIdentifier(),
Entity.EntityType.FILESET));
+
+ // Now try to load the two roles again, the securable objects should be
empty
+ loadedReadRole =
+ store.get(readRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+ loadedWriteRole =
+ store.get(writeRole.nameIdentifier(), Entity.EntityType.ROLE,
RoleEntity.class);
+
+ Assertions.assertEquals(0, loadedReadRole.securableObjects().size());
+ Assertions.assertEquals(0, loadedWriteRole.securableObjects().size());
+
destroy(type);
}
}