This is an automated email from the ASF dual-hosted git repository.

yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new d2e261ac6 [#6054]  feat(core):  add more GCS permission to support 
fileset operations (#6141)
d2e261ac6 is described below

commit d2e261ac65b96470ba712f5a40c187dd9206cc9d
Author: FANNG <[email protected]>
AuthorDate: Fri Jan 10 15:20:54 2025 +0800

    [#6054]  feat(core):  add more GCS permission to support fileset operations 
(#6141)
    
    ### What changes were proposed in this pull request?
    
    1. for resource path like `a/b`, add "a", "a/", "a/b" read permission
    for GCS connector
    2. replace `storage.legacyBucketReader` with
    `storage.insightsCollectorService`, because `storage.legacyBucketReader`
    provides extra list permission for the bucket.
    
    ### Why are the changes needed?
    
    Fix: #6054
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    Iceberg GCS IT and fileset GCS credential IT
---
 .../gravitino/gcs/credential/GCSTokenProvider.java | 72 ++++++++++++++++++----
 .../gcs/credential/TestGCSTokenProvider.java       | 64 +++++++++++++++++++
 2 files changed, 124 insertions(+), 12 deletions(-)

diff --git 
a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
 
b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
index f499b8c3e..0c1c2ab8a 100644
--- 
a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
+++ 
b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
@@ -24,6 +24,8 @@ import com.google.auth.oauth2.CredentialAccessBoundary;
 import com.google.auth.oauth2.CredentialAccessBoundary.AccessBoundaryRule;
 import com.google.auth.oauth2.DownscopedCredentials;
 import com.google.auth.oauth2.GoogleCredentials;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -99,6 +101,57 @@ public class GCSTokenProvider implements CredentialProvider 
{
     return downscopedCredentials.refreshAccessToken();
   }
 
+  private List<String> getReadExpressions(String bucketName, String 
resourcePath) {
+    List<String> readExpressions = new ArrayList<>();
+    readExpressions.add(
+        String.format(
+            "resource.name.startsWith('projects/_/buckets/%s/objects/%s')",
+            bucketName, resourcePath));
+    getAllResources(resourcePath)
+        .forEach(
+            parentResourcePath ->
+                readExpressions.add(
+                    String.format(
+                        "resource.name == 'projects/_/buckets/%s/objects/%s'",
+                        bucketName, parentResourcePath)));
+    return readExpressions;
+  }
+
+  @VisibleForTesting
+  // "a/b/c" will get ["a", "a/", "a/b", "a/b/", "a/b/c"]
+  static List<String> getAllResources(String resourcePath) {
+    if (resourcePath.endsWith("/")) {
+      resourcePath = resourcePath.substring(0, resourcePath.length() - 1);
+    }
+    if (resourcePath.isEmpty()) {
+      return Arrays.asList("");
+    }
+    Preconditions.checkArgument(
+        !resourcePath.startsWith("/"), resourcePath + " should not start with 
/");
+    List<String> parts = Arrays.asList(resourcePath.split("/"));
+    List<String> results = new ArrayList<>();
+    String parent = "";
+    for (int i = 0; i < parts.size() - 1; i++) {
+      results.add(parts.get(i));
+      parent += parts.get(i) + "/";
+      results.add(parent);
+    }
+    results.add(parent + parts.get(parts.size() - 1));
+    return results;
+  }
+
+  @VisibleForTesting
+  // Remove the first '/', and append `/` if the path does not end with '/'.
+  static String normalizeUriPath(String resourcePath) {
+    if (resourcePath.startsWith("/")) {
+      resourcePath = resourcePath.substring(1);
+    }
+    if (resourcePath.endsWith("/")) {
+      return resourcePath;
+    }
+    return resourcePath + "/";
+  }
+
   private CredentialAccessBoundary getAccessBoundary(
       Set<String> readLocations, Set<String> writeLocations) {
     // bucketName -> read resource expressions
@@ -116,14 +169,11 @@ public class GCSTokenProvider implements 
CredentialProvider {
               URI uri = URI.create(location);
               String bucketName = getBucketName(uri);
               readBuckets.add(bucketName);
-              String resourcePath = uri.getPath().substring(1);
+              String resourcePath = normalizeUriPath(uri.getPath());
               List<String> resourceExpressions =
                   readExpressions.computeIfAbsent(bucketName, key -> new 
ArrayList<>());
               // add read privilege
-              resourceExpressions.add(
-                  String.format(
-                      
"resource.name.startsWith('projects/_/buckets/%s/objects/%s')",
-                      bucketName, resourcePath));
+              resourceExpressions.addAll(getReadExpressions(bucketName, 
resourcePath));
               // add list privilege
               resourceExpressions.add(
                   String.format(
@@ -146,21 +196,19 @@ public class GCSTokenProvider implements 
CredentialProvider {
         CredentialAccessBoundary.newBuilder();
     readBuckets.forEach(
         bucket -> {
-          // Hadoop GCS connector needs to get bucket info
+          // Hadoop GCS connector needs storage.buckets.get permission, the 
reason why not use
+          // inRole:roles/storage.legacyBucketReader is it provides extra list 
permission.
           AccessBoundaryRule bucketInfoRule =
               AccessBoundaryRule.newBuilder()
                   .setAvailableResource(toGCSBucketResource(bucket))
-                  
.setAvailablePermissions(Arrays.asList("inRole:roles/storage.legacyBucketReader"))
+                  .setAvailablePermissions(
+                      
Arrays.asList("inRole:roles/storage.insightsCollectorService"))
                   .build();
           credentialAccessBoundaryBuilder.addRule(bucketInfoRule);
           List<String> readConditions = readExpressions.get(bucket);
           AccessBoundaryRule rule =
               getAccessBoundaryRule(
-                  bucket,
-                  readConditions,
-                  Arrays.asList(
-                      "inRole:roles/storage.legacyObjectReader",
-                      "inRole:roles/storage.objectViewer"));
+                  bucket, readConditions, 
Arrays.asList("inRole:roles/storage.objectViewer"));
           if (rule == null) {
             return;
           }
diff --git 
a/bundles/gcp/src/test/java/org/apache/gravitino/gcs/credential/TestGCSTokenProvider.java
 
b/bundles/gcp/src/test/java/org/apache/gravitino/gcs/credential/TestGCSTokenProvider.java
new file mode 100644
index 000000000..66326fc2b
--- /dev/null
+++ 
b/bundles/gcp/src/test/java/org/apache/gravitino/gcs/credential/TestGCSTokenProvider.java
@@ -0,0 +1,64 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+
+package org.apache.gravitino.gcs.credential;
+
+import com.google.common.collect.ImmutableMap;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class TestGCSTokenProvider {
+
+  @Test
+  void testGetAllResources() {
+    Map<String, List<String>> checkResults =
+        ImmutableMap.of(
+            "a/b", Arrays.asList("a", "a/", "a/b"),
+            "a/b/", Arrays.asList("a", "a/", "a/b"),
+            "a", Arrays.asList("a"),
+            "a/", Arrays.asList("a"),
+            "", Arrays.asList(""),
+            "/", Arrays.asList(""));
+
+    checkResults.forEach(
+        (key, value) -> {
+          List<String> parentResources = GCSTokenProvider.getAllResources(key);
+          Assertions.assertArrayEquals(value.toArray(), 
parentResources.toArray());
+        });
+  }
+
+  @Test
+  void testNormalizePath() {
+    Map<String, String> checkResults =
+        ImmutableMap.of(
+            "/a/b/", "a/b/",
+            "/a/b", "a/b/",
+            "a/b", "a/b/",
+            "a/b/", "a/b/");
+
+    checkResults.forEach(
+        (k, v) -> {
+          String normalizedPath = GCSTokenProvider.normalizeUriPath(k);
+          Assertions.assertEquals(v, normalizedPath);
+        });
+  }
+}

Reply via email to