This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit e9772ce511d043d8490d83a65f1210694dbc0eb3
Author: Savyasach Reddy <[email protected]>
AuthorDate: Mon Nov 18 13:52:58 2024 +0530

    [NO ISSUE]: Support Reading credentials from String
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    details:
    - Read JSON credentials from a string instead of a file to read
      Parquet data from Google Cloud Storage
    
    Ext-ref: MB-64268
    Change-Id: Iac78b7df9688e134f2fc7e2eb737b909a71077e0
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19084
    Reviewed-by: Murtadha Hubail <[email protected]>
    Tested-by: Murtadha Hubail <[email protected]>
    Integration-Tests: Murtadha Hubail <[email protected]>
---
 .../external/util/google/gcs/GCSConstants.java     | 12 ++++++++
 .../asterix/external/util/google/gcs/GCSUtils.java | 35 ++++++++++++++++------
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java
index f2dbde7f80..2818043179 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java
@@ -44,4 +44,16 @@ public class GCSConstants {
     public static final String HADOOP_ENDPOINT = "fs.gs.storage.root.url";
     public static final String HADOOP_MAX_REQUESTS_PER_BATCH = 
"fs.gs.max.requests.per.batch";
     public static final String HADOOP_BATCH_THREADS = "fs.gs.batch.threads";
+
+    public static class JSON_CREDENTIALS_FIELDS {
+        public static final String PRIVATE_KEY_ID = "private_key_id";
+        public static final String PRIVATE_KEY = "private_key";
+        public static final String CLIENT_EMAIL = "client_email";
+    }
+
+    public static class HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS {
+        public static final String PRIVATE_KEY_ID = 
"fs.gs.auth.service.account.private.key.id";
+        public static final String PRIVATE_KEY = 
"fs.gs.auth.service.account.private.key";
+        public static final String CLIENT_EMAIL = 
"fs.gs.auth.service.account.email";
+    }
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
index 5274c44a38..74a664da35 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
@@ -25,8 +25,6 @@ import static 
org.apache.asterix.external.util.ExternalDataUtils.getPrefix;
 import static 
org.apache.asterix.external.util.ExternalDataUtils.validateIncludeExclude;
 import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME;
 import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.ENDPOINT_FIELD_NAME;
-import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE;
-import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH;
 import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_TYPE;
 import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_UNAUTHENTICATED;
 import static 
org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_ENDPOINT;
@@ -37,7 +35,6 @@ import static 
org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -59,6 +56,10 @@ import org.apache.hyracks.api.exceptions.IWarningCollector;
 import org.apache.hyracks.api.exceptions.SourceLocation;
 import org.apache.hyracks.api.exceptions.Warning;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.json.JsonReadFeature;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.api.gax.paging.Page;
 import com.google.auth.oauth2.GoogleCredentials;
 import com.google.cloud.BaseServiceException;
@@ -73,6 +74,11 @@ public class GCSUtils {
 
     }
 
+    private static final ObjectMapper JSON_CREDENTIALS_OBJECT_MAPPER = new 
ObjectMapper();
+    static {
+        
JSON_CREDENTIALS_OBJECT_MAPPER.configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS.mappedFeature(),
 true);
+    }
+
     /**
      * Builds the client using the provided configuration
      *
@@ -218,7 +224,8 @@ public class GCSUtils {
      * @param configuration      properties
      * @param numberOfPartitions number of partitions in the cluster
      */
-    public static void configureHdfsJobConf(JobConf conf, Map<String, String> 
configuration, int numberOfPartitions) {
+    public static void configureHdfsJobConf(JobConf conf, Map<String, String> 
configuration, int numberOfPartitions)
+            throws AlgebricksException {
         String jsonCredentials = 
configuration.get(JSON_CREDENTIALS_FIELD_NAME);
         String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
 
@@ -234,15 +241,25 @@ public class GCSUtils {
         //        conf.set(GCSConstants.HADOOP_BATCH_THREADS, 
String.valueOf(numberOfPartitions));
 
         // authentication method
-        // TODO(htowaileb): find a way to pass the content instead of the path 
to keyfile, this line is temporary
-        Path credentials = Path.of("credentials.json");
         if (jsonCredentials == null) {
             // anonymous access
             conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_UNAUTHENTICATED);
         } else {
-            // TODO(htowaileb) need to pass the file content
-            conf.set(HADOOP_AUTH_TYPE, 
HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE);
-            conf.set(HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH, 
credentials.toAbsolutePath().toString());
+            try {
+                JsonNode jsonCreds = 
JSON_CREDENTIALS_OBJECT_MAPPER.readTree(jsonCredentials);
+                // Setting these values instead of 
HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH is supported
+                // in com.google.cloud.bigdataoss:util-hadoop only up to 
version hadoop3-2.2.x and is removed in
+                // version 3.x.y, which also removed support for hadoop-2
+                
conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.PRIVATE_KEY_ID,
+                        
jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.PRIVATE_KEY_ID).asText());
+                
conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.PRIVATE_KEY,
+                        
jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.PRIVATE_KEY).asText());
+                
conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.CLIENT_EMAIL,
+                        
jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.CLIENT_EMAIL).asText());
+            } catch (JsonProcessingException e) {
+                throw CompilationException.create(EXTERNAL_SOURCE_ERROR, 
"Unable to parse Json Credentials",
+                        getMessageOrToString(e));
+            }
         }
 
         // set endpoint if provided, default is https://storage.googleapis.com/

Reply via email to