This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit e9772ce511d043d8490d83a65f1210694dbc0eb3 Author: Savyasach Reddy <[email protected]> AuthorDate: Mon Nov 18 13:52:58 2024 +0530 [NO ISSUE]: Support Reading credentials from String - user model changes: no - storage format changes: no - interface changes: no details: - Read JSON credentials from a string instead of a file to read Parquet data from Google Cloud Storage Ext-ref: MB-64268 Change-Id: Iac78b7df9688e134f2fc7e2eb737b909a71077e0 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19084 Reviewed-by: Murtadha Hubail <[email protected]> Tested-by: Murtadha Hubail <[email protected]> Integration-Tests: Murtadha Hubail <[email protected]> --- .../external/util/google/gcs/GCSConstants.java | 12 ++++++++ .../asterix/external/util/google/gcs/GCSUtils.java | 35 ++++++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java index f2dbde7f80..2818043179 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSConstants.java @@ -44,4 +44,16 @@ public class GCSConstants { public static final String HADOOP_ENDPOINT = "fs.gs.storage.root.url"; public static final String HADOOP_MAX_REQUESTS_PER_BATCH = "fs.gs.max.requests.per.batch"; public static final String HADOOP_BATCH_THREADS = "fs.gs.batch.threads"; + + public static class JSON_CREDENTIALS_FIELDS { + public static final String PRIVATE_KEY_ID = "private_key_id"; + public static final String PRIVATE_KEY = "private_key"; + public static final String CLIENT_EMAIL = "client_email"; + } + + public static class HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS { + public static final String PRIVATE_KEY_ID = "fs.gs.auth.service.account.private.key.id"; + public static final String PRIVATE_KEY = "fs.gs.auth.service.account.private.key"; + public static final String CLIENT_EMAIL = "fs.gs.auth.service.account.email"; + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java index 5274c44a38..74a664da35 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java @@ -25,8 +25,6 @@ import static org.apache.asterix.external.util.ExternalDataUtils.getPrefix; import static org.apache.asterix.external.util.ExternalDataUtils.validateIncludeExclude; import static org.apache.asterix.external.util.google.gcs.GCSConstants.APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME; import static org.apache.asterix.external.util.google.gcs.GCSConstants.ENDPOINT_FIELD_NAME; -import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE; -import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH; import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_TYPE; import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_UNAUTHENTICATED; import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_ENDPOINT; @@ -37,7 +35,6 @@ import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -59,6 +56,10 @@ import org.apache.hyracks.api.exceptions.IWarningCollector; import org.apache.hyracks.api.exceptions.SourceLocation; import org.apache.hyracks.api.exceptions.Warning; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.json.JsonReadFeature; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.api.gax.paging.Page; import com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.BaseServiceException; @@ -73,6 +74,11 @@ public class GCSUtils { } + private static final ObjectMapper JSON_CREDENTIALS_OBJECT_MAPPER = new ObjectMapper(); + static { + JSON_CREDENTIALS_OBJECT_MAPPER.configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS.mappedFeature(), true); + } + /** * Builds the client using the provided configuration * @@ -218,7 +224,8 @@ public class GCSUtils { * @param configuration properties * @param numberOfPartitions number of partitions in the cluster */ - public static void configureHdfsJobConf(JobConf conf, Map<String, String> configuration, int numberOfPartitions) { + public static void configureHdfsJobConf(JobConf conf, Map<String, String> configuration, int numberOfPartitions) + throws AlgebricksException { String jsonCredentials = configuration.get(JSON_CREDENTIALS_FIELD_NAME); String endpoint = configuration.get(ENDPOINT_FIELD_NAME); @@ -234,15 +241,25 @@ public class GCSUtils { // conf.set(GCSConstants.HADOOP_BATCH_THREADS, String.valueOf(numberOfPartitions)); // authentication method - // TODO(htowaileb): find a way to pass the content instead of the path to keyfile, this line is temporary - Path credentials = Path.of("credentials.json"); if (jsonCredentials == null) { // anonymous access conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_UNAUTHENTICATED); } else { - // TODO(htowaileb) need to pass the file content - conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE); - conf.set(HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH, credentials.toAbsolutePath().toString()); + try { + JsonNode jsonCreds = JSON_CREDENTIALS_OBJECT_MAPPER.readTree(jsonCredentials); + // Setting these values instead of HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH is supported + // in com.google.cloud.bigdataoss:util-hadoop only up to version hadoop3-2.2.x and is removed in + // version 3.x.y, which also removed support for hadoop-2 + conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.PRIVATE_KEY_ID, + jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.PRIVATE_KEY_ID).asText()); + conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.PRIVATE_KEY, + jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.PRIVATE_KEY).asText()); + conf.set(GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_FIELDS.CLIENT_EMAIL, + jsonCreds.get(GCSConstants.JSON_CREDENTIALS_FIELDS.CLIENT_EMAIL).asText()); + } catch (JsonProcessingException e) { + throw CompilationException.create(EXTERNAL_SOURCE_ERROR, "Unable to parse Json Credentials", + getMessageOrToString(e)); + } } // set endpoint if provided, default is https://storage.googleapis.com/
