This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 03fd6d0  [ASTERIXDB-2931][EXT] Reading Parquet files with Temp. and 
Anon. credentials
03fd6d0 is described below

commit 03fd6d0ff69a800324f8cee6ca5d2f82c210684d
Author: Wail Alkowaileet <[email protected]>
AuthorDate: Thu Jul 15 15:18:03 2021 -0700

    [ASTERIXDB-2931][EXT] Reading Parquet files with Temp. and Anon. credentials
    
    - user mode changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Support reading Parquet files from S3 with anonymous and temporary
      credentials
    
    Change-Id: Id4193e1b827a782db8650d403b7420117b7b4ef1
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12345
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Wael Alkowaileet <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
---
 .../parquet-anonymous-access.00.ddl.sqlpp          | 34 ++++++++++++++++++++
 .../parquet-anonymous-access.01.query.sqlpp        | 23 ++++++++++++++
 .../parquet-anonymous-access.99.ddl.sqlpp          | 20 ++++++++++++
 .../parquet-temporary-access.00.ddl.sqlpp          | 37 ++++++++++++++++++++++
 .../parquet-temporary-access.01.query.sqlpp        | 23 ++++++++++++++
 .../parquet-temporary-access.99.ddl.sqlpp          | 20 ++++++++++++
 .../parquet-anonymous-access.01.adm                |  1 +
 .../parquet-temporary-access.01.adm                |  1 +
 .../runtimets/testsuite_external_dataset_s3.xml    | 10 ++++++
 .../external/util/ExternalDataConstants.java       |  9 ++++++
 .../asterix/external/util/ExternalDataUtils.java   | 26 +++++++++++++--
 11 files changed, 202 insertions(+), 2 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.00.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.00.ddl.sqlpp
new file mode 100644
index 0000000..2337ac4
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.00.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ParquetType AS {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3 (
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="parquet-data/reviews"),
+("format"="parquet"),
+("include"="*dummy_tweet.parquet")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.query.sqlpp
new file mode 100644
index 0000000..4631047
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE count(p.text)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.99.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.99.ddl.sqlpp
new file mode 100644
index 0000000..86a1b59
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.99.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.00.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.00.ddl.sqlpp
new file mode 100644
index 0000000..15ec56a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.00.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ParquetType AS {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3 (
+("accessKeyId"="dummyAccessKey"),
+("secretAccessKey"="dummySecretKey"),
+("sessionToken"="dummySessionToken"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="parquet-data/reviews"),
+("format"="parquet"),
+("include"="*dummy_tweet.parquet")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.query.sqlpp
new file mode 100644
index 0000000..4631047
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE count(p.text)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.99.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.99.ddl.sqlpp
new file mode 100644
index 0000000..86a1b59
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.99.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-anonymous-access/parquet-anonymous-access.01.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/parquet-temporary-access/parquet-temporary-access.01.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index e3295fd..41c769d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -119,6 +119,16 @@
         <output-dir compare="Text">common/parquet/object-concat</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset/s3">
+      <compilation-unit name="parquet-anonymous-access">
+        <output-dir compare="Text">parquet-anonymous-access</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/s3">
+      <compilation-unit name="parquet-temporary-access">
+        <output-dir compare="Text">parquet-temporary-access</output-dir>
+      </compilation-unit>
+    </test-case>
     <!-- Parquet Tests End -->
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/empty-string-definition">
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 92a491d..51865f0 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -319,10 +319,19 @@ public class ExternalDataConstants {
          */
         public static final String HADOOP_ACCESS_KEY_ID = "fs.s3a.access.key";
         public static final String HADOOP_SECRET_ACCESS_KEY = 
"fs.s3a.secret.key";
+        public static final String HADOOP_SESSION_TOKEN = 
"fs.s3a.session.token";
         public static final String HADOOP_PATH_STYLE_ACCESS = 
"fs.s3a.path.style.access";
         public static final String HADOOP_REGION = "fs.s3a.region";
         public static final String HADOOP_SERVICE_END_POINT = 
"fs.s3a.endpoint";
         public static final String HADOOP_S3_PROTOCOL = "s3a";
+
+        //Hadoop credentials provider key
+        public static final String HADOOP_CREDENTIAL_PROVIDER_KEY = 
"fs.s3a.aws.credentials.provider";
+        //Anonymous credential provider
+        public static final String HADOOP_ANONYMOUS_ACCESS = 
"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider";
+        //Temporary credential provider
+        public static final String HADOOP_TEMP_ACCESS = 
"org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider";
+
     }
 
     public static class AzureBlob {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 86e733e..750ff13 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -22,8 +22,12 @@ import static 
org.apache.asterix.common.exceptions.ErrorCode.REQUIRED_PARAM_IF_P
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.ACCESS_KEY_ID_FIELD_NAME;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.ERROR_METHOD_NOT_IMPLEMENTED;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_ACCESS_KEY_ID;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_ANONYMOUS_ACCESS;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_CREDENTIAL_PROVIDER_KEY;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_PATH_STYLE_ACCESS;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SECRET_ACCESS_KEY;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SESSION_TOKEN;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_TEMP_ACCESS;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.SECRET_ACCESS_KEY_FIELD_NAME;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.ACCOUNT_KEY_FIELD_NAME;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.ACCOUNT_NAME_FIELD_NAME;
@@ -834,11 +838,29 @@ public class ExternalDataUtils {
         public static void configureAwsS3HdfsJobConf(JobConf conf, Map<String, 
String> configuration) {
             String accessKeyId = 
configuration.get(ExternalDataConstants.AwsS3.ACCESS_KEY_ID_FIELD_NAME);
             String secretAccessKey = 
configuration.get(ExternalDataConstants.AwsS3.SECRET_ACCESS_KEY_FIELD_NAME);
+            String sessionToken = 
configuration.get(ExternalDataConstants.AwsS3.SESSION_TOKEN_FIELD_NAME);
             String regionId = 
configuration.get(ExternalDataConstants.AwsS3.REGION_FIELD_NAME);
             String serviceEndpoint = 
configuration.get(ExternalDataConstants.AwsS3.SERVICE_END_POINT_FIELD_NAME);
 
-            conf.set(HADOOP_ACCESS_KEY_ID, accessKeyId);
-            conf.set(HADOOP_SECRET_ACCESS_KEY, secretAccessKey);
+            /*
+             * Authentication Methods:
+             * 1- Anonymous: no accessKeyId and no secretAccessKey
+             * 2- Temporary: has to provide accessKeyId, secretAccessKey and 
sessionToken
+             * 3- Private: has to provide accessKeyId and secretAccessKey
+             */
+            if (accessKeyId == null) {
+                //Tells hadoop-aws it is an anonymous access
+                conf.set(HADOOP_CREDENTIAL_PROVIDER_KEY, 
HADOOP_ANONYMOUS_ACCESS);
+            } else {
+                conf.set(HADOOP_ACCESS_KEY_ID, accessKeyId);
+                conf.set(HADOOP_SECRET_ACCESS_KEY, secretAccessKey);
+                if (sessionToken != null) {
+                    conf.set(HADOOP_SESSION_TOKEN, sessionToken);
+                    //Tells hadoop-aws it is a temporary access
+                    conf.set(HADOOP_CREDENTIAL_PROVIDER_KEY, 
HADOOP_TEMP_ACCESS);
+                }
+            }
+
             /*
              * This is to allow S3 definition to have path-style form. Should 
always be true to match the current
              * way we access files in S3

Reply via email to