This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e68132cc2 [ASTERIXDB-3196][*DB] Support S3 objects with special chars
7e68132cc2 is described below

commit 7e68132cc2539cc5345bb028d30bef7d19918a74
Author: Murtadha Hubail <mhub...@apache.org>
AuthorDate: Mon Jun 19 13:22:49 2023 -0700

    [ASTERIXDB-3196][*DB] Support S3 objects with special chars
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    
    - Encode/decode S3 URIs to support objects with special chars.
    - Add test case.
    
    Change-Id: I75879b117c8b5bb761e138e6657ae1261a08f8de
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605
    Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Murtadha Hubail <mhub...@apache.org>
    Reviewed-by: Wail Alkowaileet <wael....@gmail.com>
---
 .../test/cloud_storage/CloudStorageTest.java       |  6 +++---
 .../cloud_storage/special-chars/test.000.ddl.sqlpp | 24 ++++++++++++++++++++++
 .../special-chars/test.001.update.sqlpp            | 22 ++++++++++++++++++++
 .../special-chars/test.002.query.sqlpp             | 23 +++++++++++++++++++++
 .../cloud_storage/special-chars/test.999.ddl.sqlpp | 20 ++++++++++++++++++
 .../cloud_storage/special-chars/result.002.adm     |  1 +
 .../runtimets/testsuite_cloud_storage.xml          |  5 +++++
 .../cloud/clients/aws/s3/S3CloudClient.java        |  2 +-
 .../asterix/cloud/clients/aws/s3/S3Utils.java      | 23 ++++++++++++++++++---
 9 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
index 505e239c85..f3fd8054d9 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
@@ -26,7 +26,7 @@ import org.apache.asterix.test.common.TestExecutor;
 import org.apache.asterix.test.runtime.LangExecutionUtil;
 import org.apache.asterix.testframework.context.TestCaseContext;
 import org.junit.AfterClass;
-import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.FixMethodOrder;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -51,8 +51,8 @@ public class CloudStorageTest {
         this.tcCtx = tcCtx;
     }
 
-    @Before
-    public void setUp() throws Exception {
+    @BeforeClass
+    public static void setUp() throws Exception {
         CloudUtils.startS3CloudEnvironment();
         LangExecutionUtil.setUp(CONFIG_FILE_NAME, testExecutor);
         System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, CONFIG_FILE_NAME);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp
new file mode 100644
index 0000000000..18a98e38c2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE `part1`.`p%r t2` IF EXISTS;
+CREATE DATAVERSE `part1`.`p%r t2`;
+
+USE `part1`.`p%r t2`;
+CREATE COLLECTION `some@dataset` PRIMARY KEY (id: int);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp
new file mode 100644
index 0000000000..3629d7f000
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE `part1`.`p%r t2`;
+
+UPSERT INTO `some@dataset` {"id": 1};
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp
new file mode 100644
index 0000000000..ff2ab8fe0b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE `part1`.`p%r t2`;
+
+SELECT VALUE COUNT(*)
+FROM `some@dataset`;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp
new file mode 100644
index 0000000000..3580ae92cc
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE `part1`.`p%r t2` IF EXISTS;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm
new file mode 100644
index 0000000000..56a6051ca2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
index 5360f43eda..9c1d924d43 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
@@ -24,5 +24,10 @@
         <output-dir compare="Text">query</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="cloud_storage">
+      <compilation-unit name="special-chars">
+        <output-dir compare="Text">special-chars</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
 </test-suite>
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
index 97b164fbcf..6d65f69039 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
@@ -223,7 +223,7 @@ public class S3CloudClient implements ICloudClient {
     private Set<String> filterAndGet(List<S3Object> contents, FilenameFilter filter) {
         Set<String> files = new HashSet<>();
         for (S3Object s3Object : contents) {
-            String path = s3Object.key();
+            String path = S3Utils.decodeURI(s3Object.key());
             if (filter.accept(null, IoUtil.getFileNameFromPath(path))) {
                 files.add(path);
             }
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
index 996b22d99c..e3e9e98eb2 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
@@ -18,6 +18,10 @@
  */
 package org.apache.asterix.cloud.clients.aws.s3;
 
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.nio.charset.Charset;
 import java.util.List;
 
 import software.amazon.awssdk.services.s3.S3Client;
@@ -33,11 +37,9 @@ public class S3Utils {
 
     public static List<S3Object> listS3Objects(S3Client s3Client, String bucket, String path) {
         String newMarker = null;
-
         ListObjectsV2Response listObjectsResponse;
         ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(bucket);
-        listObjectsBuilder.prefix(path);
-
+        listObjectsBuilder.prefix(encodeURI(path));
         while (true) {
             // List the objects from the start, or from the last marker in case of truncated result
             if (newMarker == null) {
@@ -55,4 +57,19 @@ public class S3Utils {
         }
         return listObjectsResponse.contents();
     }
+
+    public static String encodeURI(String path) {
+        if (path.isEmpty()) {
+            return path;
+        }
+        try {
+            return new URI("s3", "//", path).getRawFragment();
+        } catch (URISyntaxException e) {
+            throw new IllegalArgumentException(e);
+        }
+    }
+
+    public static String decodeURI(String path) {
+        return URLDecoder.decode(path, Charset.defaultCharset());
+    }
 }

Reply via email to