This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push: new 7e68132cc2 [ASTERIXDB-3196][*DB] Support S3 objects with special chars 7e68132cc2 is described below commit 7e68132cc2539cc5345bb028d30bef7d19918a74 Author: Murtadha Hubail <mhub...@apache.org> AuthorDate: Mon Jun 19 13:22:49 2023 -0700 [ASTERIXDB-3196][*DB] Support S3 objects with special chars - user model changes: no - storage format changes: no - interface changes: no Details: - Encode/decode S3 URIs to support objects with special chars. - Add test case. Change-Id: I75879b117c8b5bb761e138e6657ae1261a08f8de Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605 Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Murtadha Hubail <mhub...@apache.org> Reviewed-by: Wail Alkowaileet <wael....@gmail.com> --- .../test/cloud_storage/CloudStorageTest.java | 6 +++--- .../cloud_storage/special-chars/test.000.ddl.sqlpp | 24 ++++++++++++++++++++++ .../special-chars/test.001.update.sqlpp | 22 ++++++++++++++++++++ .../special-chars/test.002.query.sqlpp | 23 +++++++++++++++++++++ .../cloud_storage/special-chars/test.999.ddl.sqlpp | 20 ++++++++++++++++++ .../cloud_storage/special-chars/result.002.adm | 1 + .../runtimets/testsuite_cloud_storage.xml | 5 +++++ .../cloud/clients/aws/s3/S3CloudClient.java | 2 +- .../asterix/cloud/clients/aws/s3/S3Utils.java | 23 ++++++++++++++++++--- 9 files changed, 119 insertions(+), 7 deletions(-) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java index 505e239c85..f3fd8054d9 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java @@ -26,7 +26,7 @@ import org.apache.asterix.test.common.TestExecutor; import org.apache.asterix.test.runtime.LangExecutionUtil; import org.apache.asterix.testframework.context.TestCaseContext; import org.junit.AfterClass; -import org.junit.Before; +import org.junit.BeforeClass; import org.junit.FixMethodOrder; import org.junit.Test; import org.junit.runner.RunWith; @@ -51,8 +51,8 @@ public class CloudStorageTest { this.tcCtx = tcCtx; } - @Before - public void setUp() throws Exception { + @BeforeClass + public static void setUp() throws Exception { CloudUtils.startS3CloudEnvironment(); LangExecutionUtil.setUp(CONFIG_FILE_NAME, testExecutor); System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, CONFIG_FILE_NAME); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp new file mode 100644 index 0000000000..18a98e38c2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE `part1`.`p%r t2` IF EXISTS; +CREATE DATAVERSE `part1`.`p%r t2`; + +USE `part1`.`p%r t2`; +CREATE COLLECTION `some@dataset` PRIMARY KEY (id: int); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp new file mode 100644 index 0000000000..3629d7f000 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE `part1`.`p%r t2`; + +UPSERT INTO `some@dataset` {"id": 1}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp new file mode 100644 index 0000000000..ff2ab8fe0b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE `part1`.`p%r t2`; + +SELECT VALUE COUNT(*) +FROM `some@dataset`; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp new file mode 100644 index 0000000000..3580ae92cc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE `part1`.`p%r t2` IF EXISTS; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm new file mode 100644 index 0000000000..56a6051ca2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml index 5360f43eda..9c1d924d43 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml @@ -24,5 +24,10 @@ <output-dir compare="Text">query</output-dir> </compilation-unit> </test-case> + <test-case FilePath="cloud_storage"> + <compilation-unit name="special-chars"> + <output-dir compare="Text">special-chars</output-dir> + </compilation-unit> + </test-case> </test-group> </test-suite> diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java index 97b164fbcf..6d65f69039 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java @@ -223,7 +223,7 @@ public class S3CloudClient implements ICloudClient { private Set<String> filterAndGet(List<S3Object> contents, FilenameFilter filter) { Set<String> files = new HashSet<>(); for (S3Object s3Object : contents) { - String path = s3Object.key(); + String path = S3Utils.decodeURI(s3Object.key()); if (filter.accept(null, IoUtil.getFileNameFromPath(path))) { files.add(path); } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java index 996b22d99c..e3e9e98eb2 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java @@ -18,6 +18,10 @@ */ package org.apache.asterix.cloud.clients.aws.s3; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.nio.charset.Charset; import java.util.List; import software.amazon.awssdk.services.s3.S3Client; @@ -33,11 +37,9 @@ public class S3Utils { public static List<S3Object> listS3Objects(S3Client s3Client, String bucket, String path) { String newMarker = null; - ListObjectsV2Response listObjectsResponse; ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(bucket); - listObjectsBuilder.prefix(path); - + listObjectsBuilder.prefix(encodeURI(path)); while (true) { // List the objects from the start, or from the last marker in case of truncated result if (newMarker == null) { @@ -55,4 +57,19 @@ public class S3Utils { } return listObjectsResponse.contents(); } + + public static String encodeURI(String path) { + if (path.isEmpty()) { + return path; + } + try { + return new URI("s3", "//", path).getRawFragment(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + public static String decodeURI(String path) { + return URLDecoder.decode(path, Charset.defaultCharset()); + } }