From Murtadha Hubail <[email protected]>: Murtadha Hubail has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605 )
Change subject: [ASTERIXDB-3196][*DB] Support S3 objects with special chars ...................................................................... [ASTERIXDB-3196][*DB] Support S3 objects with special chars - user model changes: no - storage format changes: no - interface changes: no Details: - Encode/decode S3 URIs to support objects with special chars. - Add test case. Change-Id: I75879b117c8b5bb761e138e6657ae1261a08f8de Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> Reviewed-by: Wail Alkowaileet <[email protected]> --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp M asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java 9 files changed, 142 insertions(+), 7 deletions(-) Approvals: Murtadha Hubail: Looks good to me, but someone else must approve Wail Alkowaileet: Looks good to me, approved Jenkins: Verified; Verified Objections: Anon. E. 
Moose #1000171: Violations found diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java index 505e239..f3fd805 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java @@ -26,7 +26,7 @@ import org.apache.asterix.test.runtime.LangExecutionUtil; import org.apache.asterix.testframework.context.TestCaseContext; import org.junit.AfterClass; -import org.junit.Before; +import org.junit.BeforeClass; import org.junit.FixMethodOrder; import org.junit.Test; import org.junit.runner.RunWith; @@ -51,8 +51,8 @@ this.tcCtx = tcCtx; } - @Before - public void setUp() throws Exception { + @BeforeClass + public static void setUp() throws Exception { CloudUtils.startS3CloudEnvironment(); LangExecutionUtil.setUp(CONFIG_FILE_NAME, testExecutor); System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, CONFIG_FILE_NAME); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp new file mode 100644 index 0000000..18a98e3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE `part1`.`p%r t2` IF EXISTS; +CREATE DATAVERSE `part1`.`p%r t2`; + +USE `part1`.`p%r t2`; +CREATE COLLECTION `some@dataset` PRIMARY KEY (id: int); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp new file mode 100644 index 0000000..3629d7f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE `part1`.`p%r t2`; + +UPSERT INTO `some@dataset` {"id": 1}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp new file mode 100644 index 0000000..ff2ab8f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE `part1`.`p%r t2`; + +SELECT VALUE COUNT(*) +FROM `some@dataset`; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp new file mode 100644 index 0000000..3580ae9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE `part1`.`p%r t2` IF EXISTS; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml index 5360f43..9c1d924 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml @@ -24,5 +24,10 @@ <output-dir compare="Text">query</output-dir> </compilation-unit> </test-case> + <test-case FilePath="cloud_storage"> + <compilation-unit name="special-chars"> + <output-dir compare="Text">special-chars</output-dir> + </compilation-unit> + </test-case> </test-group> </test-suite> diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java index 97b164f..6d65f69 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java @@ -223,7 +223,7 @@ private Set<String> filterAndGet(List<S3Object> contents, FilenameFilter filter) { Set<String> files = new HashSet<>(); for (S3Object s3Object : contents) { - String path = s3Object.key(); + String path = S3Utils.decodeURI(s3Object.key()); if (filter.accept(null, IoUtil.getFileNameFromPath(path))) { files.add(path); } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java index 996b22d..e3e9e98 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java @@ -18,6 +18,10 @@ */ package org.apache.asterix.cloud.clients.aws.s3; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.nio.charset.Charset; import java.util.List; import software.amazon.awssdk.services.s3.S3Client; @@ -33,11 +37,9 @@ public static List<S3Object> listS3Objects(S3Client s3Client, String bucket, String path) { String newMarker = null; - ListObjectsV2Response listObjectsResponse; ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(bucket); - listObjectsBuilder.prefix(path); - + listObjectsBuilder.prefix(encodeURI(path)); while (true) { // List the objects from the start, or from the last marker in case of truncated result if (newMarker == null) { @@ -55,4 +57,19 @@ } return listObjectsResponse.contents(); } + + public static String encodeURI(String path) { + if (path.isEmpty()) { + return 
path; + } + try { + return new URI("s3", "//", path).getRawFragment(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + public static String decodeURI(String path) { + return URLDecoder.decode(path, Charset.defaultCharset()); + } } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I75879b117c8b5bb761e138e6657ae1261a08f8de Gerrit-Change-Number: 17605 Gerrit-PatchSet: 2 Gerrit-Owner: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-MessageType: merged
