This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 6991ae67e7401226df5cad6c7abfb11e8598c993 Author: Hussain Towaileb <[email protected]> AuthorDate: Fri Sep 5 10:55:44 2025 +0300 [ASTERIXDB-3643][EXT]: make path-style addressing configurable Ext-ref: MB-68398 Change-Id: I068f9caa9d9ea800d62954a1863b91d2074c29c6 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20296 Reviewed-by: Michael Blow <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> Tested-by: Hussain Towaileb <[email protected]> --- .../boolean-value/external_dataset.000.ddl.sqlpp | 38 ++++++++++++++++++++++ .../boolean-value/external_dataset.099.ddl.sqlpp | 20 ++++++++++++ .../external_dataset.000.ddl.sqlpp | 38 ++++++++++++++++++++++ .../external_dataset.099.ddl.sqlpp | 20 ++++++++++++ .../runtimets/testsuite_external_dataset_s3.xml | 13 +++++++- .../asterix/external/util/aws/s3/S3AuthUtils.java | 35 +++++++++++++++++--- .../asterix/external/util/aws/s3/S3Constants.java | 1 + 7 files changed, 159 insertions(+), 6 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.000.ddl.sqlpp new file mode 100644 index 0000000000..cac44314d4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.000.ddl.sqlpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +}; + +drop dataset test if exists; +create external dataset test(test) using S3 ( + ("accessKeyId"="dummyAccessKey"), + ("secretAccessKey"="dummySecretKey"), + ("region"="us-west-1"), + ("pathStyleAddressing"="true"), + ("serviceEndpoint"="http://127.0.0.1:8001"), + ("container"="playground"), + ("definition"="json-data/reviews/single-line/json"), + ("format"="json") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.099.ddl.sqlpp new file mode 100644 index 0000000000..548e63267e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/boolean-value/external_dataset.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.000.ddl.sqlpp new file mode 100644 index 0000000000..5d19d3e4dd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.000.ddl.sqlpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +}; + +drop dataset test if exists; +create external dataset test(test) using S3 ( + ("accessKeyId"="dummyAccessKey"), + ("secretAccessKey"="dummySecretKey"), + ("region"="us-west-1"), + ("pathStyleAddressing"="non-boolean-value"), + ("serviceEndpoint"="http://127.0.0.1:8001"), + ("container"="playground"), + ("definition"="json-data/reviews/single-line/json"), + ("format"="json") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.099.ddl.sqlpp new file mode 100644 index 0000000000..548e63267e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/path-style-access/non-boolean-value/external_dataset.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 7a07da00e5..5b1d265e20 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -1196,6 +1196,17 @@ <expected-error>Provided S3 region is not supported: 'some-new-region'</expected-error> </compilation-unit> </test-case> + <test-case FilePath="external-dataset/s3"> + <compilation-unit name="path-style-access/boolean-value"> + <output-dir compare="Text">path-style-access/boolean-value</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset/s3"> + <compilation-unit name="path-style-access/non-boolean-value"> + <output-dir compare="Text">path-style-access/non-boolean-value</output-dir> + <expected-error>ASX1173: Invalid value for parameter 'pathStyleAddressing', allowed value(s): true, false</expected-error> + </compilation-unit> + </test-case> <test-case FilePath="external-dataset/s3"> <compilation-unit name="cross-region/boolean-value"> <output-dir compare="Text">cross-region/boolean-value</output-dir> @@ -1204,7 +1215,7 @@ <test-case FilePath="external-dataset/s3"> <compilation-unit name="cross-region/non-boolean-value"> <output-dir compare="Text">cross-region/non-boolean-value</output-dir> - <expected-error>c</expected-error> + <expected-error>ASX1173: Invalid value for parameter 'crossRegion', allowed value(s): true, false</expected-error> </compilation-unit> </test-case> <test-case FilePath="external-dataset/s3"> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3AuthUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3AuthUtils.java index 035415d9c9..90bb04f309 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3AuthUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3AuthUtils.java @@ -53,6 +53,7 @@ import static org.apache.asterix.external.util.aws.s3.S3Constants.HADOOP_SESSION import static org.apache.asterix.external.util.aws.s3.S3Constants.HADOOP_SIMPLE; import static org.apache.asterix.external.util.aws.s3.S3Constants.HADOOP_TEMPORARY; import static org.apache.asterix.external.util.aws.s3.S3Constants.INSTANCE_PROFILE_FIELD_NAME; +import static org.apache.asterix.external.util.aws.s3.S3Constants.PATH_STYLE_ADDRESSING_FIELD_NAME; import static org.apache.asterix.external.util.aws.s3.S3Constants.REGION_FIELD_NAME; import static org.apache.asterix.external.util.aws.s3.S3Constants.ROLE_ARN_FIELD_NAME; import static org.apache.asterix.external.util.aws.s3.S3Constants.SECRET_ACCESS_KEY_FIELD_NAME; @@ -160,6 +161,9 @@ public class S3AuthUtils { } } + boolean pathStyleAddressing = + validateAndGetPathStyleAddressing(configuration.get(PATH_STYLE_ADDRESSING_FIELD_NAME), serviceEndpoint); + builder.forcePathStyle(pathStyleAddressing); return builder.build(); } @@ -217,6 +221,25 @@ public class S3AuthUtils { } } + public static boolean validateAndGetPathStyleAddressing(String pathStyleAddressing, String endpoint) + throws CompilationException { + if (pathStyleAddressing == null) { + return endpoint != null && !endpoint.isEmpty(); + } + validatePathStyleAddressing(pathStyleAddressing); + return Boolean.parseBoolean(pathStyleAddressing); + } + + public static void validatePathStyleAddressing(String pathStyleAddressing) throws CompilationException { + if (pathStyleAddressing == null) { + return; + } + if (!"true".equalsIgnoreCase(pathStyleAddressing) && !"false".equalsIgnoreCase(pathStyleAddressing)) { + throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, PATH_STYLE_ADDRESSING_FIELD_NAME, + "true, false"); + } + } + public static boolean validateAndGetCrossRegion(String crossRegion) throws CompilationException { if (crossRegion == null) { return false; @@ -385,11 +408,13 @@ public class S3AuthUtils { jobConf.set(HADOOP_SERVICE_END_POINT, Constants.CENTRAL_ENDPOINT); } - /* - * This is to allow S3 definition to have path-style form. Should always be true to match the current - * way we access files in S3 - */ - jobConf.set(HADOOP_PATH_STYLE_ACCESS, ExternalDataConstants.TRUE); + boolean pathStyleAddressing = + validateAndGetPathStyleAddressing(configuration.get(PATH_STYLE_ADDRESSING_FIELD_NAME), serviceEndpoint); + if (pathStyleAddressing) { + jobConf.set(HADOOP_PATH_STYLE_ACCESS, ExternalDataConstants.TRUE); + } else { + jobConf.set(HADOOP_PATH_STYLE_ACCESS, ExternalDataConstants.FALSE); + } /* * Set the size of S3 connection pool to be the number of partitions diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java index 807e887bf3..102c378cca 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java @@ -28,6 +28,7 @@ public class S3Constants { // Authentication specific parameters public static final String REGION_FIELD_NAME = "region"; + public static final String PATH_STYLE_ADDRESSING_FIELD_NAME = "pathStyleAddressing"; public static final String CROSS_REGION_FIELD_NAME = "crossRegion"; public static final String INSTANCE_PROFILE_FIELD_NAME = "instanceProfile"; public static final String ACCESS_KEY_ID_FIELD_NAME = "accessKeyId";
