>From Ritik Raj <[email protected]>: Ritik Raj has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21198?usp=email )
( 6 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: [ASTERIXDB-3768][CLOUD] Add configurable S3 checksum behavior for S3-compatible storage ...................................................................... [ASTERIXDB-3768][CLOUD] Add configurable S3 checksum behavior for S3-compatible storage - user model changes: no - storage format changes: no - interface changes: no AWS SDK Java v2 >= 2.30.0 introduced new cross-SDK checksum defaults (WHEN_SUPPORTED) that break S3-compatible storage solutions (e.g. OCI) which do not support the newer checksum APIs. Add a new CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR configuration option (values: when_required | when_supported | auto) with a smart default: - when_required: when a custom endpoint is configured (S3-compatible) - auto (SDK default): when using native AWS S3 ext-ref: MB-71732 Change-Id: If6618d3a336e9bf134efb1f219660421edc27c43 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21198 Reviewed-by: Michael Blow <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> --- M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java M hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/ICloudProperties.java A hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/S3ChecksumBehavior.java 9 files changed, 131 insertions(+), 18 deletions(-) Approvals: Jenkins: Verified; Verified Michael Blow: Looks good to me, approved diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm index 10c1856..cffa1fa 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm @@ -40,6 +40,7 @@ "cloud.storage.prefix" : "", "cloud.storage.region" : "", "cloud.storage.s3.access.key.id" : null, + "cloud.storage.s3.checksum.behavior" : "auto", "cloud.storage.s3.client.read.timeout" : -1, "cloud.storage.s3.parallel.downloader.client.type" : "crt", "cloud.storage.s3.secret.access.key" : null, diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm index 22c4fc8..6f93933 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm @@ -40,6 +40,7 @@ "cloud.storage.prefix" : "", "cloud.storage.region" : "", "cloud.storage.s3.access.key.id" : null, + "cloud.storage.s3.checksum.behavior" : "auto", "cloud.storage.s3.client.read.timeout" : -1, "cloud.storage.s3.parallel.downloader.client.type" : "crt", "cloud.storage.s3.secret.access.key" : null, diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm index a36d3b5..b01ff20 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm @@ -40,6 +40,7 @@ "cloud.storage.prefix" : "", "cloud.storage.region" : "", "cloud.storage.s3.access.key.id" : null, + "cloud.storage.s3.checksum.behavior" : "auto", "cloud.storage.s3.client.read.timeout" : -1, "cloud.storage.s3.parallel.downloader.client.type" : "crt", "cloud.storage.s3.secret.access.key" : null, diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java index dd8485c..94b6f5a 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java @@ -25,6 +25,7 @@ import org.apache.asterix.external.util.aws.AwsConstants; import org.apache.hyracks.cloud.io.ICloudProperties; +import org.apache.hyracks.cloud.io.S3ChecksumBehavior; import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -56,12 +57,14 @@ private final boolean roundRobinDnsResolver; private final String accessKeyId; private final String secretAccessKey; + private final S3ChecksumBehavior checksumBehavior; public S3ClientConfig(String region, String endpoint, String prefix, boolean anonymousAuth, Collection<String> certificates, long profilerLogInterval, int writeBufferSize, S3ParallelDownloaderClientType parallelDownloaderClientType, boolean roundRobinDnsResolver) { this(region, endpoint, prefix, anonymousAuth, certificates, profilerLogInterval, writeBufferSize, 1, 0, 0, 0, - false, false, 0, 0, -1, parallelDownloaderClientType, roundRobinDnsResolver, "", ""); + false, false, 0, 0, -1, parallelDownloaderClientType, roundRobinDnsResolver, "", "", + S3ChecksumBehavior.defaultForEndpoint(endpoint)); } private S3ClientConfig(String region, String endpoint, String prefix, boolean anonymousAuth, @@ -70,7 +73,7 @@ boolean forcePathStyle, boolean disableSslVerify, int requestsMaxPendingHttpConnections, int requestsHttpConnectionAcquireTimeout, int s3ReadTimeoutInSeconds, S3ParallelDownloaderClientType parallelDownloaderClientType, boolean roundRobinDnsResolver, - String accessKeyId, String secretAccessKey) { + String accessKeyId, String secretAccessKey, S3ChecksumBehavior checksumBehavior) { this.region = Objects.requireNonNull(region, "region"); this.endpoint = endpoint; this.prefix = Objects.requireNonNull(prefix, "prefix"); @@ -91,6 +94,7 @@ this.roundRobinDnsResolver = roundRobinDnsResolver; this.accessKeyId = accessKeyId; this.secretAccessKey = secretAccessKey; + this.checksumBehavior = Objects.requireNonNull(checksumBehavior, "checksumBehavior"); } public static S3ClientConfig of(ICloudProperties cloudProperties) { @@ -104,7 +108,7 @@ cloudProperties.getRequestsHttpConnectionAcquireTimeout(), cloudProperties.getS3ReadTimeoutInSeconds(), S3ParallelDownloaderClientType.valueOf(cloudProperties.getS3ParallelDownloaderClientType()), cloudProperties.useRoundRobinDnsResolver(), cloudProperties.getS3AccessKeyId(), - cloudProperties.getS3SecretAccessKey()); + cloudProperties.getS3SecretAccessKey(), cloudProperties.getS3ChecksumBehavior()); } public enum S3ParallelDownloaderClientType { @@ -126,7 +130,6 @@ } public static S3ClientConfig of(Map<String, String> configuration, int writeBufferSize) { - // Used to determine local vs. actual S3 String endPoint = configuration.getOrDefault(AwsConstants.SERVICE_END_POINT_FIELD_NAME, ""); // Disabled long profilerLogInterval = 0; @@ -227,4 +230,8 @@ public boolean useRoundRobinDnsResolver() { return roundRobinDnsResolver; } + + public S3ChecksumBehavior getChecksumBehavior() { + return checksumBehavior; + } } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java index de69617..e1fa4e9 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java @@ -52,6 +52,7 @@ import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.external.util.aws.AwsUtils; import org.apache.asterix.external.util.aws.AwsUtils.CloseableAwsClients; +import org.apache.asterix.external.util.aws.s3.S3Utils; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.io.FileReference; import org.apache.hyracks.api.util.IoUtil; @@ -382,6 +383,7 @@ builder.credentialsProvider(credentialsProvider); builder.region(Region.of(config.getRegion())); builder.forcePathStyle(config.isForcePathStyle()); + S3Utils.applyChecksumBehavior(builder, config.getChecksumBehavior()); AttributeMap.Builder customHttpConfigBuilder = AttributeMap.builder(); if (config.getRequestsMaxHttpConnections() > 0) { diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java index 98c54f3..2de95f6 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java @@ -41,6 +41,7 @@ import org.apache.hyracks.api.config.IOptionType; import org.apache.hyracks.api.config.Section; import org.apache.hyracks.cloud.io.ICloudProperties; +import org.apache.hyracks.cloud.io.S3ChecksumBehavior; import org.apache.hyracks.util.StorageUtil; public class CloudProperties extends AbstractProperties implements ICloudProperties { @@ -100,7 +101,11 @@ CLOUD_STORAGE_S3_USE_ROUND_ROBIN_DNS_RESOLVER(BOOLEAN, false), CLOUD_STORAGE_S3_ACCESS_KEY_ID(STRING, (String) null), CLOUD_STORAGE_S3_SECRET_ACCESS_KEY(STRING, (String) null), - CLOUD_STORAGE_AZURE_CLIENT_ID(STRING, (String) null),; + CLOUD_STORAGE_AZURE_CLIENT_ID(STRING, (String) null), + CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR(STRING, (Function<IApplicationConfig, String>) app -> { + String endpoint = app.getString(CLOUD_STORAGE_ENDPOINT); + return S3ChecksumBehavior.defaultForEndpoint(endpoint).name().toLowerCase(); + }); private final IOptionType interpreter; private final Object defaultValue; @@ -151,6 +156,7 @@ case CLOUD_STORAGE_S3_ACCESS_KEY_ID: case CLOUD_STORAGE_S3_SECRET_ACCESS_KEY: case CLOUD_STORAGE_AZURE_CLIENT_ID: + case CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR: return Section.COMMON; default: throw new IllegalStateException("NYI: " + this); @@ -240,6 +246,13 @@ return "The S3 secret access key for static credential authentication (defaults to null, which indicates to use default credential chain)"; case CLOUD_STORAGE_AZURE_CLIENT_ID: return "The Azure user managed identity client ID (defaults to null, which takes the system managed identity client ID)"; + case CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR: + return "The checksum behavior for S3 requests and responses. Accepted values: " + + "'when_required' (only checksums mandated by the operation), " + + "'when_supported' (checksums on all eligible operations, SDK >= 2.30 default), " + + "'auto' (no explicit override, defer to SDK default). " + + "Defaults to 'when_required' when a custom endpoint is configured " + + "(S3-compatible stores), 'auto' for native AWS S3."; default: throw new IllegalStateException("NYI: " + this); } @@ -260,6 +273,9 @@ if (this == CLOUD_STORAGE_S3_PARALLEL_DOWNLOADER_CLIENT_TYPE) { return "crt if no custom endpoint is set; async otherwise"; } + if (this == CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR) { + return "when_required if a custom endpoint is set; auto otherwise"; + } return IOption.super.usageDefaultOverride(accessor, optionPrinter); } @@ -406,4 +422,9 @@ public String getAzureClientId() { return accessor.getString(Option.CLOUD_STORAGE_AZURE_CLIENT_ID); } + + // Parses the stored string value to the S3ChecksumBehavior enum + public S3ChecksumBehavior getS3ChecksumBehavior() { + return S3ChecksumBehavior.fromString(accessor.getString(Option.CLOUD_STORAGE_S3_CHECKSUM_BEHAVIOR)); + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java index 7556187..352af33 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java @@ -89,6 +89,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.UUID; import java.util.function.BiPredicate; import java.util.regex.Matcher; @@ -114,7 +115,8 @@ import org.apache.hyracks.api.exceptions.IWarningCollector; import org.apache.hyracks.api.exceptions.SourceLocation; import org.apache.hyracks.api.exceptions.Warning; -import org.apache.hyracks.util.annotations.AiProvenance; +import org.apache.hyracks.cloud.io.ICloudProperties; +import org.apache.hyracks.cloud.io.S3ChecksumBehavior; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -193,23 +195,33 @@ } else if (certificates != null && !certificates.isBlank()) { builder.httpClient(createHttpClient(certificates)); } - if (serviceEndpoint != null) { - configureS3CompatibleSettings(serviceEndpoint, builder); + ICloudProperties cloudProperties = appCtx.getCloudProperties(); + S3ChecksumBehavior checksumBehavior; + if (cloudProperties != null) { + checksumBehavior = cloudProperties.getS3ChecksumBehavior(); + } else { + // No cloud properties (external data context): default based on whether a custom endpoint is in use + checksumBehavior = S3ChecksumBehavior.defaultForEndpoint(serviceEndpoint); } + applyChecksumBehavior(builder, checksumBehavior); awsClients.setConsumingClient(builder.build()); return awsClients; } - @AiProvenance(agent = AiProvenance.Agent.CLAUDE_SONNET_4_6, tool = AiProvenance.Tool.GITHUB_COPILOT) - private static void configureS3CompatibleSettings(String serviceEndpoint, S3ClientBuilder builder) { - // AWS SDK 2.43+ sends CRC64NVME request checksums by default for all eligible operations. - // S3-compatible endpoints (non-AWS) and older mock servers do not understand this header and - // may reject or mishandle requests, returning empty or error responses. When a custom endpoint - // is configured (i.e. not talking to real AWS S3), disable automatic checksum calculation so - // only operations that explicitly require a checksum will include one. - if (serviceEndpoint != null) { - builder.requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED); - builder.responseChecksumValidation(ResponseChecksumValidation.WHEN_REQUIRED); + public static void applyChecksumBehavior(S3ClientBuilder builder, S3ChecksumBehavior behavior) { + Objects.requireNonNull(behavior, "checksumBehavior"); + switch (behavior) { + case WHEN_REQUIRED: + builder.requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED); + builder.responseChecksumValidation(ResponseChecksumValidation.WHEN_REQUIRED); + break; + case WHEN_SUPPORTED: + builder.requestChecksumCalculation(RequestChecksumCalculation.WHEN_SUPPORTED); + builder.responseChecksumValidation(ResponseChecksumValidation.WHEN_SUPPORTED); + break; + case AUTO: + // leave SDK defaults untouched + break; } } diff --git a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/ICloudProperties.java b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/ICloudProperties.java index 103c1b3..abbaf59 100644 --- a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/ICloudProperties.java +++ b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/ICloudProperties.java @@ -94,5 +94,10 @@ String getS3SecretAccessKey(); + /** + * Valid values for {@link #getS3ChecksumBehavior()}. + */ + S3ChecksumBehavior getS3ChecksumBehavior(); + String getAzureClientId(); } diff --git a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/S3ChecksumBehavior.java b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/S3ChecksumBehavior.java new file mode 100644 index 0000000..0ab7e1b --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/io/S3ChecksumBehavior.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hyracks.cloud.io; + +import static org.apache.hyracks.util.annotations.AiProvenance.Agent.CLAUDE_SONNET_4_6; +import static org.apache.hyracks.util.annotations.AiProvenance.ContributionKind.GENERATED; +import static org.apache.hyracks.util.annotations.AiProvenance.Tool.GITHUB_COPILOT; + +import org.apache.hyracks.util.annotations.AiProvenance; + +/** + * Controls the AWS SDK v2 checksum behavior for S3 clients. + * Introduced in SDK 2.30.0 where the default changed to {@link #WHEN_SUPPORTED}, + * which is not supported by all S3-compatible storage solutions. + */ +@AiProvenance(agent = CLAUDE_SONNET_4_6, tool = GITHUB_COPILOT, contributionKind = GENERATED) +public enum S3ChecksumBehavior { + /** Calculate/validate checksums only when required by the operation. Safe for S3-compatible endpoints. */ + WHEN_REQUIRED, + /** Calculate/validate checksums whenever supported — the SDK default since 2.30.0. */ + WHEN_SUPPORTED, + /** Leave the SDK defaults untouched. Appropriate for native AWS S3. */ + AUTO; + + /** Parses the config string (case-insensitive). Returns {@code null} if the input is {@code null}. */ + public static S3ChecksumBehavior fromString(String s) { + if (s == null) { + return null; + } + for (S3ChecksumBehavior b : values()) { + if (b.name().equalsIgnoreCase(s)) { + return b; + } + } + throw new IllegalArgumentException( + "Unrecognized S3 checksum behavior: '" + s + "'. Valid values: when_required, when_supported, auto"); + } + + /** + * Returns the appropriate default based on whether a custom S3-compatible endpoint is configured. + * When an endpoint is present the SDK's newer checksum defaults may not be supported, so {@link #WHEN_REQUIRED} + * is used. When no custom endpoint is configured (native AWS S3) {@link #AUTO} defers to SDK defaults. + */ + public static S3ChecksumBehavior defaultForEndpoint(String endpoint) { + return (endpoint == null || endpoint.isEmpty()) ? AUTO : WHEN_REQUIRED; + } +} -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21198?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: lumina Gerrit-Change-Id: If6618d3a336e9bf134efb1f219660421edc27c43 Gerrit-Change-Number: 21198 Gerrit-PatchSet: 8 Gerrit-Owner: Ritik Raj <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Michael Blow <[email protected]> Gerrit-Reviewer: Ritik Raj <[email protected]>
