steveloughran commented on code in PR #7396: URL: https://github.com/apache/hadoop/pull/7396#discussion_r2011894842
########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChecksumSupport.java: ##########
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.util.Set;
+
+import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
+import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+import static org.apache.hadoop.fs.s3a.Constants.CHECKSUM_ALGORITHM;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+
+/**
+ * Utility class to support operations on S3 object checksum.
+ */
+public final class ChecksumSupport {
+
+  private ChecksumSupport() {
+  }
+
+  /**
+   * Checksum algorithms that are supported by S3A.
+   */
+  private static final Set<ChecksumAlgorithm> SUPPORTED_CHECKSUM_ALGORITHMS = ImmutableSet.of(
+      ChecksumAlgorithm.CRC32,
+      ChecksumAlgorithm.CRC32_C,
+      ChecksumAlgorithm.SHA1,
+      ChecksumAlgorithm.SHA256);
+
+  /**
+   * Get the checksum algorithm to be used for data integrity check of the objects in S3.
+   * This operation includes validating if the provided value is a supported checksum algorithm.
+   * @param conf configuration to scan
+   * @return the checksum algorithm to be passed on S3 requests
+   * @throws IllegalArgumentException if the checksum algorithm is not known or not supported
+   */
+  public static ChecksumAlgorithm getChecksumAlgorithm(Configuration conf) {
+    final String checksumAlgorithmString = conf.getTrimmed(CHECKSUM_ALGORITHM);
+    if (StringUtils.isBlank(checksumAlgorithmString)) {
+      return null;
+    }
+    final ChecksumAlgorithm checksumAlgorithm =

Review Comment:
   use the (new) method `ConfigurationHelper.resolveEnum()` here; it does case conversion and gives you a fallback. Trying to get the right case for a simple enum config option is a real PITA.
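For reference, a minimal sketch of what the suggested change could look like. The exact package, signature and semantics of `ConfigurationHelper.resolveEnum()` (argument order, and when the fallback function is invoked) are assumptions here and should be checked against the actual helper class; the `ChecksumSupportSketch` class name and the CRC32C alias handling are purely illustrative, not code from the PR.

```
// Sketch only: assumes ConfigurationHelper.resolveEnum(conf, key, enumClass, fallbackFn)
// exists in org.apache.hadoop.util and calls the fallback for blank/unknown values.
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ConfigurationHelper;
import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm;

import static org.apache.hadoop.fs.s3a.Constants.CHECKSUM_ALGORITHM;

public final class ChecksumSupportSketch {

  private ChecksumSupportSketch() {
  }

  public static ChecksumAlgorithm getChecksumAlgorithm(Configuration conf) {
    return ConfigurationHelper.resolveEnum(conf,
        CHECKSUM_ALGORITHM,
        ChecksumAlgorithm.class,
        configValue -> {
          if (StringUtils.isBlank(configValue)) {
            // nothing configured: no checksum algorithm is attached to requests
            return null;
          }
          // tolerate "CRC32C" as an alias for the SDK's CRC32_C constant
          if (ChecksumAlgorithm.CRC32_C.toString().equalsIgnoreCase(configValue)) {
            return ChecksumAlgorithm.CRC32_C;
          }
          throw new IllegalArgumentException(
              "Checksum algorithm is not supported: " + configValue);
        });
  }
}
```

The point of the helper is that the case normalization and the "what do I do with an unrecognized value" decision live in one place, rather than being re-implemented per option.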
########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AChecksum.java: ##########
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;

Review Comment:
   can you use AssertJ assertions; we are moving to JUnit 5 and adopting AssertJ now avoids future pain. Plus its error messages are so good.

########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java: ##########
@@ -1734,6 +1734,15 @@ private Constants() {
    */
   public static final boolean CHECKSUM_VALIDATION_DEFAULT = false;
 
+  /**
+   * Indicates the algorithm used to create the checksum for the object
+   * to be uploaded to S3. Unset by default. It supports the following values:
+   * 'CRC32', 'CRC32C', 'SHA1', and 'SHA256'
+   * value:{@value}
+   */
+  public static final String CHECKSUM_ALGORITHM =

Review Comment:
   let's call this fs.s3a.create.checksum.algorithm, as there are lots of fs.s3a.create. items and it makes clear that this is exclusively what the checksum is for. We are having *a lot* of checksum problems with the 2.30 SDK and I don't want confusion.

########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/files/TestUploadEtag.java: ##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.commit.files;
+
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.services.s3.model.CompletedPart;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class TestUploadEtag {

Review Comment:
   can you use AssertJ asserts, with descriptions on the assertNull() assertions.
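As an illustration of the conversion being asked for in these two comments: a small, self-contained sketch of JUnit-style asserts rewritten as AssertJ assertions with `describedAs()` descriptions. The `CompletedPart` accessors and values here are only an example of the pattern, not the actual assertions in TestUploadEtag.

```
// Illustrative only: plain assertEquals()/assertNull() become AssertJ assertions
// with a description, so a failure explains what was being checked.
import org.assertj.core.api.Assertions;
import software.amazon.awssdk.services.s3.model.CompletedPart;

public class AssertJStyleExample {

  public static void main(String[] args) {
    CompletedPart part = CompletedPart.builder()
        .partNumber(1)
        .eTag("etag-1")
        .build();

    // instead of assertEquals("etag-1", part.eTag()):
    Assertions.assertThat(part.eTag())
        .describedAs("etag of part %d", part.partNumber())
        .isEqualTo("etag-1");

    // instead of assertNull(part.checksumSHA256()):
    Assertions.assertThat(part.checksumSHA256())
        .describedAs("checksumSHA256 of part %d", part.partNumber())
        .isNull();
  }
}
```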
########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AChecksum.java: ##########
@@ -0,0 +1,109 @@
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;
+import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm;
+import software.amazon.awssdk.services.s3.model.ChecksumMode;
+import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
+import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants;
+import org.apache.hadoop.fs.s3a.impl.ChecksumSupport;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.rm;
+import static org.apache.hadoop.fs.s3a.Constants.CHECKSUM_ALGORITHM;
+
+/**
+ * Tests S3 checksum algorithm.
+ * If CHECKSUM_ALGORITHM config is not set in auth-keys.xml,
+ * SHA256 algorithm will be picked.
+ */
+public class ITestS3AChecksum extends AbstractS3ATestBase {
+
+  private static final ChecksumAlgorithm DEFAULT_CHECKSUM_ALGORITHM = ChecksumAlgorithm.SHA256;
+
+  private ChecksumAlgorithm checksumAlgorithm;
+
+  private static final int[] SIZES = {
+      1, 2, 3, 4, 5, 254, 255, 256, 257, 2 ^ 12 - 1
+  };
+
+  @Override
+  protected Configuration createConfiguration() {
+    final Configuration conf = super.createConfiguration();
+    S3ATestUtils.disableFilesystemCaching(conf);
+    checksumAlgorithm = ChecksumSupport.getChecksumAlgorithm(conf);
+    if (checksumAlgorithm == null) {
+      checksumAlgorithm = DEFAULT_CHECKSUM_ALGORITHM;
+      LOG.info("No checksum algorithm found in configuration, will use default {}",
+          checksumAlgorithm);
+      conf.set(CHECKSUM_ALGORITHM, checksumAlgorithm.toString());
+    }
+    conf.setBoolean(S3AAuditConstants.REJECT_OUT_OF_SPAN_OPERATIONS, false);
+    return conf;
+  }
+
+  @Test
+  public void testChecksum() throws IOException {
+    for (int size : SIZES) {
+      validateChecksumForFilesize(size);
+    }
+  }
+
+  private void validateChecksumForFilesize(int len) throws IOException {
+    describe("Create a file of size " + len);
+    String src = String.format("%s-%04x", methodName.getMethodName(), len);
+    Path path = writeThenReadFile(src, len);
+    assertChecksum(path);
+    rm(getFileSystem(), path, false, false);
+  }
+
+  private void assertChecksum(Path path) throws IOException {
+    final String key = getFileSystem().pathToKey(path);
+    HeadObjectRequest.Builder requestBuilder = getFileSystem().getRequestFactory()
+        .newHeadObjectRequestBuilder(key)
+        .checksumMode(ChecksumMode.ENABLED);
+    HeadObjectResponse headObject = getFileSystem().getS3AInternals()
+        .getAmazonS3Client("Call head object with checksum enabled")
+        .headObject(requestBuilder.build());
+    switch (checksumAlgorithm) {
+    case CRC32:
+      Assert.assertNotNull(headObject.checksumCRC32());

Review Comment:
   can we have text for each assertion, e.g.
   ```
   Assertions.assertThat(headObject.checksumCRC32C())
       .describedAs("headObject.checksumCRC32C()")
       .isNotNull();
   ```
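Purely as a sketch of the requested style, the whole switch might end up looking something like this once converted. It reuses the test's existing `checksumAlgorithm` field and filesystem helpers; the non-CRC32 branches, the `default` branch and the AssertJ `Assertions`/`fail` usage are assumptions about the rest of the file, not code from the PR.

```
// Sketch of assertChecksum() with AssertJ assertions; needs
// import org.assertj.core.api.Assertions; in addition to the test's existing imports.
private void assertChecksum(Path path) throws IOException {
  final String key = getFileSystem().pathToKey(path);
  HeadObjectRequest.Builder requestBuilder = getFileSystem().getRequestFactory()
      .newHeadObjectRequestBuilder(key)
      .checksumMode(ChecksumMode.ENABLED);
  HeadObjectResponse headObject = getFileSystem().getS3AInternals()
      .getAmazonS3Client("Call head object with checksum enabled")
      .headObject(requestBuilder.build());
  switch (checksumAlgorithm) {
  case CRC32:
    Assertions.assertThat(headObject.checksumCRC32())
        .describedAs("headObject.checksumCRC32()")
        .isNotNull();
    break;
  case CRC32_C:
    Assertions.assertThat(headObject.checksumCRC32C())
        .describedAs("headObject.checksumCRC32C()")
        .isNotNull();
    break;
  case SHA1:
    Assertions.assertThat(headObject.checksumSHA1())
        .describedAs("headObject.checksumSHA1()")
        .isNotNull();
    break;
  case SHA256:
    Assertions.assertThat(headObject.checksumSHA256())
        .describedAs("headObject.checksumSHA256()")
        .isNotNull();
    break;
  default:
    Assertions.fail("Unsupported checksum algorithm: " + checksumAlgorithm);
  }
}
```

Each failure message then names the exact accessor that came back null, which is the point of the `describedAs()` calls.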
########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AChecksum.java: ##########
@@ -0,0 +1,109 @@
+public class ITestS3AChecksum extends AbstractS3ATestBase {
+
+  private static final ChecksumAlgorithm DEFAULT_CHECKSUM_ALGORITHM = ChecksumAlgorithm.SHA256;
+
+  private ChecksumAlgorithm checksumAlgorithm;
+
+  private static final int[] SIZES = {
+      1, 2, 3, 4, 5, 254, 255, 256, 257, 2 ^ 12 - 1
+  };
+
+  @Override
+  protected Configuration createConfiguration() {
+    final Configuration conf = super.createConfiguration();
+    S3ATestUtils.disableFilesystemCaching(conf);
+    checksumAlgorithm = ChecksumSupport.getChecksumAlgorithm(conf);

Review Comment:
   add
   ```
   S3ATestUtils.removeBaseAndBucketOverrides(conf,
       CHECKSUM_ALGORITHM,
       REJECT_OUT_OF_SPAN_OPERATIONS);
   ```
   needed to ensure that bucket overrides don't cause test failures.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org