steveloughran commented on code in PR #6884: URL: https://github.com/apache/hadoop/pull/6884#discussion_r1819181162
########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BaseS3AFileSystemHandler.java: ########## @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.util.ReflectionUtils; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Statistic.CLIENT_SIDE_ENCRYPTION_ENABLED; + +/** + * An implementation of the {@link S3AFileSystemHandler} interface. + * This handles certain filesystem operations when s3 client side encryption is disabled. + */ +public class BaseS3AFileSystemHandler implements S3AFileSystemHandler { + + /** + * Constructs a new instance of {@code BaseS3AFileSystemHandler}. + */ + public BaseS3AFileSystemHandler() { + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @param includeSelf a boolean indicating whether the path itself should + * be included in the listing + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path, boolean includeSelf) { + return includeSelf + ? Listing.ACCEPT_ALL_BUT_S3N + : new Listing.AcceptAllButSelfAndS3nDirs(path); + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path) { + return new Listing.AcceptFilesOnly(path); + } + + /** + * Retrieves an object from the S3. + * + * @param store The S3AStore object representing the S3 bucket. 
+ * @param request The GetObjectRequest containing the details of the object to retrieve. + * @param factory The RequestFactory used to create the GetObjectRequest. + * @return A ResponseInputStream containing the GetObjectResponse. + * @throws IOException If an error occurs while retrieving the object. + */ + @Override + public ResponseInputStream<GetObjectResponse> getObject(S3AStore store, GetObjectRequest request, Review Comment: move second arg onto next line ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BaseS3AFileSystemHandler.java: ########## @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.util.ReflectionUtils; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Statistic.CLIENT_SIDE_ENCRYPTION_ENABLED; + +/** + * An implementation of the {@link S3AFileSystemHandler} interface. + * This handles certain filesystem operations when s3 client side encryption is disabled. + */ +public class BaseS3AFileSystemHandler implements S3AFileSystemHandler { + + /** + * Constructs a new instance of {@code BaseS3AFileSystemHandler}. + */ + public BaseS3AFileSystemHandler() { + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @param includeSelf a boolean indicating whether the path itself should + * be included in the listing + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path, boolean includeSelf) { + return includeSelf + ? Listing.ACCEPT_ALL_BUT_S3N + : new Listing.AcceptAllButSelfAndS3nDirs(path); + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. 
+ * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path) { + return new Listing.AcceptFilesOnly(path); + } + + /** + * Retrieves an object from the S3. + * + * @param store The S3AStore object representing the S3 bucket. + * @param request The GetObjectRequest containing the details of the object to retrieve. + * @param factory The RequestFactory used to create the GetObjectRequest. + * @return A ResponseInputStream containing the GetObjectResponse. + * @throws IOException If an error occurs while retrieving the object. + */ + @Override + public ResponseInputStream<GetObjectResponse> getObject(S3AStore store, GetObjectRequest request, + RequestFactory factory) throws IOException { + return store.getOrCreateS3Client().getObject(request); + } + + /** + * Set the client side encryption gauge to 0. + * @param ioStatisticsStore The IOStatisticsStore of the filesystem. + */ + @Override + public void setCSEGauge(IOStatisticsStore ioStatisticsStore) { + ioStatisticsStore.setGauge(CLIENT_SIDE_ENCRYPTION_ENABLED.getSymbol(), 0L); + } + + /** + * Retrieves the client-side encryption materials for the given bucket and encryption algorithm. Review Comment: nit: line width ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEUtils.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.Preconditions; + +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; + +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_CUSTOM_KEYRING_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_CUSTOM; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_KMS; +import static org.apache.hadoop.fs.s3a.S3AUtils.formatRange; +import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.CRYPTO_CEK_ALGORITHM; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.UNENCRYPTED_CONTENT_LENGTH; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; + +/** + * S3 client side encryption (CSE) utility class. + */ [email protected] Review Comment: private ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java: ########## @@ -373,7 +380,7 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void * @param conf config to build the URI from. * @return an endpoint uri */ - private static URI getS3Endpoint(String endpoint, final Configuration conf) { + public static URI getS3Endpoint(String endpoint, final Configuration conf) { Review Comment: make protected ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -464,9 +466,10 @@ private boolean buildNextStatusBatch(S3ListResult objects) { // Skip over keys that are ourselves and old S3N _$folder$ files if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) { S3AFileStatus status = createFileStatus(keyPath, s3Object, - listingOperationCallbacks.getDefaultBlockSize(keyPath), - getStoreContext().getUsername(), - s3Object.eTag(), null, isCSEEnabled); + listingOperationCallbacks.getDefaultBlockSize(keyPath), Review Comment: let's move username and blocksize retrieval out of the for() loop...they don't change after all ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -763,6 +766,61 @@ public boolean accept(FileStatus status) { } } + /** + * Accept all entries except the base path and those which map to S3N + * pseudo directory markers and CSE instruction file. + */ + public static class AcceptFilesOnlyExceptCSEInstructionFile implements FileStatusAcceptor { Review Comment: * cut the S3N support in this one, and make the class final * move to CSEV1CompatibleS3AFileSystemHandler class ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryption.java: ########## @@ -266,6 +272,211 @@ public void testEncryptionEnabledAndDisabledFS() throws Exception { } } + /** + * Test to check if unencrypted objects are read with V1 client compatibility. 
+ * @throws IOException + * @throws Exception + */ + @Test + public void testUnencryptedObjectReadWithV1CompatibilityConfig() throws Exception { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem nonCseFs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + nonCseFs.initialize(getFileSystem().getUri(), conf); Review Comment: try/close clause must be active from here ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryption.java: ########## @@ -266,6 +272,211 @@ public void testEncryptionEnabledAndDisabledFS() throws Exception { } } + /** + * Test to check if unencrypted objects are read with V1 client compatibility. + * @throws IOException + * @throws Exception + */ + @Test + public void testUnencryptedObjectReadWithV1CompatibilityConfig() throws Exception { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem nonCseFs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + nonCseFs.initialize(getFileSystem().getUri(), conf); + + Path file = path(getMethodName()); + // write unencrypted file + ContractTestUtils.writeDataset(nonCseFs, file, new byte[SMALL_FILE_SIZE], + SMALL_FILE_SIZE, SMALL_FILE_SIZE, true); + + Configuration cseConf = new Configuration(getConfiguration()); + cseConf.setBoolean(S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED, true); + // create filesystem with cse enabled and v1 compatibility. + S3AFileSystem cseFs = createTestFileSystem(cseConf); + cseFs.initialize(getFileSystem().getUri(), cseConf); + + // read unencrypted file. It should not throw any exception. + try (FSDataInputStream in = cseFs.open(file)) { + in.read(new byte[SMALL_FILE_SIZE]); + } finally { + // close the filesystem + nonCseFs.close(); + cseFs.close(); + } + } + + /** + * Test to check if file name with suffix ".instruction" is ignored with V1 compatibility. + * @throws IOException + */ + @Test + public void testSkippingCSEInstructionFileWithV1Compatibility() throws IOException { Review Comment: where you should add the new delete and rename tests. create the file in subdir 1 under methodPath, rename it, verify source dir is gone. delete rename target dir, again assert not found. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManager.java: ########## @@ -61,6 +61,8 @@ S3TransferManager getOrCreateTransferManager() */ S3AsyncClient getOrCreateAsyncClient() throws IOException; + S3Client getOrCreateUnencryptedS3Client() throws IOException; Review Comment: pull javadocs up from implementation ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AFileSystemHandler.java: ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; + +/** + * An interface that defines the contract for handling certain filesystem operations. + */ +public interface S3AFileSystemHandler { Review Comment: Not sure this is the right name, but I can't think of anything obvious to use instead. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEUtils.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.Preconditions; + +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; + +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_CUSTOM_KEYRING_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_CUSTOM; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_KMS; +import static org.apache.hadoop.fs.s3a.S3AUtils.formatRange; +import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.CRYPTO_CEK_ALGORITHM; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.UNENCRYPTED_CONTENT_LENGTH; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; + +/** + * S3 client side encryption (CSE) utility class. + */ [email protected] [email protected] +public final class CSEUtils { + + private CSEUtils() { + } + + /** + * Checks if the file suffix ends CSE file suffix. + * {@link org.apache.hadoop.fs.s3a.Constants#S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX} + * when the config + * @param key file name + * @return true if file name ends with CSE instruction file suffix + */ + public static boolean isCSEInstructionFile(String key) { + return key.endsWith(S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX); + } + + /** + * Checks if CSE-KMS or CSE-CUSTOM is set. + * @param encryptionMethod type of encryption used + * @return true if encryption method is CSE-KMS or CSE-CUSTOM + */ + public static boolean isCSEEnabled(String encryptionMethod) { + return CSE_KMS.getMethod().equals(encryptionMethod) || + CSE_CUSTOM.getMethod().equals(encryptionMethod); + } + + /** + * Checks if a given S3 object is encrypted or not by checking following two conditions + * 1. if object metadata contains x-amz-cek-alg + * 2. if instruction file is present + * + * @param s3Client S3 client + * @param factory S3 request factory + * @param key key value of the s3 object + * @return true if S3 object is encrypted + */ + public static boolean isObjectEncrypted(S3Client s3Client, RequestFactory factory, String key) { Review Comment: have it take S3AStore, so when we push all s3 api calls into the interface, it'll be easier to move. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEV1CompatibleS3AFileSystemHandler.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.ReflectionUtils; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.impl.CSEUtils.isObjectEncrypted; + +/** + * An extension of the {@link CSES3AFileSystemHandler} class. + * This handles certain file system operations when client-side encryption is enabled with v1 client + * compatibility. + * {@link org.apache.hadoop.fs.s3a.Constants#S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED}. + */ +public class CSEV1CompatibleS3AFileSystemHandler extends CSES3AFileSystemHandler { + + /** + * Constructs a new instance of {@code CSEV1CompatibleS3AFileSystemHandler}. + */ + public CSEV1CompatibleS3AFileSystemHandler() { + } + + /** + * {@inheritDoc} + * + * <p>This implementation returns a {@link Listing.AcceptAllButS3nDirsAndCSEInstructionFile} + * or {@link Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile} object + * based on the value of the {@code includeSelf} parameter. + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path, boolean includeSelf) { + return includeSelf + ? new Listing.AcceptAllButS3nDirsAndCSEInstructionFile() + : new Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile(path); + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path) { + return new Listing.AcceptFilesOnlyExceptCSEInstructionFile(path); + } + + /** + * Retrieves an object from the S3. + * If the S3 object is encrypted, it uses the encrypted S3 client to retrieve the object else + * it uses the unencrypted S3 client. + * + * @param store The S3AStore object representing the S3 bucket. + * @param request The GetObjectRequest containing the details of the object to retrieve. + * @param factory The RequestFactory used to create the GetObjectRequest. + * @return A ResponseInputStream containing the GetObjectResponse. + * @throws IOException If an error occurs while retrieving the object. 
+ */ + @Override + public ResponseInputStream<GetObjectResponse> getObject(S3AStore store, GetObjectRequest request, + RequestFactory factory) throws IOException { + boolean isEncrypted = isObjectEncrypted(store.getOrCreateS3Client(), factory, request.key()); + return isEncrypted ? store.getOrCreateS3Client().getObject(request) + : store.getOrCreateUnencryptedS3Client().getObject(request); + } + + /** + * Retrieves the S3 client factory for the specified class and configuration. + * + * @param conf The Hadoop configuration object. + * @return The S3 client factory instance. + */ + @Override + public S3ClientFactory getUnencryptedS3ClientFactory(Configuration conf) { + Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass( + S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, + S3ClientFactory.class); + return ReflectionUtils.newInstance(s3ClientFactoryClass, conf); + } + + + /** + * Retrieves the unpadded size of an object in the S3 bucket. + * + * @param key The key (path) of the object in the S3 bucket. + * @param length The length of the object. + * @param store The S3AStore object representing the S3 bucket. + * @param bucket The name of the S3 bucket. + * @param factory The RequestFactory used to create the HeadObjectRequest. + * @param response The HeadObjectResponse containing the metadata of the object. + * @return The unpadded size of the object in bytes. + * @throws IOException If an error occurs while retrieving the object size. + */ + @Override + public long getS3ObjectSize(String key, long length, S3AStore store, String bucket, + RequestFactory factory, HeadObjectResponse response) throws IOException { + return CSEUtils.getUnPaddedObjectLength(store.getOrCreateS3Client(), bucket, Review Comment: pass the S3AStore into CSEUtils, have it create the client. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -75,18 +77,18 @@ public class Listing extends AbstractStoreOperation { private static final Logger LOG = S3AFileSystem.LOG; - private final boolean isCSEEnabled; + private final S3AFileSystemHandler handler; - static final FileStatusAcceptor ACCEPT_ALL_BUT_S3N = + public static final FileStatusAcceptor ACCEPT_ALL_BUT_S3N = new AcceptAllButS3nDirs(); private final ListingOperationCallbacks listingOperationCallbacks; public Listing(ListingOperationCallbacks listingOperationCallbacks, - StoreContext storeContext) { + StoreContext storeContext, S3AFileSystemHandler handler) { Review Comment: nit: on next line now things are already being chopped down ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AFileSystemHandler.java: ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; + +/** + * An interface that defines the contract for handling certain filesystem operations. Review Comment: how about "helps map from object store semantics to that of the filesystem, with special support for encrypted stores" ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java: ########## @@ -160,11 +161,17 @@ public S3AsyncClient createS3AsyncClient( .thresholdInBytes(parameters.getMultiPartThreshold()) .build(); - return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) - .httpClientBuilder(httpClientBuilder) - .multipartConfiguration(multipartConfiguration) - .multipartEnabled(parameters.isMultipartCopy()) - .build(); + S3AsyncClientBuilder s3AsyncClientBuilder = + configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) + .httpClientBuilder(httpClientBuilder); + + // TODO: Enable multi part upload with cse once it is available. Review Comment: ok, so no MPE right now. Is this planned? ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEUtils.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.Preconditions; + +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; + +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_CUSTOM_KEYRING_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_CUSTOM; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.CSE_KMS; +import static org.apache.hadoop.fs.s3a.S3AUtils.formatRange; +import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.CRYPTO_CEK_ALGORITHM; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.UNENCRYPTED_CONTENT_LENGTH; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; + +/** + * S3 client side encryption (CSE) utility class. + */ [email protected] [email protected] +public final class CSEUtils { + + private CSEUtils() { + } + + /** + * Checks if the file suffix ends CSE file suffix. + * {@link org.apache.hadoop.fs.s3a.Constants#S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX} + * when the config + * @param key file name + * @return true if file name ends with CSE instruction file suffix + */ + public static boolean isCSEInstructionFile(String key) { + return key.endsWith(S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX); + } + + /** + * Checks if CSE-KMS or CSE-CUSTOM is set. + * @param encryptionMethod type of encryption used + * @return true if encryption method is CSE-KMS or CSE-CUSTOM + */ + public static boolean isCSEEnabled(String encryptionMethod) { + return CSE_KMS.getMethod().equals(encryptionMethod) || + CSE_CUSTOM.getMethod().equals(encryptionMethod); + } + + /** + * Checks if a given S3 object is encrypted or not by checking following two conditions + * 1. if object metadata contains x-amz-cek-alg + * 2. 
if instruction file is present + * + * @param s3Client S3 client + * @param factory S3 request factory + * @param key key value of the s3 object + * @return true if S3 object is encrypted + */ + public static boolean isObjectEncrypted(S3Client s3Client, RequestFactory factory, String key) { + HeadObjectRequest.Builder requestBuilder = factory.newHeadObjectRequestBuilder(key); + HeadObjectResponse headObjectResponse = s3Client.headObject(requestBuilder.build()); + if (headObjectResponse.hasMetadata() && + headObjectResponse.metadata().get(CRYPTO_CEK_ALGORITHM) != null) { + return true; + } + HeadObjectRequest.Builder instructionFileRequestBuilder = + factory.newHeadObjectRequestBuilder(key + S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX); + try { + s3Client.headObject(instructionFileRequestBuilder.build()); Review Comment: we should somehow be tracking invocation count/performance ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -464,9 +466,10 @@ private boolean buildNextStatusBatch(S3ListResult objects) { // Skip over keys that are ourselves and old S3N _$folder$ files Review Comment: we should get rid of that $folder$ stuff these days, shouldn't we? not this PR though ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -447,7 +449,7 @@ private boolean requestNextBatch() throws IOException { * @param objects the next object listing * @return true if this added any entries after filtering */ - private boolean buildNextStatusBatch(S3ListResult objects) { + private boolean buildNextStatusBatch(S3ListResult objects) throws IOException { Review Comment: update javadocs to indicate this can happen with CSE only ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalListingOperationCallbacks.java: ########## @@ -30,6 +30,8 @@ import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.fs.store.audit.AuditSpan; +import software.amazon.awssdk.services.s3.model.S3Object; Review Comment: nit: move above the asf block ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEV1CompatibleS3AFileSystemHandler.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.ReflectionUtils; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.impl.CSEUtils.isObjectEncrypted; + +/** + * An extension of the {@link CSES3AFileSystemHandler} class. + * This handles certain file system operations when client-side encryption is enabled with v1 client + * compatibility. + * {@link org.apache.hadoop.fs.s3a.Constants#S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED}. + */ +public class CSEV1CompatibleS3AFileSystemHandler extends CSES3AFileSystemHandler { + + /** + * Constructs a new instance of {@code CSEV1CompatibleS3AFileSystemHandler}. + */ + public CSEV1CompatibleS3AFileSystemHandler() { + } + + /** + * {@inheritDoc} + * + * <p>This implementation returns a {@link Listing.AcceptAllButS3nDirsAndCSEInstructionFile} + * or {@link Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile} object + * based on the value of the {@code includeSelf} parameter. + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path, boolean includeSelf) { + return includeSelf + ? new Listing.AcceptAllButS3nDirsAndCSEInstructionFile() + : new Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile(path); + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path) { + return new Listing.AcceptFilesOnlyExceptCSEInstructionFile(path); + } + + /** + * Retrieves an object from the S3. + * If the S3 object is encrypted, it uses the encrypted S3 client to retrieve the object else + * it uses the unencrypted S3 client. + * + * @param store The S3AStore object representing the S3 bucket. + * @param request The GetObjectRequest containing the details of the object to retrieve. + * @param factory The RequestFactory used to create the GetObjectRequest. + * @return A ResponseInputStream containing the GetObjectResponse. + * @throws IOException If an error occurs while retrieving the object. + */ + @Override + public ResponseInputStream<GetObjectResponse> getObject(S3AStore store, GetObjectRequest request, + RequestFactory factory) throws IOException { + boolean isEncrypted = isObjectEncrypted(store.getOrCreateS3Client(), factory, request.key()); + return isEncrypted ? 
store.getOrCreateS3Client().getObject(request) Review Comment: second HEAD request ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java: ########## @@ -464,9 +466,10 @@ private boolean buildNextStatusBatch(S3ListResult objects) { // Skip over keys that are ourselves and old S3N _$folder$ files if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) { S3AFileStatus status = createFileStatus(keyPath, s3Object, - listingOperationCallbacks.getDefaultBlockSize(keyPath), - getStoreContext().getUsername(), - s3Object.eTag(), null, isCSEEnabled); + listingOperationCallbacks.getDefaultBlockSize(keyPath), + getStoreContext().getUsername(), + s3Object.eTag(), null, Review Comment: move null to a new line ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CSEV1CompatibleS3AFileSystemHandler.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Listing; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.util.ReflectionUtils; + +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL; +import static org.apache.hadoop.fs.s3a.impl.CSEUtils.isObjectEncrypted; + +/** + * An extension of the {@link CSES3AFileSystemHandler} class. + * This handles certain file system operations when client-side encryption is enabled with v1 client + * compatibility. + * {@link org.apache.hadoop.fs.s3a.Constants#S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED}. + */ +public class CSEV1CompatibleS3AFileSystemHandler extends CSES3AFileSystemHandler { + + /** + * Constructs a new instance of {@code CSEV1CompatibleS3AFileSystemHandler}. + */ + public CSEV1CompatibleS3AFileSystemHandler() { + } + + /** + * {@inheritDoc} + * + * <p>This implementation returns a {@link Listing.AcceptAllButS3nDirsAndCSEInstructionFile} + * or {@link Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile} object + * based on the value of the {@code includeSelf} parameter. + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path, boolean includeSelf) { + return includeSelf + ? 
new Listing.AcceptAllButS3nDirsAndCSEInstructionFile() + : new Listing.AcceptAllButSelfAndS3nDirsAndCSEInstructionFile(path); + } + + /** + * Returns a {@link Listing.FileStatusAcceptor} object. + * That determines which files and directories should be included in a listing operation. + * + * @param path the path for which the listing is being performed + * @return a {@link Listing.FileStatusAcceptor} object + */ + @Override + public Listing.FileStatusAcceptor getFileStatusAcceptor(Path path) { + return new Listing.AcceptFilesOnlyExceptCSEInstructionFile(path); + } + + /** + * Retrieves an object from the S3. + * If the S3 object is encrypted, it uses the encrypted S3 client to retrieve the object else + * it uses the unencrypted S3 client. + * + * @param store The S3AStore object representing the S3 bucket. + * @param request The GetObjectRequest containing the details of the object to retrieve. + * @param factory The RequestFactory used to create the GetObjectRequest. + * @return A ResponseInputStream containing the GetObjectResponse. + * @throws IOException If an error occurs while retrieving the object. + */ + @Override + public ResponseInputStream<GetObjectResponse> getObject(S3AStore store, GetObjectRequest request, + RequestFactory factory) throws IOException { + boolean isEncrypted = isObjectEncrypted(store.getOrCreateS3Client(), factory, request.key()); Review Comment: HEAD request ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/CustomKeyring.java: ########## @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.util.List; + +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.kms.KmsClient; +import software.amazon.encryption.s3.materials.DecryptionMaterials; +import software.amazon.encryption.s3.materials.EncryptedDataKey; +import software.amazon.encryption.s3.materials.EncryptionMaterials; +import software.amazon.encryption.s3.materials.Keyring; +import software.amazon.encryption.s3.materials.KmsKeyring; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; + +/** + * Custom Keyring implementation by warpping over KmsKeyring. + * This is used for testing {@link ITestS3AClientSideEncryptionCustom}. 
+ */ +public class CustomKeyring implements Keyring { + private final KmsClient kmsClient; + private final Configuration conf; + private final KmsKeyring kmsKeyring; + + + public CustomKeyring(Configuration conf) throws IOException { + this.conf = conf; + String bucket = S3ATestUtils.getFsName(conf); + kmsClient = KmsClient.builder() + .region(Region.of(conf.get(AWS_REGION, AWS_S3_DEFAULT_REGION))) Review Comment: what if it is in a different region? ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/CustomKeyring.java: ########## @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.util.List; + +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.kms.KmsClient; +import software.amazon.encryption.s3.materials.DecryptionMaterials; +import software.amazon.encryption.s3.materials.EncryptedDataKey; +import software.amazon.encryption.s3.materials.EncryptionMaterials; +import software.amazon.encryption.s3.materials.Keyring; +import software.amazon.encryption.s3.materials.KmsKeyring; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; + +/** + * Custom Keyring implementation by warpping over KmsKeyring. Review Comment: typo ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryption.java: ########## @@ -266,6 +272,211 @@ public void testEncryptionEnabledAndDisabledFS() throws Exception { } } + /** + * Test to check if unencrypted objects are read with V1 client compatibility. + * @throws IOException + * @throws Exception + */ + @Test + public void testUnencryptedObjectReadWithV1CompatibilityConfig() throws Exception { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem nonCseFs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + nonCseFs.initialize(getFileSystem().getUri(), conf); + + Path file = path(getMethodName()); Review Comment: methodPath() can do this ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryption.java: ########## @@ -266,6 +272,211 @@ public void testEncryptionEnabledAndDisabledFS() throws Exception { } } + /** + * Test to check if unencrypted objects are read with V1 client compatibility. 
+ * @throws IOException + * @throws Exception + */ + @Test + public void testUnencryptedObjectReadWithV1CompatibilityConfig() throws Exception { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem nonCseFs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + nonCseFs.initialize(getFileSystem().getUri(), conf); + + Path file = path(getMethodName()); + // write unencrypted file + ContractTestUtils.writeDataset(nonCseFs, file, new byte[SMALL_FILE_SIZE], + SMALL_FILE_SIZE, SMALL_FILE_SIZE, true); + + Configuration cseConf = new Configuration(getConfiguration()); + cseConf.setBoolean(S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED, true); + // create filesystem with cse enabled and v1 compatibility. + S3AFileSystem cseFs = createTestFileSystem(cseConf); + cseFs.initialize(getFileSystem().getUri(), cseConf); + + // read unencrypted file. It should not throw any exception. + try (FSDataInputStream in = cseFs.open(file)) { + in.read(new byte[SMALL_FILE_SIZE]); + } finally { + // close the filesystem + nonCseFs.close(); + cseFs.close(); + } + } + + /** + * Test to check if file name with suffix ".instruction" is ignored with V1 compatibility. + * @throws IOException + */ + @Test + public void testSkippingCSEInstructionFileWithV1Compatibility() throws IOException { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem fs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + fs.initialize(getFileSystem().getUri(), conf); + + // write file with suffix ".instruction" + Path filePath = path(getMethodName()); + Path file = new Path(filePath, + "file" + S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX); + ContractTestUtils.writeDataset(fs, file, new byte[SMALL_FILE_SIZE], + SMALL_FILE_SIZE, SMALL_FILE_SIZE, true); + + // create filesystem with cse enabled and v1 compatibility. + Configuration cseConf = new Configuration(getConfiguration()); + cseConf.setBoolean(S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED, true); + S3AFileSystem cseFs = createTestFileSystem(cseConf); + cseFs.initialize(getFileSystem().getUri(), cseConf); + try { Review Comment: move to line 332 ########## hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md: ########## @@ -704,10 +704,20 @@ clients where S3-CSE has not been enabled. ### Features -- Supports client side encryption with keys managed in AWS KMS. +- Supports client side encryption with keys managed in AWS KMS (CSE-KMS) +- Supports client side encryption with custom keys by implementing custom [Keyring](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/choose-keyring.html) (CSE-CUSTOM) +- Backward compatible with older encryption clients like `AmazonS3EncryptionClient.java`(V1) and `AmazonS3EncryptionClientV2.java`(V2) - encryption settings propagated into jobs through any issued delegation tokens. - encryption information stored as headers in the uploaded object. +### Compatibility Issues + +- The V1 and V2 clients support reading unencrypted S3 objects, whereas the V3 client does not. In order to read S3 objects in a directory with a mix of encrypted and unencrypted objects, enable V1 compatibility mode by setting `fs.s3a.encryption.cse.v1.compatibility.enabled` to `true`. 
+- Unlike the V2 and V3 clients which always pads 16 bytes, V1 client pads extra bytes to the next multiple of 16. For example if unencrypted object size is 12bytes, V1 client pads extra 4bytes to make it multiple of 16. +- The V1 client supports storing encryption metadata in instruction file. Review Comment: explain this in its own section and consequences: file rename loses this, file delete doesn't clean it up ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java: ########## @@ -200,9 +203,14 @@ public void testSingleObjectDeleteNoPermissionsTranslated() throws Throwable { Path path = requireDefaultExternalData(getConfiguration()); S3AFileSystem fs = (S3AFileSystem) path.getFileSystem( getConfiguration()); - AccessDeniedException aex = intercept(AccessDeniedException.class, + Class exceptionClass = AccessDeniedException.class; + if (CSEUtils.isCSEEnabled(getEncryptionAlgorithm( + getTestBucketName(getConfiguration()), getConfiguration()).getMethod())) { + exceptionClass = AWSClientIOException.class; Review Comment: why a different exception? It should be access denied in both cases, shouldn't it? After all, that is what the test is meant to do: verify that delete-with-no-permissions failures are translated. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java: ########## @@ -766,6 +766,40 @@ private Constants() { public static final String S3_ENCRYPTION_CONTEXT = "fs.s3a.encryption.context"; + /** + * Client side encryption (CSE-CUSTOM) with custom cryptographic material manager class name. + * Custom keyring class name for CSE-KMS. + * value:{@value} + */ + public static final String S3_ENCRYPTION_CSE_CUSTOM_KEYRING_CLASS_NAME = + "fs.s3a.encryption.cse.custom.keyring.class.name"; + + /** + * Config to provide backward compatibility with V1 encryption client. + * Enabling this configuration will enable the following: + * 1. Unencrypted s3 objects will be read using unencrypted/base s3 client when CSE is enabled. + * 2. Size of encrypted object will be calculated using ranged S3 calls. + * 3. While listing s3 objects, encryption metadata file with suffix + * {@link #S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX} will be skipped. + * This is to provide backward compatibility with V1 client. + * value:{@value} + */ + public static final String S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED = + "fs.s3a.encryption.cse.v1.compatibility.enabled"; + + /** + * Default value : {@value}. + */ + public static final boolean S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED_DEFAULT = false; + + /** + * Suffix of instruction file : {@value}. + */ + public static final String S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX = ".instruction"; + + Review Comment: nit: cut the surplus lines ########## hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md: ########## @@ -704,10 +704,20 @@ clients where S3-CSE has not been enabled. ### Features -- Supports client side encryption with keys managed in AWS KMS. +- Supports client side encryption with keys managed in AWS KMS (CSE-KMS) +- Supports client side encryption with custom keys by implementing custom [Keyring](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/choose-keyring.html) (CSE-CUSTOM) +- Backward compatible with older encryption clients like `AmazonS3EncryptionClient.java`(V1) and `AmazonS3EncryptionClientV2.java`(V2) - encryption settings propagated into jobs through any issued delegation tokens. - encryption information stored as headers in the uploaded object. 
+### Compatibility Issues + +- The V1 and V2 clients support reading unencrypted S3 objects, whereas the V3 client does not. In order to read S3 objects in a directory with a mix of encrypted and unencrypted objects, enable V1 compatibility mode by setting `fs.s3a.encryption.cse.v1.compatibility.enabled` to `true`. +- Unlike the V2 and V3 clients which always pads 16 bytes, V1 client pads extra bytes to the next multiple of 16. Review Comment: - Unlike the V2 and V3 clients which always append 16 bytes to a file, the V1 client pads extra bytes to the next multiple of 16. For example if unencrypted object size is 28 bytes, the V1 client pads an extra 4 bytes to make it a multiple of 16. ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryption.java: ########## @@ -266,6 +272,211 @@ public void testEncryptionEnabledAndDisabledFS() throws Exception { } } + /** + * Test to check if unencrypted objects are read with V1 client compatibility. + * @throws IOException + * @throws Exception + */ + @Test + public void testUnencryptedObjectReadWithV1CompatibilityConfig() throws Exception { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem nonCseFs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + nonCseFs.initialize(getFileSystem().getUri(), conf); + + Path file = path(getMethodName()); + // write unencrypted file + ContractTestUtils.writeDataset(nonCseFs, file, new byte[SMALL_FILE_SIZE], + SMALL_FILE_SIZE, SMALL_FILE_SIZE, true); + + Configuration cseConf = new Configuration(getConfiguration()); + cseConf.setBoolean(S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED, true); + // create filesystem with cse enabled and v1 compatibility. + S3AFileSystem cseFs = createTestFileSystem(cseConf); + cseFs.initialize(getFileSystem().getUri(), cseConf); + + // read unencrypted file. It should not throw any exception. + try (FSDataInputStream in = cseFs.open(file)) { + in.read(new byte[SMALL_FILE_SIZE]); + } finally { + // close the filesystem + nonCseFs.close(); + cseFs.close(); + } + } + + /** + * Test to check if file name with suffix ".instruction" is ignored with V1 compatibility. + * @throws IOException + */ + @Test + public void testSkippingCSEInstructionFileWithV1Compatibility() throws IOException { + maybeSkipTest(); + // initialize base s3 client. + Configuration conf = new Configuration(getConfiguration()); + S3AFileSystem fs = createTestFileSystem(conf); + removeBaseAndBucketOverrides(getTestBucketName(conf), + conf, + S3_ENCRYPTION_ALGORITHM, + S3_ENCRYPTION_KEY, + SERVER_SIDE_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_KEY); + fs.initialize(getFileSystem().getUri(), conf); + + // write file with suffix ".instruction" + Path filePath = path(getMethodName()); + Path file = new Path(filePath, + "file" + S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX); + ContractTestUtils.writeDataset(fs, file, new byte[SMALL_FILE_SIZE], + SMALL_FILE_SIZE, SMALL_FILE_SIZE, true); + + // create filesystem with cse enabled and v1 compatibility. 
+ Configuration cseConf = new Configuration(getConfiguration()); + cseConf.setBoolean(S3_ENCRYPTION_CSE_V1_COMPATIBILITY_ENABLED, true); + S3AFileSystem cseFs = createTestFileSystem(cseConf); + cseFs.initialize(getFileSystem().getUri(), cseConf); + try { + // listing from fs without cse + Assertions.assertThat(fs.listStatus(filePath)).describedAs( + "Number of files aren't the same " + "as expected from FileStatus dir") + .hasSize(1); + + // listing fs without cse with v1 compatibility + Assertions.assertThat(cseFs.listStatus(filePath)).describedAs( Review Comment: verify parent dir listing also skips it
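
A minimal sketch of the extra assertion asked for above, reusing the test's own names (cseFs, filePath) and the S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX constant from the PR. The lambda-based noneMatch form is one possible AssertJ phrasing, not necessarily what the patch will use:

    // also assert that listing the parent directory hides the instruction file
    Assertions.assertThat(cseFs.listStatus(filePath.getParent()))
        .describedAs("parent dir listing should skip the CSE instruction file")
        .noneMatch(status -> status.getPath().getName()
            .endsWith(S3_ENCRYPTION_CSE_INSTRUCTION_FILE_SUFFIX));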

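Earlier in the thread the reviewer asks for the try/close clause to be active from the initialize() call in testUnencryptedObjectReadWithV1CompatibilityConfig. A minimal sketch of that control flow, keeping the test's own names (conf, cseConf, file, SMALL_FILE_SIZE) and leaving the configuration setup unchanged; this is one possible shape, not the committed fix:

    // open both filesystems in try-with-resources so they are closed
    // even if an assertion or read fails part-way through
    try (S3AFileSystem nonCseFs = createTestFileSystem(conf)) {
      nonCseFs.initialize(getFileSystem().getUri(), conf);
      // write the unencrypted file through the non-CSE filesystem
      ContractTestUtils.writeDataset(nonCseFs, file, new byte[SMALL_FILE_SIZE],
          SMALL_FILE_SIZE, SMALL_FILE_SIZE, true);
      try (S3AFileSystem cseFs = createTestFileSystem(cseConf)) {
        cseFs.initialize(getFileSystem().getUri(), cseConf);
        // reading the unencrypted file through the CSE filesystem should
        // succeed when v1 compatibility is enabled
        try (FSDataInputStream in = cseFs.open(file)) {
          in.read(new byte[SMALL_FILE_SIZE]);
        }
      }
    }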