Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 6f6a61bb5 -> 30baa089d
HADOOP-14476 make InconsistentAmazonS3Client usable in downstream tests (Aaron Fabbri) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/30baa089 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/30baa089 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/30baa089 Branch: refs/heads/HADOOP-13345 Commit: 30baa089d833dbd5dc981de06b0e9a17ead038e8 Parents: 6f6a61b Author: Aaron Fabbri <[email protected]> Authored: Wed Jun 7 13:11:15 2017 -0700 Committer: Aaron Fabbri <[email protected]> Committed: Thu Jun 8 11:09:11 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/fs/s3a/Constants.java | 16 ++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- .../fs/s3a/InconsistentAmazonS3Client.java | 54 ++++++- .../fs/s3a/InconsistentS3ClientFactory.java | 40 +++++ .../site/markdown/tools/hadoop-aws/testing.md | 127 +++++++++++++++- .../hadoop/fs/s3a/ITestS3AInconsistency.java | 100 +++++++++++++ .../fs/s3a/ITestS3GuardListConsistency.java | 146 +++---------------- .../hadoop/fs/s3a/ITestS3GuardWriteBack.java | 141 ++++++++++++++++++ .../fs/s3a/InconsistentS3ClientFactory.java | 35 ----- 9 files changed, 495 insertions(+), 166 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 0fb1197..1a464d0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -435,4 +435,20 @@ public final class Constants { 
@InterfaceStability.Unstable public static final String S3GUARD_METASTORE_DYNAMO = "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore"; + + /** + * Inconsistency (visibility delay) injection settings. + */ + @InterfaceStability.Unstable + public static final String FAIL_INJECT_INCONSISTENCY_KEY = + "fs.s3a.failinject.inconsistency.key.substring"; + + @InterfaceStability.Unstable + public static final String FAIL_INJECT_INCONSISTENCY_MSEC = + "fs.s3a.failinject.inconsistency.msec"; + + @InterfaceStability.Unstable + public static final String FAIL_INJECT_INCONSISTENCY_PROBABILITY = + "fs.s3a.failinject.inconsistency.probability"; + } http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index a329580..f33b25e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -43,7 +43,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.intOption; public class DefaultS3ClientFactory extends Configured implements S3ClientFactory { - private static final Logger LOG = S3AFileSystem.LOG; + protected static final Logger LOG = S3AFileSystem.LOG; @Override public AmazonS3 createS3Client(URI name) throws IOException { http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java index 98ea16a..5b62c66 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java @@ -29,9 +29,14 @@ import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.S3ObjectSummary; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.s3a.Constants.*; + import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -43,22 +48,38 @@ import java.util.Map; * inconsistency and/or errors. Used for testing S3Guard. * Currently only delays listing visibility, not affecting GET. */ [email protected] [email protected] public class InconsistentAmazonS3Client extends AmazonS3Client { /** * Keys containing this substring will be subject to delayed visibility. */ - public static final String DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME"; + public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME"; /** * How many seconds affected keys will be delayed from appearing in listing. * This should probably be a config value. */ - public static final long DELAY_KEY_MILLIS = 5 * 1000; + public static final long DEFAULT_DELAY_KEY_MSEC = 5 * 1000; + + public static final float DEFAULT_DELAY_KEY_PROBABILITY = 1.0f; + + /** Special config value since we can't store empty strings in XML. */ + public static final String MATCH_ALL_KEYS = "*"; private static final Logger LOG = LoggerFactory.getLogger(InconsistentAmazonS3Client.class); + /** Empty string matches all keys. 
*/ + private String delayKeySubstring; + + /** Probability to delay visibility of a matching key. */ + private float delayKeyProbability; + + /** Time in milliseconds to delay visibility of newly modified object. */ + private long delayKeyMsec; + /** * Composite of data we need to track about recently deleted objects: * when it was deleted (same was with recently put objects) and the object @@ -91,8 +112,25 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { private Map<String, Long> delayedPutKeys = new HashMap<>(); public InconsistentAmazonS3Client(AWSCredentialsProvider credentials, - ClientConfiguration clientConfiguration) { + ClientConfiguration clientConfiguration, Configuration conf) { super(credentials, clientConfiguration); + setupConfig(conf); + } + + protected void setupConfig(Configuration conf) { + + delayKeySubstring = conf.get(FAIL_INJECT_INCONSISTENCY_KEY, + DEFAULT_DELAY_KEY_SUBSTRING); + // "" is a substring of all strings, use it to match all keys. + if (delayKeySubstring.equals(MATCH_ALL_KEYS)) { + delayKeySubstring = ""; + } + delayKeyProbability = conf.getFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, + DEFAULT_DELAY_KEY_PROBABILITY); + delayKeyMsec = conf.getLong(FAIL_INJECT_INCONSISTENCY_MSEC, + DEFAULT_DELAY_KEY_MSEC); + LOG.info("Enabled with {} msec delay, substring {}, probability {}", + delayKeyMsec, delayKeySubstring, delayKeyProbability); } @Override @@ -191,7 +229,7 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { return false; } long currentTime = System.currentTimeMillis(); - long deadline = enqueueTime + DELAY_KEY_MILLIS; + long deadline = enqueueTime + delayKeyMsec; if (currentTime >= deadline) { delayedDeletes.remove(key); LOG.debug("no longer delaying {}", key); @@ -231,11 +269,17 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { * @return true if we should delay */ private boolean shouldDelay(String key) { - boolean delay = key.contains(DELAY_KEY_SUBSTRING); + boolean delay = 
key.contains(delayKeySubstring); + delay = delay && trueWithProbability(delayKeyProbability); LOG.debug("{} -> {}", key, delay); return delay; } + + private boolean trueWithProbability(float p) { + return Math.random() < p; + } + /** * Record this key as something that should not become visible in * listObject replies for a while, to simulate eventual list consistency. http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java new file mode 100644 index 0000000..17d268b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * S3 Client factory used for testing with eventual consistency fault injection. + */ [email protected] [email protected] +public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { + + @Override + protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials, + ClientConfiguration awsConf) { + LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **"); + return new InconsistentAmazonS3Client(credentials, awsConf, getConf()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 3b83f1f..0bf2261 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -691,7 +691,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Blasses +#### Subclasses Existing Shared Base Classes Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable. These set things up for testing against the object stores, provide good threadnames, @@ -798,7 +798,7 @@ We really appreciate this — you will too. 
### How to keep your credentials really safe -Although the `auth-keys.xml` file is marged as ignored in git and subversion, +Although the `auth-keys.xml` file is marked as ignored in git and subversion, it is still in your source tree, and there's always that risk that it may creep out. @@ -813,3 +813,126 @@ using an absolute XInclude reference to it. </configuration> ``` + +# Failure Injection + +**Warning: do not enable any type of failure injection in production. The +following settings are for test development only.** + +## Inconsistency Injection + +One of the challenges with S3A integration tests is the fact that S3 is an +eventually-consistent storage system. In practice, we rarely see delays in +visibility of recently created objects both in listings (listStatus()) and +when getting a single file's metadata (getFileStatus()). Since this behavior +is rare and non-deterministic, thorough integration testing is challenging. + +To address this, we developed a shim layer on top of the `AmazonS3Client` +class which artificially delays certain paths from appearing in listings. +This is implemented in the class `InconsistentAmazonS3Client`. + +### Enabling the InconsistentAmazonS3Client + +There are two ways of enabling the `InconsistentAmazonS3Client`: at +config-time, or programmatically. For an example of programmatic test usage, +see `ITestS3GuardListConsistency`. + +To enable the inconsistency injecting client via configuration, set the +following class name for the client factory configuration: + +```xml +<property> + <name>fs.s3a.s3.client.factory.impl</name> + <value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value> +</property> +``` + +The inconsistent client works by: + +1. Choosing which objects will be "inconsistent" at the time the object is +created or deleted. +2. When listObjects is called, any keys that we have marked as +inconsistent above will not be returned in the results (until the +configured delay has elapsed).
Similarly, deleted items may be *added* back to +listing results to delay the visibility of the delete. + +There are two ways of choosing which keys (filenames) will be affected: By +substring, and by random probability. + +```xml +<property> + <name>fs.s3a.failinject.inconsistency.key.substring</name> + <value>DELAY_LISTING_ME</value> +</property> + +<property> + <name>fs.s3a.failinject.inconsistency.probability</name> + <value>1.0</value> +</property> +``` + +By default, any object which has the substring "DELAY_LISTING_ME" in its key +will be subject to delayed visibility. For example, the path +`s3a://my-bucket/test/DELAY_LISTING_ME/file.txt` would match this condition. +To match all keys use the value "\*" (a single asterisk). This is a special +value: *We don't support arbitrary wildcards.* + +The default probability of delaying an object is 1.0. This means that *all* +keys that match the substring will get delayed visibility. Note that we take +the logical *and* of the two conditions (substring matches *and* probability +random chance occurs). Here are some example configurations: + +``` +| substring | probability | behavior | +|-----------|-------------|--------------------------------------------| +| | 0.001 | An empty <value> tag in .xml config will | +| | | be interpreted as unset and revert to the | +| | | default value, "DELAY_LISTING_ME" | +| | | | +| * | 0.001 | 1/1000 chance of *any* key being delayed. | +| | | | +| delay | 0.01 | 1/100 chance of any key containing "delay" | +| | | | +| delay | 1.0 | All keys containing substring "delay" .. | +``` + +You can also configure how long you want the delay in visibility to last. +The default is 5000 milliseconds (five seconds).
+ +```xml +<property> + <name>fs.s3a.failinject.inconsistency.msec</name> + <value>5000</value> +</property> +``` + +#### Limitations of Inconsistency Injection + +Although we can delay visibility of an object or parent directory via the +`InconsistentAmazonS3Client` we do not keep the key of that object from +appearing in all prefix searches. For example, if we create the following +object with the default configuration above, in an otherwise empty bucket: + +``` + s3a://bucket/a/b/c/DELAY_LISTING_ME +``` + +Then the following paths will still be visible as directories: + +``` + s3a://bucket/a + s3a://bucket/a/b +``` + +Whereas getFileStatus() on the following *will* be subject to delayed +visibility (FileNotFoundException until delay has elapsed): + +``` + s3a://bucket/a/b/c + s3a://bucket/a/b/c/DELAY_LISTING_ME +``` + + In real-life S3 inconsistency, however, we expect that all the above paths + (including `a` and `b`) will be subject to delayed visibility. + + http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java new file mode 100644 index 0000000..eb4f70b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.s3a.S3AContract; +import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.Test; + +import java.io.FileNotFoundException; +import java.util.concurrent.Callable; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*; + +/** + * Tests S3A behavior under forced inconsistency via {@link + * InconsistentAmazonS3Client}. + * + * These tests are for validating expected behavior *without* S3Guard, but + * may also run with S3Guard enabled. For tests that validate S3Guard's + * consistency features, see {@link ITestS3GuardListConsistency}. + */ +public class ITestS3AInconsistency extends AbstractS3ATestBase { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class, + S3ClientFactory.class); + conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING); + conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f); + conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC); + return new S3AContract(conf); + } + + @Test + public void testGetFileStatus() throws Exception { + S3AFileSystem fs = getFileSystem(); + + // 1. 
Make sure no ancestor dirs exist + Path dir = path("ancestor"); + fs.delete(dir, true); + waitUntilDeleted(dir); + + // 2. Create a descendant file, which implicitly creates ancestors + // This file has delayed visibility. + touch(getFileSystem(), + path("ancestor/file-" + DEFAULT_DELAY_KEY_SUBSTRING)); + + // 3. Assert expected behavior. If S3Guard is enabled, we should be able + // to get status for ancestor. If S3Guard is *not* enabled, S3A will + // fail to infer the existence of the ancestor since visibility of the + // child file is delayed, and its key prefix search will return nothing. + try { + FileStatus status = fs.getFileStatus(dir); + if (fs.hasMetadataStore()) { + assertTrue("Ancestor is dir", status.isDirectory()); + } else { + fail("getFileStatus should fail due to delayed visibility."); + } + } catch (FileNotFoundException e) { + if (fs.hasMetadataStore()) { + fail("S3Guard failed to list parent of inconsistent child."); + } + LOG.info("File not found, as expected."); + } + } + + private void waitUntilDeleted(final Path p) throws Exception { + LambdaTestUtils.eventually(30 * 1000, 1000, + new Callable<Void>() { + @Override + public Void call() throws Exception { + assertPathDoesNotExist("Dir should be deleted", p); + return null; + } + } + ); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index 8771fd2..e06afd0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -20,32 +20,31 @@ package 
org.apache.hadoop.fs.s3a; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; -import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; import org.junit.Assume; import org.junit.Test; import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URI; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING; +import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*; /** * Test S3Guard list consistency feature by injecting delayed listObjects() * visibility via {@link InconsistentAmazonS3Client}. + * + * Tests here generally: + * 1. Use the inconsistency injection mentioned above. + * 2. Only run when S3Guard is enabled. 
*/ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { @@ -53,6 +52,10 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { protected AbstractFSContract createContract(Configuration conf) { conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class, S3ClientFactory.class); + // Other configs would break test assumptions + conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING); + conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f); + conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC); return new S3AContract(conf); } @@ -75,7 +78,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { for (Path mkdir : mkdirs) { assertTrue(fs.mkdirs(mkdir)); } - Thread.sleep(InconsistentAmazonS3Client.DELAY_KEY_MILLIS); + Thread.sleep(DEFAULT_DELAY_KEY_MSEC); } assertTrue("srcdirs and dstdirs must have equal length", @@ -102,14 +105,14 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { public void testConsistentListAfterRename() throws Exception { Path[] mkdirs = { path("d1/f"), - path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING) + path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING) }; Path[] srcdirs = {path("d1")}; Path[] dstdirs = {path("d2")}; Path[] yesdirs = {path("d2"), path("d2/f"), - path("d2/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)}; + path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)}; Path[] nodirs = {path("d1"), path("d1/f"), - path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)}; + path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)}; doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs); getFileSystem().delete(path("d1"), true); getFileSystem().delete(path("d2"), true); @@ -159,7 +162,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client Path inconsistentPath = - path("a/b/dir3-" + 
InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING); + path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING); Path[] testDirs = {path("a/b/dir1"), path("a/b/dir2"), @@ -168,7 +171,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { for (Path path : testDirs) { assertTrue(fs.mkdirs(path)); } - Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS); + Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC); for (Path path : testDirs) { assertTrue(fs.delete(path, false)); } @@ -199,7 +202,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client Path inconsistentPath = - path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING); + path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING); Path[] testDirs = {path("a/b/dir1"), path("a/b/dir2"), @@ -208,7 +211,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { for (Path path : testDirs) { assertTrue(fs.mkdirs(path)); } - Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS); + Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC); assertTrue(fs.delete(testDirs[1], false)); assertTrue(fs.delete(testDirs[2], false)); @@ -222,7 +225,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { assertFalse(list.contains(path("a3/b/dir2"))); // This should fail without S3Guard, and succeed with it. 
assertFalse(list.contains( - path("a3/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING))); + path("a3/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING))); try { RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories( @@ -245,7 +248,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client Path inconsistentPath = - path("a/b/dir3-" + DELAY_KEY_SUBSTRING); + path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING); Path[] testDirs = {path("a/b/dir1"), path("a/b/dir2"), @@ -308,7 +311,8 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { for (; index < normalPathNum + delayedPathNum; index++) { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client - testDirs.add(path(rootDir + "/dir-" + index + DELAY_KEY_SUBSTRING)); + testDirs.add(path(rootDir + "/dir-" + index + + DEFAULT_DELAY_KEY_SUBSTRING)); } for (Path path : testDirs) { @@ -334,8 +338,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { } /** - * Similar to {@link #testConsistentListStatus()}, this tests that the S3AFS - * listFiles() call will return consistent file list. + * Tests that the S3AFS listFiles() call will return consistent file list. 
*/ @Test public void testConsistentListFiles() throws Exception { @@ -399,7 +402,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { for (; index < normalFileNum + delayedFileNum; index++) { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client - fileNames.add("file-" + index + "-" + DELAY_KEY_SUBSTRING); + fileNames.add("file-" + index + "-" + DEFAULT_DELAY_KEY_SUBSTRING); } int filesAndEmptyDirectories = 0; @@ -449,107 +452,4 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { } } - private static S3AFileSystem asS3AFS(FileSystem fs) { - assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem); - return (S3AFileSystem)fs; - } - - /** Create a separate S3AFileSystem instance for testing. */ - private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard, - boolean authoritativeMeta) - throws IOException { - Configuration conf; - - // Create a FileSystem that is S3-backed only - conf = createConfiguration(); - S3ATestUtils.disableFilesystemCaching(conf); - if (disableS3Guard) { - conf.set(Constants.S3_METADATA_STORE_IMPL, - Constants.S3GUARD_METASTORE_NULL); - } else { - S3ATestUtils.maybeEnableS3Guard(conf); - conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta); - } - FileSystem fs = FileSystem.get(fsURI, conf); - return asS3AFS(fs); - } - - private static void assertPathDoesntExist(FileSystem fs, Path p) - throws IOException { - try { - FileStatus s = fs.getFileStatus(p); - } catch (FileNotFoundException e) { - return; - } - fail("Path should not exist: " + p); - } - - /** - * In listStatus(), when S3Guard is enabled, the full listing for a - * directory is "written back" to the MetadataStore before the listing is - * returned. Currently this "write back" behavior occurs when - * fs.s3a.metadatastore.authoritative is true. This test validates this - * behavior. 
- * @throws Exception on failure - */ - @Test - public void testListStatusWriteBack() throws Exception { - Assume.assumeTrue(getFileSystem().hasMetadataStore()); - - Configuration conf; - Path directory = path("ListStatusWriteBack"); - - // "raw" S3AFileSystem without S3Guard - S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false); - - // Another with S3Guard and write-back disabled - S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false); - - // Another S3Guard and write-back enabled - S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true); - - // delete the existing directory (in case of last test failure) - noS3Guard.delete(directory, true); - // Create a directory on S3 only - noS3Guard.mkdirs(new Path(directory, "OnS3")); - // Create a directory on both S3 and metadata store - Path p = new Path(directory, "OnS3AndMS"); - assertPathDoesntExist(noWriteBack, p); - noWriteBack.mkdirs(p); - - FileStatus[] fsResults; - DirListingMetadata mdResults; - - // FS should return both even though S3Guard is not writing back to MS - fsResults = noWriteBack.listStatus(directory); - assertEquals("Filesystem enabled S3Guard without write back should have " - + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults), - 2, fsResults.length); - - // Metadata store without write-back should still only contain /OnS3AndMS, - // because newly discovered /OnS3 is not written back to metadata store - mdResults = noWriteBack.getMetadataStore().listChildren(directory); - assertEquals("Metadata store without write back should still only know " - + "about /OnS3AndMS, but it has: " + mdResults, - 1, mdResults.numEntries()); - - // FS should return both (and will write it back) - fsResults = yesWriteBack.listStatus(directory); - assertEquals("Filesystem enabled S3Guard with write back should have " - + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults), - 2, fsResults.length); - - // Metadata store with write-back should 
contain both because the newly - // discovered /OnS3 should have been written back to metadata store - mdResults = yesWriteBack.getMetadataStore().listChildren(directory); - assertEquals("Unexpected number of results from metadata store. " - + "Should have /OnS3 and /OnS3AndMS: " + mdResults, - 2, mdResults.numEntries()); - - // If we don't clean this up, the next test run will fail because it will - // have recorded /OnS3 being deleted even after it's written to noS3Guard. - getFileSystem().getMetadataStore().forgetMetadata( - new Path(directory, "OnS3")); - } - } http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java new file mode 100644 index 0000000..a63b696 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; +import org.junit.Assume; +import org.junit.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; + +/** + * Test cases that validate S3Guard's behavior for writing things like + * directory listings back to the MetadataStore. + */ +public class ITestS3GuardWriteBack extends AbstractS3ATestBase { + + /** + * In listStatus(), when S3Guard is enabled, the full listing for a + * directory is "written back" to the MetadataStore before the listing is + * returned. Currently this "write back" behavior occurs when + * fs.s3a.metadatastore.authoritative is true. This test validates this + * behavior. + * @throws Exception on failure + */ + @Test + public void testListStatusWriteBack() throws Exception { + Assume.assumeTrue(getFileSystem().hasMetadataStore()); + + Path directory = path("ListStatusWriteBack"); + + // "raw" S3AFileSystem without S3Guard + S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false); + + // Another with S3Guard and write-back disabled + S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false); + + // Another S3Guard and write-back enabled + S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true); + + // delete the existing directory (in case of last test failure) + noS3Guard.delete(directory, true); + // Create a directory on S3 only + noS3Guard.mkdirs(new Path(directory, "OnS3")); + // Create a directory on both S3 and metadata store + Path p = new Path(directory, "OnS3AndMS"); + assertPathDoesntExist(noWriteBack, p); + noWriteBack.mkdirs(p); + + FileStatus[] fsResults; + DirListingMetadata mdResults; + + // FS should return both even though S3Guard is not writing 
back to MS + fsResults = noWriteBack.listStatus(directory); + assertEquals("Filesystem enabled S3Guard without write back should have " + + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults), + 2, fsResults.length); + + // Metadata store without write-back should still only contain /OnS3AndMS, + // because newly discovered /OnS3 is not written back to metadata store + mdResults = noWriteBack.getMetadataStore().listChildren(directory); + assertEquals("Metadata store without write back should still only know " + + "about /OnS3AndMS, but it has: " + mdResults, + 1, mdResults.numEntries()); + + // FS should return both (and will write it back) + fsResults = yesWriteBack.listStatus(directory); + assertEquals("Filesystem enabled S3Guard with write back should have " + + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults), + 2, fsResults.length); + + // Metadata store with write-back should contain both because the newly + // discovered /OnS3 should have been written back to metadata store + mdResults = yesWriteBack.getMetadataStore().listChildren(directory); + assertEquals("Unexpected number of results from metadata store. " + + "Should have /OnS3 and /OnS3AndMS: " + mdResults, + 2, mdResults.numEntries()); + + // If we don't clean this up, the next test run will fail because it will + // have recorded /OnS3 being deleted even after it's written to noS3Guard. + getFileSystem().getMetadataStore().forgetMetadata( + new Path(directory, "OnS3")); + } + + /** Create a separate S3AFileSystem instance for testing. 
*/ + private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard, + boolean authoritativeMeta) throws IOException { + Configuration conf; + + // Create a FileSystem that is S3-backed only + conf = createConfiguration(); + S3ATestUtils.disableFilesystemCaching(conf); + if (disableS3Guard) { + conf.set(Constants.S3_METADATA_STORE_IMPL, + Constants.S3GUARD_METASTORE_NULL); + } else { + S3ATestUtils.maybeEnableS3Guard(conf); + conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta); + } + FileSystem fs = FileSystem.get(fsURI, conf); + return asS3AFS(fs); + } + + private static S3AFileSystem asS3AFS(FileSystem fs) { + assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem); + return (S3AFileSystem)fs; + } + + private static void assertPathDoesntExist(FileSystem fs, Path p) + throws IOException { + try { + FileStatus s = fs.getFileStatus(p); + } catch (FileNotFoundException e) { + return; + } + fail("Path should not exist: " + p); + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java deleted file mode 100644 index 88a9c78..0000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.s3.AmazonS3; - -/** - * S3 Client factory used for testing with eventual consistency fault injection. - */ -public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { - - @Override - protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials, - ClientConfiguration awsConf) { - return new InconsistentAmazonS3Client(credentials, awsConf); - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org
