[BEAM-2277] Add ResourceIdTester and test existing ResourceId implementations
A first cut at some of the parts of the ResourceId spec. Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a6a5ff7b Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a6a5ff7b Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a6a5ff7b Branch: refs/heads/master Commit: a6a5ff7be387ef295fc7f921de36a3ea77327bc1 Parents: fbb0de1 Author: Dan Halperin <[email protected]> Authored: Fri May 12 09:20:34 2017 -0700 Committer: Dan Halperin <[email protected]> Committed: Fri May 12 14:59:10 2017 -0700 ---------------------------------------------------------------------- .../apache/beam/sdk/io/LocalResourceIdTest.java | 6 + .../apache/beam/sdk/io/fs/ResourceIdTester.java | 151 +++++++++++++++++++ .../google-cloud-platform-core/pom.xml | 6 + .../gcp/storage/GcsResourceIdTest.java | 9 ++ sdks/java/io/hadoop-file-system/pom.xml | 13 ++ .../beam/sdk/io/hdfs/HadoopResourceIdTest.java | 63 ++++++++ 6 files changed, 248 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java index 7ea85cf..e1ca303 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java @@ -31,6 +31,7 @@ import java.io.File; import java.nio.file.Paths; import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.io.fs.ResourceIdTester; import org.apache.commons.lang3.SystemUtils; import org.junit.Rule; import org.junit.Test; @@ -259,6 +260,11 @@ public class LocalResourceIdTest { "xyz.txt"); } + @Test + public void testResourceIdTester() throws Exception { + ResourceIdTester.runResourceIdBattery(toResourceIdentifier("/tmp/foo/")); + } + private LocalResourceId toResourceIdentifier(String str) throws Exception { boolean isDirectory; if (SystemUtils.IS_OS_WINDOWS) { http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java new file mode 100644 index 0000000..fe50ada --- /dev/null +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.fs; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_DIRECTORY; +import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_FILE; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.testing.EqualsTester; +import java.util.ArrayList; +import java.util.List; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.io.FileSystems; + +/** + * A utility to test {@link ResourceId} implementations. + */ +@Experimental(Kind.FILESYSTEM) +public final class ResourceIdTester { + /** + * Enforces that the {@link ResourceId} implementation of {@code baseDirectory} meets the + * {@link ResourceId} spec. + */ + public static void runResourceIdBattery(ResourceId baseDirectory) { + checkArgument( + baseDirectory.isDirectory(), "baseDirectory %s is not a directory", baseDirectory); + + List<ResourceId> allResourceIds = new ArrayList<>(); + allResourceIds.add(baseDirectory); + + // Validate that individual resources meet the fairly restrictive spec we have. + validateResourceIds(allResourceIds); + + // Validate operations with resolving child resources. + validateResolvingIds(baseDirectory, allResourceIds); + + // Validate safeguards against resolving bad paths. + validateFailureResolvingIds(baseDirectory); + } + + private static void validateResolvingIds( + ResourceId baseDirectory, List<ResourceId> allResourceIds) { + ResourceId file1 = baseDirectory.resolve("child1", RESOLVE_FILE); + ResourceId file2 = baseDirectory.resolve("child2", RESOLVE_FILE); + ResourceId file2a = baseDirectory.resolve("child2", RESOLVE_FILE); + allResourceIds.add(file1); + allResourceIds.add(file2); + assertFalse("Resolved file isDirectory()", file1.isDirectory()); + assertFalse("Resolved file isDirectory()", file2.isDirectory()); + assertFalse("Resolved file isDirectory()", file2a.isDirectory()); + + ResourceId dir1 = baseDirectory.resolve("child1", RESOLVE_DIRECTORY); + ResourceId dir2 = baseDirectory.resolve("child2", RESOLVE_DIRECTORY); + ResourceId dir2a = baseDirectory.resolve("child2", RESOLVE_DIRECTORY); + assertTrue("Resolved directory isDirectory()", dir1.isDirectory()); + assertTrue("Resolved directory isDirectory()", dir2.isDirectory()); + assertTrue("Resolved directory isDirectory()", dir2a.isDirectory()); + allResourceIds.add(dir1); + allResourceIds.add(dir2); + + // ResourceIds in equality groups. + new EqualsTester() + .addEqualityGroup(file1) + .addEqualityGroup(file2, file2a) + .addEqualityGroup(dir1, dir1.getCurrentDirectory()) + .addEqualityGroup(dir2, dir2a, dir2.getCurrentDirectory()) + .addEqualityGroup(baseDirectory, file1.getCurrentDirectory(), file2.getCurrentDirectory()) + .testEquals(); + + // ResourceId toString() in equality groups. + new EqualsTester() + .addEqualityGroup(file1.toString()) + .addEqualityGroup(file2.toString(), file2a.toString()) + .addEqualityGroup(dir1.toString(), dir1.getCurrentDirectory().toString()) + .addEqualityGroup(dir2.toString(), dir2a.toString(), dir2.getCurrentDirectory().toString()) + .addEqualityGroup( + baseDirectory.toString(), + file1.getCurrentDirectory().toString(), + file2.getCurrentDirectory().toString()) + .testEquals(); + + // TODO: test resolving strings that need to be escaped. + // Possible spec: https://tools.ietf.org/html/rfc3986#section-2 + // May need options to be filesystem-independent, e.g., if filesystems ban certain chars. + } + + private static void validateFailureResolvingIds(ResourceId baseDirectory) { + try { + ResourceId badFile = baseDirectory.resolve("file/", RESOLVE_FILE); + fail(String.format("Resolving badFile %s should have failed", badFile)); + } catch (Throwable t) { + // expected + } + + ResourceId file = baseDirectory.resolve("file", RESOLVE_FILE); + try { + baseDirectory.resolve("file2", RESOLVE_FILE); + fail(String.format("Should not be able to resolve against file resource %s", file)); + } catch (Throwable t) { + // expected + } + } + + private static void validateResourceIds(List<ResourceId> resourceIds) { + for (ResourceId resourceId : resourceIds) { + // ResourceIds should equal themselves. + assertThat("ResourceId equal to itself", resourceId, equalTo(resourceId)); + + // ResourceIds should be clonable via FileSystems#matchNewResource. + ResourceId cloned; + if (resourceId.isDirectory()) { + cloned = FileSystems.matchNewResource(resourceId.toString(), true /* isDirectory */); + } else { + cloned = FileSystems.matchNewResource(resourceId.toString(), false /* isDirectory */); + } + assertThat( + "ResourceId equals clone of itself", cloned, equalTo(resourceId)); + // .. and clones have consistent toString. + assertThat( + "ResourceId toString consistency", cloned.toString(), equalTo(resourceId.toString())); + // .. and have consistent isDirectory. + assertThat( + "ResourceId isDirectory consistency", + cloned.isDirectory(), + equalTo(resourceId.isDirectory())); + } + } + + private ResourceIdTester() {} // prevent instantiation +} http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/java/extensions/google-cloud-platform-core/pom.xml b/sdks/java/extensions/google-cloud-platform-core/pom.xml index a1baea1..e4e951b 100644 --- a/sdks/java/extensions/google-cloud-platform-core/pom.xml +++ b/sdks/java/extensions/google-cloud-platform-core/pom.xml @@ -152,6 +152,12 @@ <!-- test dependencies --> <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava-testlib</artifactId> + <scope>test</scope> + </dependency> + + <dependency> <groupId>org.apache.beam</groupId> <artifactId>beam-sdks-java-core</artifactId> <classifier>tests</classifier> http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java ---------------------------------------------------------------------- diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java index b245610..2a67501 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java @@ -22,8 +22,11 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; +import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.io.fs.ResourceIdTester; +import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.util.gcsfs.GcsPath; import org.junit.Rule; import org.junit.Test; @@ -163,6 +166,12 @@ public class GcsResourceIdTest { "xyz.txt"); } + @Test + public void testResourceIdTester() throws Exception { + FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions()); + ResourceIdTester.runResourceIdBattery(toResourceIdentifier("gs://bucket/foo/")); + } + private GcsResourceId toResourceIdentifier(String str) throws Exception { return GcsResourceId.fromGcsPath(GcsPath.fromUri(str)); } http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/java/io/hadoop-file-system/pom.xml b/sdks/java/io/hadoop-file-system/pom.xml index 423237b..db5a1db 100644 --- a/sdks/java/io/hadoop-file-system/pom.xml +++ b/sdks/java/io/hadoop-file-system/pom.xml @@ -157,6 +157,19 @@ </dependency> <dependency> + <groupId>org.apache.beam</groupId> + <artifactId>beam-sdks-java-core</artifactId> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava-testlib</artifactId> + <scope>test</scope> + </dependency> + + <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-all</artifactId> <scope>test</scope> http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java new file mode 100644 index 0000000..b0d821b --- /dev/null +++ b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.hdfs; + +import java.net.URI; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.ResourceIdTester; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Tests for {@link HadoopResourceId}. + */ +public class HadoopResourceIdTest { + private Configuration configuration; + private MiniDFSCluster hdfsCluster; + private URI hdfsClusterBaseUri; + private HadoopFileSystem fileSystem; + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + configuration = new Configuration(); + configuration.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tmpFolder.getRoot().getAbsolutePath()); + MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(configuration); + hdfsCluster = builder.build(); + hdfsClusterBaseUri = new URI(configuration.get("fs.defaultFS") + "/"); + fileSystem = new HadoopFileSystem(configuration); + } + + @After + public void tearDown() throws Exception { + hdfsCluster.shutdown(); + } + + @Test + public void testResourceIdTester() throws Exception { + FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions()); + ResourceIdTester.runResourceIdBattery(new HadoopResourceId(hdfsClusterBaseUri)); + } +}
