better comments for win workaround and basic sanity checks for winutils.exe
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/460d21cb Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/460d21cb Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/460d21cb Branch: refs/heads/BEAM-357_windows-build-fails Commit: 460d21cb7070603f789da9d13e12668194c91e9b Parents: 4188330 Author: Romain manni-Bucau <rmannibu...@gmail.com> Authored: Tue Jun 21 10:37:05 2016 +0200 Committer: Romain manni-Bucau <rmannibu...@gmail.com> Committed: Tue Jun 21 10:37:05 2016 +0200 ---------------------------------------------------------------------- .../beam/runners/flink/WriteSinkITCase.java | 2 +- .../beam/sdk/testing/HadoopWorkarounds.java | 109 +++++++++++++++++-- sdks/java/io/hdfs/pom.xml | 9 -- sdks/java/maven-archetypes/starter/pom.xml | 6 +- 4 files changed, 104 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java ---------------------------------------------------------------------- diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java index 1a56350..bb3778d 100644 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java +++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java @@ -54,7 +54,7 @@ public class WriteSinkITCase extends JavaProgramTestBase { @Override protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); + resultPath = getTempDirPath("result-" + System.nanoTime()); } @Override 
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java index ee2e135..1c2aa20 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java @@ -17,6 +17,8 @@ */ package org.apache.beam.sdk.testing; +import static java.util.Arrays.asList; + import org.apache.commons.compress.utils.IOUtils; import java.io.File; @@ -26,15 +28,21 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; +import java.util.Arrays; /** * A simple class to ensure winutils.exe can be found in the JVM. + * <p> + * See http://wiki.apache.org/hadoop/WindowsProblems for details. + * <p> + * Note: don't forget to add the org.bouncycastle:bcpg-jdk16 dependency to use it. */ public class HadoopWorkarounds { /** * In practice this method only needs to be called once per JVM * since hadoop uses static variables to store it. - * + * <p> * Note: ensure invocation is done before hadoop reads it * and ensure this folder survives tests * (avoid temporary folder usage since tests can share it). 
@@ -51,6 +59,8 @@ public class HadoopWorkarounds { // hadoop doesn't have winutils.exe :(: https://issues.apache.org/jira/browse/HADOOP-10051 // so use this github repo temporarily then just use the main tar.gz /* + // note this commented code requires the commons-compress dependency (to add if we use that) + String hadoopVersion = VersionInfo.getVersion(); final URL url = new URL("https://archive.apache.org/dist/hadoop/common/ hadoop-" + hadoopVersion + "/hadoop-" + hadoopVersion + ".tar.gz"); @@ -97,19 +107,49 @@ public class HadoopWorkarounds { + "-Dhadoop.home.dir so we'll download winutils.exe"); new File(hadoopHome, "bin").mkdirs(); - final URL url; - try { - url = new URL("https://github.com/steveloughran/winutils/" - + "raw/master/hadoop-2.7.1/bin/winutils.exe"); - } catch (final MalformedURLException e) { // unlikely - throw new IllegalArgumentException(e); + final File winutils = new File(hadoopHome, "bin/winutils.exe"); + + for (final String suffix : asList("", ".asc")) { + final URL url; + try { + // this is not a random URL - read HADOOP-10051 + // it is provided and signed with an ASF gpg key. 
+ + // note: 2.6.3 because 2.6.4 and 2.7.1 don't have .asc + url = new URL("https://github.com/steveloughran/winutils/" + + "raw/master/hadoop-2.6.3/bin/winutils.exe" + suffix); + } catch (final MalformedURLException e) { // unlikely + throw new IllegalArgumentException(e); + } + + // download winutils.exe + try { + try (final InputStream is = url.openStream(); + final OutputStream os = new FileOutputStream( + new File(hadoopHome, "bin/winutils.exe" + suffix))) { + try { + IOUtils.copy(is, os, 1024 * 1024); + } catch (final IOException e) { + throw new IllegalStateException(e); + } + } + } catch (final IOException e) { + throw new IllegalStateException(e); + } } + + // get the gpg key which is supposed to have signed the winutils.exe + final File gpg = new File(hadoopHome, "bin/gpg"); try { - try (final InputStream is = url.openStream(); - final OutputStream os = new FileOutputStream( - new File(hadoopHome, "bin/winutils.exe"))) { + /* + key is https://github.com/steveloughran/winutils/blob/master/KEYS + but we trust the ASF, not github, so use the one we trust. + */ + final URL gpgUrl = new URL("http://home.apache.org/keys/committer/stevel"); + try (final InputStream is = gpgUrl.openStream(); + final OutputStream os = new FileOutputStream(gpg)) { + try { - IOUtils.copy(is, os, 1024 * 1024); + IOUtils.copy(is, os); + } catch (final IOException e) { + throw new IllegalStateException(e); + } @@ -117,9 +157,56 @@ public class HadoopWorkarounds { } catch (final IOException e) { throw new IllegalStateException(e); } + + final File ascFile = new File(winutils.getParentFile(), winutils.getName() + ".asc"); + try { + sanityCheck(winutils, ascFile, gpg); + } catch (IOException e) { + throw new IllegalStateException("Invalid download"); + } + + System.setProperty("hadoop.home.dir", hadoopHome.getAbsolutePath()); } + + // TODO: replace with gpg --verify? 
+ // for now it is just some basic sanity checks to ensure we use the files we think + private static void sanityCheck( + final File winutils, final File ascFile, final File gpg) + throws IOException { + + final byte[] asc = Files.readAllBytes(ascFile.toPath()); + final byte[] expectedAsc = ("-----BEGIN PGP SIGNATURE-----\n" + + "Comment: GPGTools - https://gpgtools.org\n" + + "\n" + + "iQIcBAABCgAGBQJWeb5GAAoJEKkkVPkXR4a0qUgP/1u1Z5vV+IvU/8w79HIYX56+\n" + + "FHMRGxM5953dggqjhGSBtfx62YA8oxhDP+8qLpQWtfjTC3//CW1Oz5hrkL0m+Am5\n" + + "Kf+qiINDLqX3Fsc4wHQvnLMt2pJPmm4K9FtpkedCdAchLOiM6Wr7WtGiWYQAdUh0\n" + + "5FjUZLLVx95Kj3cTY+1B/BL+z/hB63Ry2AC29oZG4fCuAH1nTZjhH3vBD1/kzS+E\n" + + "LEKHrGh/pP6ADgg9AfJvVmRhidlCVi21ZfwWHAaitwDTMFvtFSGq03A3F6Xn2iyQ\n" + + "3H6RcZ8dqEbtUEa1jOh1xNGzqP4oipWe0KQJ/Lx2eiSh8te73k/Pfw1Ta9CuHXqk\n" + + "n8ko7cBc/pUm7nXbfjiURtWFJ4corT4oahJQna+GgvYR4BrYVLlSGb5VijTkzb7i\n" + + "0XU40BM5sOcDS/I0lkvqKP0mSi+mMJXbm10y0jw2S7KR7KeHLwzybsjco05DfWUD\n" + + "fSaCHK726g5SLsWJvZaurwna7+Mepzmo1HpAVy6nAuiAa2OQVIioNyFanIbuhbM3\n" + + "7PXBDWbfPOgr1WbYW4TASoepvsuJsAahYf2SlGagByOiDNliDHJi1z+ArfWsCFFh\n" + + "fAMMzPLKJwkmKPahyej3MrcywtntX68D7R8wTCAaj3xCxJsvX4IRv6YRk1+hQ2je\n" + + "EXQFW2c8nTI6XqtFpsbw\n" + + "=42+k\n" + + "-----END PGP SIGNATURE-----\n").getBytes("UTF-8"); + if (!Arrays.equals(asc, expectedAsc)) { + throw new IllegalArgumentException( + "Invalid asc file, did the repo get corrupted?"); + } + + final byte[] exe = Files.readAllBytes(winutils.toPath()); + if (exe.length != 108032 || exe[0] != 77 + || exe[exe.length - 1] != 0 || exe[exe.length / 3] != -127) { + throw new IllegalArgumentException( + "Invalid winutils.exe file, did the repo get corrupted?"); + } + + // for now we ignore gpg cause it is useless until we can use gpg tools + } + /** * Just a convenient win(File) invocation for tests. 
*/ http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/io/hdfs/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/java/io/hdfs/pom.xml b/sdks/java/io/hdfs/pom.xml index f8e3c14..9c30792 100644 --- a/sdks/java/io/hdfs/pom.xml +++ b/sdks/java/io/hdfs/pom.xml @@ -83,14 +83,5 @@ <artifactId>junit</artifactId> <scope>test</scope> </dependency> - - <!-- see HDFSFileSourceTest commented block - <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-compress</artifactId> - <version>1.9</version> - <scope>test</scope> - </dependency> - --> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/maven-archetypes/starter/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml index 9fb21e9..3d8267e 100644 --- a/sdks/java/maven-archetypes/starter/pom.xml +++ b/sdks/java/maven-archetypes/starter/pom.xml @@ -61,7 +61,11 @@ <goal>integration-test</goal> </goals> <configuration> - <ignoreEOLStyle>true</ignoreEOLStyle> <!-- for win --> + <!-- + For Windows: project files use \n but Windows uses \r\n during generation. + Since it doesn't change the validity of the generated files (java, xml), we are fine ignoring EOL style. + --> + <ignoreEOLStyle>true</ignoreEOLStyle> </configuration> </execution> </executions>