MAPREDUCE-7030. Uploader tool should ignore symlinks to the same directory (miklos.szeg...@cloudera.com via rkanter)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e404650f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e404650f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e404650f

Branch: refs/heads/YARN-7402
Commit: e404650f489727d2df9a8813fddc4e0d682fbbee
Parents: c05b847
Author: Robert Kanter <rkan...@apache.org>
Authored: Fri Jan 12 14:18:01 2018 -0800
Committer: Robert Kanter <rkan...@apache.org>
Committed: Fri Jan 12 14:18:01 2018 -0800

----------------------------------------------------------------------
 .../mapred/uploader/FrameworkUploader.java      | 50 +++++++++++++++++-
 .../mapred/uploader/TestFrameworkUploader.java  | 53 ++++++++++++++++++++
 2 files changed, 101 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
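The change makes the uploader skip jars that are symlinks pointing back into their own directory (typical version aliases such as foo.jar -> foo-1.0.jar), so the framework tarball does not carry the same bytes twice; the behaviour is opt-in via the new -nosymlink option added below. A minimal standalone sketch of the same check that the patch adds in checkSymlink(); class and method names here are illustrative and not part of the patch:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/** Illustrative sketch of the same-directory symlink check; not part of the patch. */
public final class SameDirLinkCheck {

  /** Returns true when the file is a symlink whose target resolves into the same directory. */
  static boolean isSameDirectoryLink(Path file) throws IOException {
    if (!Files.isSymbolicLink(file)) {
      return false;
    }
    Path parent = file.toAbsolutePath().getParent();
    if (parent == null) {
      return false;
    }
    // readSymbolicLink returns the raw link text, which may be relative;
    // resolve it against the link's own directory before comparing parents.
    Path target = parent.resolve(Files.readSymbolicLink(file)).normalize();
    return parent.equals(target.getParent());
  }

  public static void main(String[] args) throws IOException {
    // Example: with lib/foo.jar -> foo-1.0.jar this prints true for lib/foo.jar.
    System.out.println(isSameDirectoryLink(Paths.get(args[0])));
  }
}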


http://git-wip-us.apache.org/repos/asf/hadoop/blob/e404650f/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
index a374262..899689d 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
@@ -40,6 +40,9 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.NotLinkException;
+import java.nio.file.Paths;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
@@ -71,6 +74,7 @@ public class FrameworkUploader implements Runnable {
   String target = null;
   @VisibleForTesting
   short replication = 10;
+  private boolean ignoreSymlink = false;
 
   @VisibleForTesting
   Set<String> filteredInputFiles = new HashSet<>();
@@ -79,8 +83,7 @@ public class FrameworkUploader implements Runnable {
   @VisibleForTesting
   List<Pattern> blacklistedFiles = new LinkedList<>();
 
-  @VisibleForTesting
-  OutputStream targetStream = null;
+  private OutputStream targetStream = null;
   private String alias = null;
 
   private void printHelp(Options options) {
@@ -284,6 +287,9 @@ public class FrameworkUploader implements Runnable {
         break;
       }
     }
+    if (ignoreSymlink && !excluded) {
+      excluded = checkSymlink(jar);
+    }
     if (found && !excluded) {
       LOG.info("Whitelisted " + jar.getAbsolutePath());
       if (!filteredInputFiles.add(jar.getAbsolutePath())) {
@@ -299,6 +305,40 @@ public class FrameworkUploader implements Runnable {
     }
   }
 
+  /**
+   * Check if the file is a symlink to the same directory.
+   * @param jar The file to check
+   * @return true if the file is a symlink into the same directory and should be ignored
+   */
+  @VisibleForTesting
+  boolean checkSymlink(File jar) {
+    if (Files.isSymbolicLink(jar.toPath())) {
+      try {
+        java.nio.file.Path link = Files.readSymbolicLink(jar.toPath());
+        java.nio.file.Path jarPath = Paths.get(jar.getAbsolutePath());
+        String linkString = link.toString();
+        java.nio.file.Path jarParent = jarPath.getParent();
+        java.nio.file.Path linkPath =
+            jarParent == null ? null : jarParent.resolve(linkString);
+        java.nio.file.Path linkPathParent =
+            linkPath == null ? null : linkPath.getParent();
+        java.nio.file.Path normalizedLinkPath =
+            linkPathParent == null ? null : linkPathParent.normalize();
+        if (normalizedLinkPath != null && jarParent.equals(
+            normalizedLinkPath)) {
+          LOG.info(String.format("Ignoring same directory link %s to %s",
+              jarPath.toString(), link.toString()));
+          return true;
+        }
+      } catch (NotLinkException ex) {
+        LOG.debug("Not a link {}", jar);
+      } catch (IOException ex) {
+        LOG.warn("Cannot read symbolic link {}", jar, ex);
+      }
+    }
+    return false;
+  }
+
   private void validateTargetPath() throws UploaderException {
     if (!target.startsWith("hdfs:/") &&
         !target.startsWith("file:/")) {
@@ -340,6 +380,9 @@ public class FrameworkUploader implements Runnable {
         .withDescription(
             "Desired replication count")
         .hasArg().create("replication"));
+    opts.addOption(OptionBuilder
+        .withDescription("Ignore symlinks into the same directory")
+        .create("nosymlink"));
     GenericOptionsParser parser = new GenericOptionsParser(opts, args);
     if (parser.getCommandLine().hasOption("help") ||
         parser.getCommandLine().hasOption("h")) {
@@ -354,6 +397,9 @@ public class FrameworkUploader implements Runnable {
         "blacklist", DefaultJars.DEFAULT_EXCLUDED_MR_JARS);
     replication = Short.parseShort(parser.getCommandLine().getOptionValue(
         "replication", "10"));
+    if (parser.getCommandLine().hasOption("nosymlink")) {
+      ignoreSymlink = true;
+    }
     String fs = parser.getCommandLine()
         .getOptionValue("fs", null);
     if (fs == null) {
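For a quick manual check of the new behaviour, something along these lines exercises checkSymlink() directly. The temp-directory setup is illustrative and mirrors what the new unit test below does; the class has to sit in org.apache.hadoop.mapred.uploader because checkSymlink() is package-private (@VisibleForTesting):

package org.apache.hadoop.mapred.uploader;

import java.nio.file.Files;
import java.nio.file.Path;

/** Illustrative demo; not part of the patch. */
public final class CheckSymlinkDemo {
  public static void main(String[] args) throws Exception {
    FrameworkUploader uploader = new FrameworkUploader();
    Path dir = Files.createTempDirectory("uploader-symlink-demo");
    Path jar = Files.createFile(dir.resolve("lib.jar"));
    // A same-directory alias: lib-current.jar -> lib.jar
    Path alias = Files.createSymbolicLink(
        dir.resolve("lib-current.jar"), jar.getFileName());
    System.out.println(uploader.checkSymlink(jar.toFile()));    // false: regular file
    System.out.println(uploader.checkSymlink(alias.toFile()));  // true: same-directory link
  }
}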

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e404650f/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java
index f3e4fc5..ef64bfe 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java
@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.mapred.uploader;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -32,6 +35,8 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -321,4 +326,52 @@ public class TestFrameworkUploader {
 
   }
 
+  /**
+   * Test symlink handling; skipped when the platform does not support symlinks.
+   */
+  @Test
+  public void testNativeIO() throws IOException {
+    FrameworkUploader uploader = new FrameworkUploader();
+    File parent = new File(testDir);
+    try {
+      // Create a parent directory
+      parent.deleteOnExit();
+      Assert.assertTrue(parent.mkdirs());
+
+      // Create a target file
+      File targetFile = new File(parent, "a.txt");
+      try(FileOutputStream os = new FileOutputStream(targetFile)) {
+        IOUtils.writeLines(Lists.newArrayList("a", "b"), null, os);
+      }
+      Assert.assertFalse(uploader.checkSymlink(targetFile));
+
+      // Create a symlink to the target
+      File symlinkToTarget = new File(parent, "symlinkToTarget.txt");
+      try {
+        Files.createSymbolicLink(
+            Paths.get(symlinkToTarget.getAbsolutePath()),
+            Paths.get(targetFile.getAbsolutePath()));
+      } catch (UnsupportedOperationException e) {
+        // Symlinks are not supported, so ignore the test
+        Assume.assumeTrue(false);
+      }
+      Assert.assertTrue(uploader.checkSymlink(symlinkToTarget));
+
+      // Create a symlink outside the current directory
+      File symlinkOutside = new File(parent, "symlinkToParent.txt");
+      try {
+        Files.createSymbolicLink(
+            Paths.get(symlinkOutside.getAbsolutePath()),
+            Paths.get(parent.getAbsolutePath()));
+      } catch (UnsupportedOperationException e) {
+        // Symlinks are not supported, so ignore the test
+        Assume.assumeTrue(false);
+      }
+      Assert.assertFalse(uploader.checkSymlink(symlinkOutside));
+    } finally {
+      FileUtils.deleteDirectory(parent);
+    }
+
+  }
+
 }

