Repository: hadoop
Updated Branches:
  refs/heads/trunk f4e1aa050 -> 1db4788b7


HADOOP-14598. Blacklist Http/HttpsFileSystem in FsUrlStreamHandlerFactory. 
Contributed by Steve Loughran.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1db4788b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1db4788b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1db4788b

Branch: refs/heads/trunk
Commit: 1db4788b7d22e57f91520e4a6971774ef84ffab9
Parents: f4e1aa0
Author: Haohui Mai <whe...@apache.org>
Authored: Tue Aug 8 16:27:23 2017 -0700
Committer: Haohui Mai <whe...@apache.org>
Committed: Tue Aug 8 16:33:18 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/fs/FsUrlConnection.java   | 10 ++++
 .../hadoop/fs/FsUrlStreamHandlerFactory.java    | 26 ++++++++++-
 .../apache/hadoop/fs/TestUrlStreamHandler.java  | 48 +++++++++++++++-----
 3 files changed, 72 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1db4788b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
index 90e75b0..03c7aed 100644
--- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
+++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
@@ -23,6 +23,10 @@ import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLConnection;
 
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -33,6 +37,8 @@ import org.apache.hadoop.conf.Configuration;
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 class FsUrlConnection extends URLConnection {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FsUrlConnection.class);
 
   private Configuration conf;
 
@@ -40,12 +46,16 @@ class FsUrlConnection extends URLConnection {
 
   FsUrlConnection(Configuration conf, URL url) {
     super(url);
+    Preconditions.checkArgument(conf != null, "null conf argument");
+    Preconditions.checkArgument(url != null, "null url argument");
     this.conf = conf;
   }
 
   @Override
   public void connect() throws IOException {
+    Preconditions.checkState(is == null, "Already connected");
     try {
+      LOG.debug("Connecting to {}", url);
       FileSystem fs = FileSystem.get(url.toURI(), conf);
       is = fs.open(new Path(url.getPath()));
     } catch (URISyntaxException e) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1db4788b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
index 91a527d..751b955 100644
--- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
+++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
@@ -22,6 +22,9 @@ import java.net.URLStreamHandlerFactory;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -41,6 +44,18 @@ import org.apache.hadoop.conf.Configuration;
 public class FsUrlStreamHandlerFactory implements
     URLStreamHandlerFactory {
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FsUrlStreamHandlerFactory.class);
+
+  /**
+   * These are the protocols with MUST NOT be exported, as doing so
+   * would conflict with the standard URL handlers registered by
+   * the JVM. Many things will break.
+   */
+  public static final String[] UNEXPORTED_PROTOCOLS = {
+      "http", "https"
+  };
+
   // The configuration holds supported FS implementation class names.
   private Configuration conf;
 
@@ -64,14 +79,20 @@ public class FsUrlStreamHandlerFactory implements
       throw new RuntimeException(io);
     }
     this.handler = new FsUrlStreamHandler(this.conf);
+    for (String protocol : UNEXPORTED_PROTOCOLS) {
+      protocols.put(protocol, false);
+    }
   }
 
   @Override
   public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
+    LOG.debug("Creating handler for protocol {}", protocol);
     if (!protocols.containsKey(protocol)) {
       boolean known = true;
       try {
-        FileSystem.getFileSystemClass(protocol, conf);
+        Class<? extends FileSystem> impl
+            = FileSystem.getFileSystemClass(protocol, conf);
+        LOG.debug("Found implementation of {}: {}", protocol, impl);
       }
       catch (IOException ex) {
         known = false;
@@ -79,9 +100,12 @@ public class FsUrlStreamHandlerFactory implements
       protocols.put(protocol, known);
     }
     if (protocols.get(protocol)) {
+      LOG.debug("Using handler for protocol {}", protocol);
       return handler;
     } else {
       // FileSystem does not know the protocol, let the VM handle this
+      LOG.debug("Unknown protocol {}, delegating to default implementation",
+          protocol);
       return null;
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1db4788b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
index 6fc97a2..5a04f67 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.fs;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 
 import java.io.File;
 import java.io.IOException;
@@ -32,6 +33,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.test.PathUtils;
+
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
@@ -39,8 +42,22 @@ import org.junit.Test;
  */
 public class TestUrlStreamHandler {
 
-  private static final File TEST_ROOT_DIR = 
PathUtils.getTestDir(TestUrlStreamHandler.class);
-    
+  private static final File TEST_ROOT_DIR =
+      PathUtils.getTestDir(TestUrlStreamHandler.class);
+
+  private static final FsUrlStreamHandlerFactory HANDLER_FACTORY
+      = new FsUrlStreamHandlerFactory();
+
+  @BeforeClass
+  public static void setupHandler() {
+
+    // Setup our own factory
+    // setURLStreamHandlerFactor is can be set at most once in the JVM
+    // the new URLStreamHandler is valid for all tests cases
+    // in TestStreamHandler
+    URL.setURLStreamHandlerFactory(HANDLER_FACTORY);
+  }
+
   /**
    * Test opening and reading from an InputStream through a hdfs:// URL.
    * <p>
@@ -55,15 +72,6 @@ public class TestUrlStreamHandler {
     Configuration conf = new HdfsConfiguration();
     MiniDFSCluster cluster = new 
MiniDFSCluster.Builder(conf).numDataNodes(2).build();
     FileSystem fs = cluster.getFileSystem();
-
-    // Setup our own factory
-    // setURLSteramHandlerFactor is can be set at most once in the JVM
-    // the new URLStreamHandler is valid for all tests cases 
-    // in TestStreamHandler
-    FsUrlStreamHandlerFactory factory =
-        new org.apache.hadoop.fs.FsUrlStreamHandlerFactory();
-    java.net.URL.setURLStreamHandlerFactory(factory);
-
     Path filePath = new Path("/thefile");
 
     try {
@@ -156,4 +164,22 @@ public class TestUrlStreamHandler {
 
   }
 
+  @Test
+  public void testHttpDefaultHandler() throws Throwable {
+    assertNull("Handler for HTTP is the Hadoop one",
+        HANDLER_FACTORY.createURLStreamHandler("http"));
+  }
+
+  @Test
+  public void testHttpsDefaultHandler() throws Throwable {
+    assertNull("Handler for HTTPS is the Hadoop one",
+        HANDLER_FACTORY.createURLStreamHandler("https"));
+  }
+
+  @Test
+  public void testUnknownProtocol() throws Throwable {
+    assertNull("Unknown protocols are not handled",
+        HANDLER_FACTORY.createURLStreamHandler("gopher"));
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to