This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e969eaec NUTCH-2930 Protocol-okhttp: implement IP filter (#736)
7e969eaec is described below

commit 7e969eaec1ab8e9e21667faf6cf1881fb10cfb31
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Fri Aug 19 15:26:07 2022 +0200

    NUTCH-2930 Protocol-okhttp: implement IP filter (#736)
    
    - add include/exclude rules as list of IP address, CIDR notation
      or predefined IP ranges (localhost, loopback, sitelocal)
---
 conf/nutch-default.xml                             |  25 +++
 .../org/apache/nutch/protocol/okhttp/CIDR.java     |  79 ++++++++
 .../nutch/protocol/okhttp/IPFilterRules.java       | 129 +++++++++++++
 .../org/apache/nutch/protocol/okhttp/OkHttp.java   |  35 ++++
 .../protocol/okhttp/TestBadServerResponses.java    |   2 +-
 .../protocol/okhttp/TestIPAddressFiltering.java    | 207 +++++++++++++++++++++
 .../nutch/protocol/okhttp/TestProtocolOkHttp.java  |   2 +-
 .../protocol/AbstractHttpProtocolPluginTest.java   |  22 ++-
 8 files changed, 494 insertions(+), 7 deletions(-)

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 1ad02a021..2a6325884 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -449,6 +449,31 @@
   </description>
 </property>
 
+<property>
+  <name>http.filter.ipaddress.include</name>
+  <value></value>
+  <description>
+    If not empty: only fetch content from these IP addresses defined
+    as a comma-separated list of a single IP address, a CIDR notation,
+    or one of the following pre-defined IP address types: localhost,
+    loopback, sitelocal. The property http.filter.ipaddress.exclude
+    can be used to block subranges in the included list of ranges.
+    Note: supported only by protocol-okhttp.
+  </description>
+</property>
+
+<property>
+  <name>http.filter.ipaddress.exclude</name>
+  <value></value>
+  <description>
+    If not empty: do not fetch content from these IP addresses defined
+    as a comma-separated list of a single IP address, a CIDR notation,
+    or one of the following pre-defined IP address types: localhost,
+    loopback, sitelocal. Note: supported only by protocol-okhttp.
+  </description>
+</property>
+
+
 <!-- FTP properties -->
 
 <property>
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java
new file mode 100644
index 000000000..3add082a8
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import java.net.InetAddress;
+
+import com.google.common.net.InetAddresses;
+
+/**
+ * Parse a <a href=
+ * "https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing">CIDR</a> block
+ * notation and test whether an IP address is contained in the subnet range
+ * defined by the CIDR.
+ */
+public class CIDR {
+  InetAddress addr;
+  int mask;
+
+  public CIDR(InetAddress address, int mask) {
+    this.addr = address;
+    this.mask = mask;
+  }
+
+  public CIDR(String cidr) throws IllegalArgumentException {
+    String ipStr = cidr;
+    int sep = cidr.indexOf('/');
+    if (sep > -1) {
+      ipStr = cidr.substring(0, sep);
+    }
+    addr = InetAddresses.forString(ipStr);
+    if (sep > -1) {
+      mask = Integer.parseInt(cidr.substring(sep + 1));
+    } else {
+      mask = addr.getAddress().length * 8;
+    }
+    if (cidr.indexOf(':') > -1 && addr.getAddress().length == 4) {
+      // IPv4-mapped IPv6 addresses are automatically converted to IPv4,
+      // need to shift the mask
+      mask = Math.max(0, mask - 96);
+    }
+  }
+
+  public boolean contains(InetAddress address) {
+    byte[] addr0 = addr.getAddress();
+    byte[] addr1 = address.getAddress();
+    if (addr0.length != addr1.length) {
+      // not comparing IPv4 and IPv6 addresses
+      return false;
+    }
+    for (int i = 0; i < addr0.length; i++) {
+      int remainingMaskBits = mask - (i * 8);
+      if (remainingMaskBits <= 0)
+        return true;
+      int m = ~(0xff >> remainingMaskBits); // mask for byte under cursor
+      if ((addr0[i] & m) != (addr1[i] & m))
+        return false;
+    }
+    return true;
+  }
+
+  @Override
+  public String toString() {
+    return addr + "/" + mask;
+  }
+}
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java
new file mode 100644
index 000000000..868732fe5
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import java.lang.invoke.MethodHandles;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Predicate;
+
+import org.apache.hadoop.conf.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Optionally limit or block connections to IP address ranges
+ * (localhost/loopback or site-local addresses, subnet ranges given in CIDR
+ * notation, or single IP addresses).
+ * 
+ * IP filter rules are built from two Nutch properties:
+ * <ul>
+ * <li><code>http.filter.ipaddress.include</code> defines all allowed IP ranges.
+ * If not defined or empty all IP addresses (and not explicitly excluded) are
+ * allowed.
+ * <li><code>http.filter.ipaddress.exclude</code> defines excluded IP address
+ * ranges.
+ * </ul>
+ *
+ * IP ranges can be defined as
+ * <ul>
+ * <li>IP address, e.g. <code>127.0.0.1</code> or <code>::1</code> (IPv6)</li>
+ * <li>CIDR notation, e.g. <code>192.168.0.0/16</code> or
+ * <code>fd00::/8</code></li>
+ * <li><code>localhost</code> or <code>loopback</code> applies to all IP
+ * addresses for which {@link InetAddress#isLoopbackAddress()} is true</li>
+ * <li><code>sitelocal</code> applies to all IP
+ * addresses for which {@link InetAddress#isSiteLocalAddress()} is true</li>
+ * </ul>
+ *
+ * Multiple IP ranges are separated by a comma, e.g. <code>loopback,sitelocal,fd00::/8</code>
+ *
+ */
+public class IPFilterRules {
+
+  protected static final Logger LOG = LoggerFactory
+      .getLogger(MethodHandles.lookup().lookupClass());
+
+  List<Predicate<InetAddress>> includeRules;
+  List<Predicate<InetAddress>> excludeRules;
+
+  public IPFilterRules(Configuration conf) {
+    includeRules = parseIPRules(conf, "http.filter.ipaddress.include");
+    excludeRules = parseIPRules(conf, "http.filter.ipaddress.exclude");
+  }
+
+  public boolean isEmpty() {
+    return !(includeRules.size() > 0 || excludeRules.size() > 0);
+  }
+
+  public boolean accept(InetAddress address) {
+    boolean accept = true;
+    if (includeRules.size() > 0) {
+      accept = false;
+      for (Predicate<InetAddress> rule : includeRules) {
+        if (rule.test(address)) {
+          accept = true;
+          break;
+        }
+      }
+    }
+    if (accept && excludeRules.size() > 0) {
+      for (Predicate<InetAddress> rule : excludeRules) {
+        if (rule.test(address)) {
+          accept = false;
+          break;
+        }
+      }
+    }
+    return accept;
+  }
+
+  private static List<Predicate<InetAddress>> parseIPRules(Configuration conf,
+      String ipRuleProperty) {
+    List<Predicate<InetAddress>> rules = new ArrayList<>();
+    String[] ipRules = conf.getStrings(ipRuleProperty);
+    if (ipRules == null) {
+      return rules;
+    }
+    for (String ipRule : ipRules) {
+      switch (ipRule.toLowerCase()) {
+      case "localhost":
+      case "loopback":
+        rules.add((InetAddress a) -> a.isLoopbackAddress());
+        break;
+      case "sitelocal":
+        rules.add((InetAddress a) -> a.isSiteLocalAddress());
+        break;
+      default:
+        try {
+          CIDR cidr = new CIDR(ipRule);
+          rules.add((InetAddress a) -> cidr.contains(a));
+        } catch (IllegalArgumentException e) {
+          LOG.error(
+              "Failed to parse {} as CIDR, ignoring to configure IP rules ({})",
+              ipRule, ipRuleProperty);
+        }
+      }
+    }
+    if (rules.size() > 0) {
+      LOG.info("Found {} IP filter rules for {}", rules.size(), ipRuleProperty);
+    }
+    return rules;
+  }
+
+}
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
index 63fa32837..876c4ef24 100644
--- a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
@@ -212,6 +212,11 @@ public class OkHttp extends HttpBase {
       }
     }
 
+    IPFilterRules ipFilterRules = new IPFilterRules(conf);
+    if (!ipFilterRules.isEmpty()) {
+      builder.addNetworkInterceptor(new HTTPFilterIPAddressInterceptor(ipFilterRules));
+    }
+
     if (this.storeIPAddress || this.storeHttpHeaders || this.storeHttpRequest) {
       builder.addNetworkInterceptor(new HTTPHeadersInterceptor());
     }
@@ -259,6 +264,36 @@ public class OkHttp extends HttpBase {
     }
   }
 
+  class HTTPFilterIPAddressInterceptor implements Interceptor {
+
+    IPFilterRules rules;
+
+    public HTTPFilterIPAddressInterceptor(IPFilterRules rules) {
+      this.rules = rules;
+    }
+
+    @Override
+    public okhttp3.Response intercept(Interceptor.Chain chain)
+        throws IOException {
+
+      Connection connection = chain.connection();
+      InetAddress address = connection.socket().getInetAddress();
+
+      boolean accept = rules.accept(address);
+
+      Request request = chain.request();
+
+      if (accept) {
+        return chain.proceed(request);
+      }
+
+      LOG.warn("Blocked connection to IP address {}: {}",
+          address.getHostAddress(), request.url());
+      throw new IOException(
+          "Forbidden connection to IP address " + address.getHostAddress());
+    }
+  }
+
   class HTTPHeadersInterceptor implements Interceptor {
 
     private String getNormalizedProtocolName(Protocol protocol) {
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
index 5a587fea2..7c5d0f15c 100644
--- a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
@@ -34,7 +34,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Test cases for protocol-http - robustness regarding bad server responses:
+ * Test cases for protocol-okhttp - robustness regarding bad server responses:
  * malformed HTTP header lines, etc. See, NUTCH-2549.
  */
 public class TestBadServerResponses extends AbstractHttpProtocolPluginTest {
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java
new file mode 100644
index 000000000..dbd1b846d
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.net.InetAddress;
+import java.util.function.Function;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.protocol.AbstractHttpProtocolPluginTest;
+import org.junit.Test;
+
+import com.google.common.net.InetAddresses;
+
+/**
+ * Test cases for protocol-okhttp IP address filtering
+ */
+public class TestIPAddressFiltering extends AbstractHttpProtocolPluginTest {
+
+  @Override
+  protected String getPluginClassName() {
+    return "org.apache.nutch.protocol.okhttp.OkHttp";
+  }
+
+  public InetAddress parseIP(String ip) {
+    // the Java built-in may perform DNS lookup (and throw UnknownHostException)
+    // if not a well-formed IP address:
+    //   InetAddress.getByName(ip);
+
+    // use Guava because it does not perform DNS lookups, may throw
+    // IllegalArgumentException if IP address is not well-formed
+    return InetAddresses.forString(ip);
+  }
+
+  public void testCIDRcontains(String cidr, String ip) {
+    CIDR c = new CIDR(cidr);
+    InetAddress i = parseIP(ip);
+    assertTrue(i + " should be in " + c, c.contains(i));
+  }
+
+  public void testCIDRnotContains(String cidr, String ip) {
+    CIDR c = new CIDR(cidr);
+    InetAddress i = parseIP(ip);
+    assertFalse(i + " should not be in " + c, c.contains(i));
+  }
+
+  /** Tests for {@link CIDR} */
+  @Test
+  public void testCIDRs() {
+    // private subnets IPv4
+    testCIDRcontains("127.0.0.0/8", "127.0.0.1");
+    testCIDRcontains("10.0.0.0/8", "10.0.0.13");
+    testCIDRcontains("172.16.0.0/12", "172.17.0.0");
+    testCIDRcontains("192.168.0.0/16", "192.168.0.1");
+
+    // private subnets IPv6
+    testCIDRcontains("::1/128", "::1");
+    testCIDRcontains("127.0.0.0/8", "::ffff:127.0.0.1");
+    testCIDRcontains("::ffff:7f00:0/104", "::ffff:127.0.0.1");
+    testCIDRcontains("fd00::/8", "fd12:3456:789a:1::1");
+    testCIDRcontains("fe80::/10", "fe80::2f29:b6f0:a4c:32ae");
+
+    // test single IP address (with and without mask)
+    testCIDRcontains("127.0.0.1", "127.0.0.1");
+    testCIDRcontains("127.0.0.1/24", "127.0.0.1");
+
+    // test off-by-one boundaries
+    testCIDRnotContains("127.0.0.0/8", "128.0.0.0");
+    testCIDRnotContains("10.0.0.0/8", "11.0.0.0");
+    testCIDRnotContains("10.0.0.0/8", "9.255.255.255");
+    testCIDRnotContains("172.16.0.0/12", "172.32.0.0");
+    testCIDRnotContains("172.16.0.0/12", "171.255.255.255");
+  }
+
+  public void testFilter(Configuration conf, String[] included, String[] excluded) {
+    IPFilterRules ipFilterRules = new IPFilterRules(conf);
+    for (String address : included) {
+      assertTrue("Address " + address + " should be included",
+          ipFilterRules.accept(parseIP(address)));
+    }
+    for (String address : excluded) {
+      assertFalse("Address " + address + " should be excluded",
+          ipFilterRules.accept(parseIP(address)));
+    }
+  }
+
+  /** Tests for {@link IPFilterRules} */
+  @Test
+  public void testIPAddressFilterRules() {
+    String[] publicAddresses = {"93.184.216.34", "93.184.216.43"};
+    String[] loopbackAddresses = {"127.0.0.1", "127.0.0.2", "::1"};
+    String[] sitelocalAddresses = {"10.0.0.13", "172.17.0.0", "192.168.0.1"};
+
+    conf.set("http.filter.ipaddress.include", "");
+    conf.set("http.filter.ipaddress.exclude", "localhost");
+    testFilter(conf, new String[0], loopbackAddresses);
+
+    conf.set("http.filter.ipaddress.exclude", "loopback,sitelocal");
+    testFilter(conf, publicAddresses, loopbackAddresses);
+    testFilter(conf, publicAddresses, sitelocalAddresses);
+
+    conf.set("http.filter.ipaddress.include", "93.184.216.0/8");
+    conf.set("http.filter.ipaddress.exclude", "");
+    testFilter(conf, publicAddresses, loopbackAddresses);
+
+    conf.set("http.filter.ipaddress.include", "localhost");
+    conf.set("http.filter.ipaddress.exclude", "");
+    testFilter(conf, loopbackAddresses, publicAddresses);
+  }
+  
+  public void testPredefinedAddressRange(String ipAddress, String type) {
+    try {
+      InetAddress addr = InetAddresses.forString(ipAddress);
+      Function<InetAddress,Boolean> pred = null;
+      switch (type.toLowerCase()) {
+      case "localhost":
+      case "loopback":
+        pred = InetAddress::isLoopbackAddress;
+        break;
+      case "sitelocal":
+        pred = InetAddress::isSiteLocalAddress;
+        break;
+      default:
+        fail("Unknown IP address type " + type);
+      }
+      assertTrue(ipAddress + " is not recognized as " + type + " address", pred.apply(addr));
+    } catch (IllegalArgumentException e) {
+      fail("Not a valid IP address string: " + ipAddress);
+    }
+  }
+
+  /**
+   * Verify that certain IP addresses are matched by predefined IP classes:
+   * localhost, loopback, sitelocal. This verifies that the predefined classes
+   * are properly mapped to the underlying predicates of the class
+   * {@link InetAddress}.
+   */
+  @Test
+  public void testPredefinedRanges() throws Exception {
+    testPredefinedAddressRange("127.0.0.1", "localhost");
+    testPredefinedAddressRange("127.0.0.1", "loopback");
+    testPredefinedAddressRange("10.0.0.13", "sitelocal");
+    testPredefinedAddressRange("172.17.0.0", "sitelocal");
+    testPredefinedAddressRange("192.168.0.1", "sitelocal");
+
+    testPredefinedAddressRange("::1", "loopback");
+    testPredefinedAddressRange("::ffff:127.0.0.1", "loopback");
+    // fec0::/10 - Java follows the "old" standard to define private IPv6 addresses
+    testPredefinedAddressRange("fec0::", "sitelocal");
+    // fd::/8 - not (yet?) recognized as site-local address by InetAddress::isSiteLocalAddress
+    //testPredefinedAddressRange("fd12:3456:789a:1::1", "sitelocal");
+  }
+
+  /**
+   * Test whether connections are blocked according to the IP filter
+   * configuration
+   */
+  @Test
+  public void testConnectionBlocking() throws Exception {
+    localHost = "127.0.0.1";
+    launchServer("/", (responseHeader + simpleContent).getBytes(UTF_8));
+
+    // without filter configured
+    conf.set("http.filter.ipaddress.exclude", "");
+    http.setConf(conf);
+    fetchPage("/", 200, "text/html");
+
+    // filter localhost
+    conf.set("http.filter.ipaddress.exclude", "localhost");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter loopback
+    conf.set("http.filter.ipaddress.exclude", "localhost");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter by IP
+    conf.set("http.filter.ipaddress.exclude", "127.0.0.1");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter by CIDR
+    conf.set("http.filter.ipaddress.exclude", "127.0.0.0/8");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+ }
+
+}
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
index 289e75672..e740ed288 100644
--- a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
@@ -25,7 +25,7 @@ import org.apache.nutch.protocol.AbstractHttpProtocolPluginTest;
 import org.junit.Test;
 
 /**
- * Test cases for protocol-http
+ * Test cases for protocol-okhttp
  */
 public class TestProtocolOkHttp extends AbstractHttpProtocolPluginTest {
 
diff --git a/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java b/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
index 3a90e21a9..322b34e99 100644
--- a/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
+++ b/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
@@ -28,7 +28,6 @@ import java.net.Socket;
 import java.net.SocketException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
@@ -60,6 +59,17 @@ public abstract class AbstractHttpProtocolPluginTest {
   protected Protocol http;
   protected ServerSocket server;
   protected Configuration conf;
+
+  /** Protocol / URL scheme used to send/receive test requests */
+  protected String protocol = "http";
+
+  /**
+   * URL host name used to represent localhost when sending/receiving test
+   * requests
+   */
+  protected String localHost = "127.0.0.1";
+
+  /** Port used to send/receive test requests */
   protected int defaultPort = 47505;
 
   protected static final String responseHeader = "HTTP/1.1 200 OK\r\n";
@@ -103,7 +113,9 @@ public abstract class AbstractHttpProtocolPluginTest {
 
   @After
   public void tearDown() throws Exception {
-    server.close();
+    if (server != null) {
+      server.close();
+    }
   }
 
   /**
@@ -123,13 +135,13 @@ public abstract class AbstractHttpProtocolPluginTest {
       BiFunction<String, String[], byte[]> responder,
       Predicate<List<String>> requestChecker) throws Exception {
     server = new ServerSocket();
-    server.bind(new InetSocketAddress("127.0.0.1", port));
+    server.bind(new InetSocketAddress(localHost, port));
     Pattern requestPattern = Pattern.compile("(?i)^GET\\s+(\\S+)");
     while (true) {
       LOG.info("Listening on port {}", port);
       if (server.isClosed()) {
         server = new ServerSocket();
-        server.bind(new InetSocketAddress("127.0.0.1", port));
+        server.bind(new InetSocketAddress(localHost, port));
       }
       Socket socket = server.accept();
       LOG.info("Connection received");
@@ -259,7 +271,7 @@ public abstract class AbstractHttpProtocolPluginTest {
    */
   protected ProtocolOutput fetchPage(int port, String page, int expectedCode,
       String expectedContentType) throws Exception {
-    URL url = new URL("http", "127.0.0.1", port, page);
+    URL url = new URL(protocol, localHost, port, page);
     LOG.info("Fetching {}", url);
     CrawlDatum crawlDatum = new CrawlDatum();
     ProtocolOutput protocolOutput = http

Reply via email to