Author: markus
Date: Mon Jan 11 17:10:30 2016
New Revision: 1724085

URL: http://svn.apache.org/viewvc?rev=1724085&view=rev
Log:
NUTCH-2190 Protocol normalizer

Added:
    nutch/trunk/src/plugin/urlnormalizer-protocol/
    nutch/trunk/src/plugin/urlnormalizer-protocol/build.xml
    nutch/trunk/src/plugin/urlnormalizer-protocol/data/
    nutch/trunk/src/plugin/urlnormalizer-protocol/data/protocols.txt
    nutch/trunk/src/plugin/urlnormalizer-protocol/ivy.xml
    nutch/trunk/src/plugin/urlnormalizer-protocol/plugin.xml
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/
    nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/
    
nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/build.xml
    nutch/trunk/default.properties
    nutch/trunk/src/plugin/build.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1724085&r1=1724084&r2=1724085&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jan 11 17:10:30 2016
@@ -1,5 +1,7 @@
 Nutch Change Log
 
+* NUTCH-2190 Protocol normalizer (markus)
+
 * NUTCH-1838 Host and domain based regex and automaton filtering (markus)
 
 * NUTCH-2178 DeduplicationJob to optionally group on host or domain (markus)

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1724085&r1=1724084&r2=1724085&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Mon Jan 11 17:10:30 2016
@@ -224,6 +224,7 @@
       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/>
@@ -660,6 +661,7 @@
       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/>
@@ -1082,6 +1084,8 @@
         <source path="${plugins.dir}/urlnormalizer-host/src/test/" />
         <source path="${plugins.dir}/urlnormalizer-pass/src/java/" />
         <source path="${plugins.dir}/urlnormalizer-pass/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-protocol/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-protocol/src/test/" />
         <source path="${plugins.dir}/urlnormalizer-querystring/src/java/" />
         <source path="${plugins.dir}/urlnormalizer-querystring/src/test/" />
         <source path="${plugins.dir}/urlnormalizer-regex/src/java/" />

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1724085&r1=1724084&r2=1724085&view=diff
==============================================================================
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Mon Jan 11 17:10:30 2016
@@ -110,6 +110,7 @@ plugins.urlnormalizer=\
    org.apache.nutch.net.urlnormalizer.basic*:\
    org.apache.nutch.net.urlnormalizer.host*:\
    org.apache.nutch.net.urlnormalizer.pass*:\
+   org.apache.nutch.net.urlnormalizer.protocol*:\
    org.apache.nutch.net.urlnormalizer.querystring*:\
    org.apache.nutch.net.urlnormalizer.regex*
 

Modified: nutch/trunk/src/plugin/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1724085&r1=1724084&r2=1724085&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Mon Jan 11 17:10:30 2016
@@ -82,6 +82,7 @@
      <ant dir="urlnormalizer-basic" target="deploy"/>
      <ant dir="urlnormalizer-host" target="deploy"/>
      <ant dir="urlnormalizer-pass" target="deploy"/>
+     <ant dir="urlnormalizer-protocol" target="deploy"/>
      <ant dir="urlnormalizer-querystring" target="deploy"/>
      <ant dir="urlnormalizer-regex" target="deploy"/>
      <ant dir="urlnormalizer-slash" target="deploy"/>
@@ -125,6 +126,7 @@
      <ant dir="urlnormalizer-basic" target="test"/>
      <ant dir="urlnormalizer-host" target="test"/>
      <ant dir="urlnormalizer-pass" target="test"/>
+     <ant dir="urlnormalizer-protocol" target="test"/>
      <ant dir="urlnormalizer-querystring" target="test"/>
      <ant dir="urlnormalizer-regex" target="test"/>
      <ant dir="urlnormalizer-slash" target="test"/>
@@ -193,6 +195,7 @@
     <ant dir="urlnormalizer-basic" target="clean"/>
     <ant dir="urlnormalizer-host" target="clean"/>
     <ant dir="urlnormalizer-pass" target="clean"/>
+    <ant dir="urlnormalizer-protocol" target="clean"/>
     <ant dir="urlnormalizer-querystring" target="clean"/>
     <ant dir="urlnormalizer-regex" target="clean"/>
     <ant dir="urlnormalizer-slash" target="clean"/>

Added: nutch/trunk/src/plugin/urlnormalizer-protocol/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/build.xml?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/build.xml (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/build.xml Mon Jan 11 17:10:30 2016
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="urlnormalizer-protocol" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- for junit test -->
+  <mkdir dir="${build.test}/data"/>
+  <copy todir="${build.test}/data">
+    <fileset dir="data" />
+  </copy>
+</project>

Added: nutch/trunk/src/plugin/urlnormalizer-protocol/data/protocols.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/data/protocols.txt?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/data/protocols.txt (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/data/protocols.txt Mon Jan 11 17:10:30 2016
@@ -0,0 +1,7 @@
+# format: host\tprotocol\n
+
+example.org    http
+example.net    http
+
+example.io     https
+example.nl     https

Added: nutch/trunk/src/plugin/urlnormalizer-protocol/ivy.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/ivy.xml?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/ivy.xml (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/ivy.xml Mon Jan 11 17:10:30 2016
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+    <include file="${nutch.root}/ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>

Added: nutch/trunk/src/plugin/urlnormalizer-protocol/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/plugin.xml?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/plugin.xml (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/plugin.xml Mon Jan 11 17:10:30 2016
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="urlnormalizer-protocol"
+   name="Protocol URL Normalizer"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="urlnormalizer-protocol.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.net.urlnormalizer.protocol"
+              name="Nutch Protocol URL Normalizer"
+              point="org.apache.nutch.net.URLNormalizer">
+      <implementation id="ProtocolURLNormalizer"
+                      class="org.apache.nutch.net.urlnormalizer.protocol.ProtocolURLNormalizer">
+        <parameter name="file" value="protocols.txt"/>
+      </implementation>
+   </extension>
+
+</plugin>

Added: 
nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java Mon Jan 11 17:10:30 2016
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.protocol;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.URLNormalizer;
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.nutch.util.URLUtil;
+
+/**
+ * URL normalizer that rewrites the protocol (scheme) of a URL to the protocol
+ * configured for the URL's host, e.g. to force https for a given host.
+ *
+ * Rules are taken from, in order of precedence: the
+ * "urlnormalizer.protocols.rules" property (rules as a string), the file
+ * passed to the constructor, the "file" attribute of this plugin in
+ * plugin.xml, or the "urlnormalizer.protocols.file" property. Each rule line
+ * consists of a host and a protocol separated by whitespace; blank lines and
+ * lines starting with '#' are ignored.
+ *
+ * The rule map is static: it is filled by the first successful load and
+ * later calls to {@link #setConf(Configuration)} keep the rules already
+ * loaded.
+ *
+ * @author mar...@openindex.io
+ */
+public class ProtocolURLNormalizer implements URLNormalizer {
+
+  private Configuration conf;
+
+  private static final Logger LOG = LoggerFactory.getLogger(ProtocolURLNormalizer.class);
+
+  private static final char QUESTION_MARK = '?';
+  private static final String PROTOCOL_DELIMITER = "://";
+
+  private static String attributeFile = null;
+  private String protocolsFile = null;
+
+  // Maps a host name to the protocol (e.g. "http" or "https") that URLs of
+  // that host must use. Shared by all instances and filled only once.
+  private static final Map<String,String> protocolsMap = new HashMap<String,String>();
+
+  /** Creates a normalizer whose rules are located via plugin.xml or the configuration. */
+  public ProtocolURLNormalizer() {}
+
+  /**
+   * Creates a normalizer that reads its rules from the given file.
+   *
+   * @param protocolsFile name of the rules file; takes precedence over the
+   *        plugin.xml attribute and the "urlnormalizer.protocols.file" property
+   */
+  public ProtocolURLNormalizer(String protocolsFile) {
+    this.protocolsFile = protocolsFile;
+  }
+
+  /**
+   * Parses host/protocol rules from the reader into the shared rule map.
+   * Does nothing if rules have already been loaded.
+   *
+   * The method is static synchronized because the map it guards is static;
+   * an instance-level lock would not protect it across instances.
+   *
+   * @param configReader source of the rule lines; not closed by this method
+   * @throws IOException if reading a line fails
+   */
+  private static synchronized void readConfiguration(Reader configReader) throws IOException {
+    if (protocolsMap.size() > 0) {
+      return;
+    }
+
+    BufferedReader reader = new BufferedReader(configReader);
+    String line;
+
+    while ((line = reader.readLine()) != null) {
+      // String.trim() returns a new string, so the result must be assigned
+      line = line.trim();
+      if (line.isEmpty() || line.startsWith("#")) {
+        continue;
+      }
+
+      // host and protocol are separated by the first run of spaces and/or tabs
+      String[] parts = line.split("\\s+", 2);
+      if (parts.length < 2) {
+        LOG.warn("Skipping malformed protocol rule (expected host and protocol): " + line);
+        continue;
+      }
+
+      protocolsMap.put(parts[0], parts[1]);
+    }
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * Stores the configuration and loads the host/protocol rules.
+   * I/O errors while reading the rules are logged, not propagated.
+   *
+   * @param conf configuration used to resolve the plugin attribute, the
+   *        rule properties and the rules resource
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+
+    // look up the "file" attribute of this plugin's extension in plugin.xml
+    String pluginName = "urlnormalizer-protocol";
+    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
+      URLNormalizer.class.getName()).getExtensions();
+    for (int i = 0; i < extensions.length; i++) {
+      Extension extension = extensions[i];
+      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
+        attributeFile = extension.getAttribute("file");
+        break;
+      }
+    }
+
+    // handle blank non empty input
+    if (attributeFile != null && attributeFile.trim().equals("")) {
+      attributeFile = null;
+    }
+
+    if (attributeFile != null) {
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Attribute \"file\" is defined for plugin " + pluginName
+          + " as " + attributeFile);
+      }
+    }
+    else {
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin "
+          + pluginName);
+      }
+    }
+
+    // the protocols file given to the constructor and the attribute "file"
+    // take precedence over the configuration property if defined
+    String file = conf.get("urlnormalizer.protocols.file");
+    String stringRules = conf.get("urlnormalizer.protocols.rules");
+    if (protocolsFile != null) {
+      file = protocolsFile;
+    }
+    else if (attributeFile != null) {
+      file = attributeFile;
+    }
+
+    Reader reader = null;
+    try {
+      if (stringRules != null) { // takes precedence over files
+        reader = new StringReader(stringRules);
+      } else if (file == null) {
+        // nothing to load from; avoid passing null to the resource/file lookup
+        LOG.warn("No protocol rules or protocols file configured for plugin "
+          + pluginName);
+        return;
+      } else {
+        reader = conf.getConfResourceAsReader(file);
+        // not found as a configuration resource, try it as a plain file
+        if (reader == null) {
+          reader = new FileReader(file);
+        }
+      }
+      readConfiguration(reader);
+    }
+    catch (IOException e) {
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+    }
+    finally {
+      if (reader != null) {
+        try {
+          reader.close();
+        }
+        catch (IOException e) {
+          LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
+        }
+      }
+    }
+  }
+
+  /**
+   * Normalizes the URL's protocol; see
+   * {@link #normalize(String, CrawlDatum, String)}.
+   */
+  public String normalize(String url, String scope) throws MalformedURLException {
+    return normalize(url, null, scope);
+  }
+
+  /**
+   * Rewrites the protocol of the URL if a rule exists for its host and the
+   * URL uses a different protocol. The rewritten URL is rebuilt from the
+   * required protocol, the host, the path and the query string; a fragment
+   * or user info is not carried over.
+   *
+   * URLs with an explicit port are returned unchanged: the port belongs to
+   * the original protocol, so neither keeping nor dropping it would yield a
+   * reliable URL after switching the protocol.
+   *
+   * @param url the URL to normalize
+   * @param crawlDatum not used
+   * @param scope not used
+   * @return the URL with the required protocol, or the input URL if no
+   *         change is needed
+   * @throws MalformedURLException if the URL cannot be parsed
+   */
+  public String normalize(String url, CrawlDatum crawlDatum, String scope) throws MalformedURLException {
+    // Get URL repr.
+    URL u = new URL(url);
+
+    // Get the host
+    String host = u.getHost();
+
+    // Do we have a rule for this host, and is the protocol incorrect?
+    String requiredProtocol = protocolsMap.get(host);
+    if (requiredProtocol == null || requiredProtocol.equals(u.getProtocol())) {
+      return url;
+    }
+
+    // Do not change the protocol if a port is set explicitly
+    if (u.getPort() != -1) {
+      return url;
+    }
+
+    // Rebuild URL with new protocol
+    StringBuilder buffer = new StringBuilder(requiredProtocol);
+    buffer.append(PROTOCOL_DELIMITER);
+    buffer.append(host);
+    buffer.append(u.getPath());
+
+    String queryString = u.getQuery();
+    if (queryString != null) {
+      buffer.append(QUESTION_MARK);
+      buffer.append(queryString);
+    }
+
+    return buffer.toString();
+  }
+}
\ No newline at end of file

Added: 
nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java?rev=1724085&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java (added)
+++ nutch/trunk/src/plugin/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java Mon Jan 11 17:10:30 2016
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.protocol;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests {@link ProtocolURLNormalizer} against the sample rules in
+ * protocols.txt: example.org and example.net require http, example.io and
+ * example.nl require https.
+ */
+public class TestProtocolURLNormalizer extends TestCase {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  public void testProtocolURLNormalizer() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String protocolsFile = SAMPLES + SEPARATOR + "protocols.txt";
+    ProtocolURLNormalizer normalizer = new ProtocolURLNormalizer(protocolsFile);
+    normalizer.setConf(conf);
+
+    // no change: http hosts already using http
+    assertEquals("http://example.org/", normalizer.normalize("http://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("http://example.net/", URLNormalizers.SCOPE_DEFAULT));
+
+    // https to http
+    assertEquals("http://example.org/", normalizer.normalize("https://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("https://example.net/", URLNormalizers.SCOPE_DEFAULT));
+
+    // no change: https hosts already using https
+    assertEquals("https://example.io/", normalizer.normalize("https://example.io/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("https://example.nl/", normalizer.normalize("https://example.nl/", URLNormalizers.SCOPE_DEFAULT));
+
+    // http to https
+    assertEquals("https://example.io/", normalizer.normalize("http://example.io/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("https://example.nl/", normalizer.normalize("http://example.nl/", URLNormalizers.SCOPE_DEFAULT));
+  }
+}


Reply via email to