This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new c390dfc8b NUTCH-3031 ProtocolFactory host mapper to support domains
c390dfc8b is described below

commit c390dfc8b5c15db74d61c83e79f8e17d9bdc7b3f
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Tue Mar 12 17:29:20 2024 +0000

    NUTCH-3031 ProtocolFactory host mapper to support domains
---
 src/java/org/apache/nutch/protocol/ProtocolFactory.java | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/java/org/apache/nutch/protocol/ProtocolFactory.java b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
index a545a4cd0..dc274b7e1 100644
--- a/src/java/org/apache/nutch/protocol/ProtocolFactory.java
+++ b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
@@ -29,6 +29,7 @@ import org.apache.nutch.plugin.ExtensionPoint;
 import org.apache.nutch.plugin.PluginRepository;
 import org.apache.nutch.plugin.PluginRuntimeException;
 import org.apache.nutch.util.ObjectCache;
+import org.apache.nutch.util.URLUtil;
 
 import org.apache.commons.lang.StringUtils;
 
@@ -130,8 +131,16 @@ public class ProtocolFactory {
 
       // First attempt to resolve a protocol implementation by hostname
       String host = url.getHost();
+      String domain = URLUtil.getDomainName(url).toLowerCase().trim();
+      String hostOrDomain = null;
+      Extension extension = null;
       if (hostProtocolMapping.containsKey(host)) {
-        Extension extension = getExtensionById(hostProtocolMapping.get(host));
+        hostOrDomain = host;
+      } else if (hostProtocolMapping.containsKey(domain)) {
+        hostOrDomain = domain;
+      }
+      if (hostOrDomain != null) {
+        extension = getExtensionById(hostProtocolMapping.get(hostOrDomain));
         if (extension != null) {
           protocol = getProtocolInstanceByExtension(extension);
         }
@@ -141,7 +150,7 @@ public class ProtocolFactory {
       if (protocol == null) {
         // Protocol listed in default map?
         if (defaultProtocolImplMapping.containsKey(url.getProtocol())) {
-          Extension extension = getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
+          extension = getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
           if (extension != null) {
             protocol = getProtocolInstanceByExtension(extension);
           }
@@ -150,7 +159,7 @@ public class ProtocolFactory {
 
       // Still couldn't find a protocol? Attempt by protocol
       if (protocol == null) {
-        Extension extension = findExtension(url.getProtocol(), "protocolName");
+        extension = findExtension(url.getProtocol(), "protocolName");
         if (extension != null) {
           protocol = getProtocolInstanceByExtension(extension);
         }

Reply via email to