This is an automated email from the ASF dual-hosted git repository. markus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
     new c390dfc8b NUTCH-3031 ProtocolFactory host mapper to support domains
c390dfc8b is described below

commit c390dfc8b5c15db74d61c83e79f8e17d9bdc7b3f
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Tue Mar 12 17:29:20 2024 +0000

    NUTCH-3031 ProtocolFactory host mapper to support domains
---
 src/java/org/apache/nutch/protocol/ProtocolFactory.java | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/java/org/apache/nutch/protocol/ProtocolFactory.java b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
index a545a4cd0..dc274b7e1 100644
--- a/src/java/org/apache/nutch/protocol/ProtocolFactory.java
+++ b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
@@ -29,6 +29,7 @@ import org.apache.nutch.plugin.ExtensionPoint;
 import org.apache.nutch.plugin.PluginRepository;
 import org.apache.nutch.plugin.PluginRuntimeException;
 import org.apache.nutch.util.ObjectCache;
+import org.apache.nutch.util.URLUtil;
 
 import org.apache.commons.lang.StringUtils;
 
@@ -130,8 +131,16 @@ public class ProtocolFactory {
 
       // First attempt to resolve a protocol implementation by hostname
       String host = url.getHost();
+      String domain = URLUtil.getDomainName(url).toLowerCase().trim();
+      String hostOrDomain = null;
+      Extension extension = null;
       if (hostProtocolMapping.containsKey(host)) {
-        Extension extension = getExtensionById(hostProtocolMapping.get(host));
+        hostOrDomain = host;
+      } else if (hostProtocolMapping.containsKey(domain)) {
+        hostOrDomain = domain;
+      }
+      if (hostOrDomain != null) {
+        extension = getExtensionById(hostProtocolMapping.get(hostOrDomain));
         if (extension != null) {
           protocol = getProtocolInstanceByExtension(extension);
         }
@@ -141,7 +150,7 @@ public class ProtocolFactory {
       if (protocol == null) {
         // Protocol listed in default map?
         if (defaultProtocolImplMapping.containsKey(url.getProtocol())) {
-          Extension extension = getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
+          extension = getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
           if (extension != null) {
             protocol = getProtocolInstanceByExtension(extension);
           }
@@ -150,7 +159,7 @@ public class ProtocolFactory {
 
       // Still couldn't find a protocol? Attempt by protocol
       if (protocol == null) {
-        Extension extension = findExtension(url.getProtocol(), "protocolName");
+        extension = findExtension(url.getProtocol(), "protocolName");
         if (extension != null) {
           protocol = getProtocolInstanceByExtension(extension);
         }