This is an automated email from the ASF dual-hosted git repository.
markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new c390dfc8b NUTCH-3031 ProtocolFactory host mapper to support domains
c390dfc8b is described below
commit c390dfc8b5c15db74d61c83e79f8e17d9bdc7b3f
Author: Markus Jelsma <[email protected]>
AuthorDate: Tue Mar 12 17:29:20 2024 +0000
NUTCH-3031 ProtocolFactory host mapper to support domains
---
src/java/org/apache/nutch/protocol/ProtocolFactory.java | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/java/org/apache/nutch/protocol/ProtocolFactory.java b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
index a545a4cd0..dc274b7e1 100644
--- a/src/java/org/apache/nutch/protocol/ProtocolFactory.java
+++ b/src/java/org/apache/nutch/protocol/ProtocolFactory.java
@@ -29,6 +29,7 @@ import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.plugin.PluginRuntimeException;
import org.apache.nutch.util.ObjectCache;
+import org.apache.nutch.util.URLUtil;
import org.apache.commons.lang.StringUtils;
@@ -130,8 +131,16 @@ public class ProtocolFactory {
// First attempt to resolve a protocol implementation by hostname
String host = url.getHost();
+ String domain = URLUtil.getDomainName(url).toLowerCase().trim();
+ String hostOrDomain = null;
+ Extension extension = null;
if (hostProtocolMapping.containsKey(host)) {
- Extension extension = getExtensionById(hostProtocolMapping.get(host));
+ hostOrDomain = host;
+ } else if (hostProtocolMapping.containsKey(domain)) {
+ hostOrDomain = domain;
+ }
+ if (hostOrDomain != null) {
+ extension = getExtensionById(hostProtocolMapping.get(hostOrDomain));
if (extension != null) {
protocol = getProtocolInstanceByExtension(extension);
}
@@ -141,7 +150,7 @@ public class ProtocolFactory {
if (protocol == null) {
// Protocol listed in default map?
if (defaultProtocolImplMapping.containsKey(url.getProtocol())) {
- Extension extension =
getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
+ extension =
getExtensionById(defaultProtocolImplMapping.get(url.getProtocol()));
if (extension != null) {
protocol = getProtocolInstanceByExtension(extension);
}
@@ -150,7 +159,7 @@ public class ProtocolFactory {
// Still couldn't find a protocol? Attempt by protocol
if (protocol == null) {
- Extension extension = findExtension(url.getProtocol(), "protocolName");
+ extension = findExtension(url.getProtocol(), "protocolName");
if (extension != null) {
protocol = getProtocolInstanceByExtension(extension);
}