Author: kwright
Date: Thu Jan 10 01:47:34 2013
New Revision: 1431176
URL: http://svn.apache.org/viewvc?rev=1431176&view=rev
Log:
More changes to bring Web connector back into line with MCF 1.0.1. Part of
CONNECTORS-604. Note well: This change REQUIRES a new version of httpclient,
downloaded via ant make-core-deps!!
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1431176&r1=1431175&r2=1431176&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Thu Jan 10 01:47:34 2013
@@ -1270,33 +1270,33 @@ public class ThrottledFetcher
new AllowAllHostnameVerifier());
Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
- int resolvedPort;
+ int hostPort;
String displayedPort;
if (port != -1)
{
if (!(protocol.equals("http") && port == 80) &&
!(protocol.equals("https") && port == 443))
+ {
displayedPort = ":"+Integer.toString(port);
+ hostPort = port;
+ }
else
+ {
displayedPort = "";
- resolvedPort = port;
+ hostPort = -1;
+ }
}
else
{
- if (protocol.equals("http"))
- resolvedPort = 80;
- else if (protocol.equals("https"))
- resolvedPort = 443;
- else
- throw new IllegalArgumentException("Unexpected protocol: "+protocol);
displayedPort = "";
+ hostPort = -1;
}
StringBuilder sb = new StringBuilder(protocol);
sb.append("://").append(server).append(displayedPort).append(urlPath);
String fetchUrl = sb.toString();
- HttpHost fetchHost = new HttpHost(server,port,protocol);
+ HttpHost fetchHost = new HttpHost(server,hostPort,protocol);
HttpHost hostHost;
if (host != null)
@@ -1304,7 +1304,7 @@ public class ThrottledFetcher
sb.setLength(0);
sb.append(protocol).append("://").append(host).append(displayedPort).append(urlPath);
myUrl = sb.toString();
- hostHost = new HttpHost(host,resolvedPort,protocol);
+ hostHost = new HttpHost(host,hostPort,protocol);
}
else
{
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1431176&r1=1431175&r2=1431176&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Thu Jan 10 01:47:34 2013
@@ -630,7 +630,8 @@ public class WebcrawlerConnector extends
if (Logging.connectors.isDebugEnabled())
{
- Logging.connectors.debug("Web: For document identifier '"+documentIdentifier+"' found session credential key '"+sessionCredential.getSequenceKey()+"'");
+ if (sessionCredential != null)
+ Logging.connectors.debug("Web: For document identifier '"+documentIdentifier+"' found session credential key '"+sessionCredential.getSequenceKey()+"'");
}
// Set up the initial state and state variables.