Hi, I had the same problem running Nutch behind a web proxy, but with a few small changes to the protocol-httpclient plugin it works for me.
See the source below for my changes.
public class Http extends HttpBase {
public static final Log LOG = LogFactory.getLog(Http.class);
private static MultiThreadedHttpConnectionManager connectionManager =
new MultiThreadedHttpConnectionManager();
// Since the Configuration has not yet been setted,
// then an unconfigured client is returned.
private static HttpClient client = new HttpClient(connectionManager);
static synchronized HttpClient getClient() {
return client;
}
boolean verbose = false;
int maxThreadsTotal = 10;
String ntlmUsername = "";
String ntlmPassword = "";
String ntlmDomain = "";
String ntlmHost = "";
String proxyuser = "";
String proxypass = "";
public Http() {
super(LOG);
}
public void setConf(Configuration conf) {
super.setConf(conf);
this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
this.ntlmUsername = conf.get("http.auth.ntlm.username", "");
this.ntlmPassword = conf.get("http.auth.ntlm.password", "");
this.ntlmDomain = conf.get("http.auth.ntlm.domain", "");
this.ntlmHost = conf.get("http.auth.ntlm.host", "");
// add config for auth proxy
this.proxyuser = conf.get("http.auth.proxy.username", "");
this.proxypass = conf.get("http.auth.proxy.password", "");
//Level logLevel = Level.WARNING;
//if (conf.getBoolean("http.verbose", false)) {
// logLevel = Level.FINE;
//}
//LOG.setLevel(logLevel);
//Logger.getLogger("org.apache.commons.httpclient.HttpMethodDirector")
// .setLevel(logLevel);
configureClient();
}
public static void main(String[] args) throws Exception {
Http http = new Http();
http.setConf(NutchConfiguration.create());
main(http, args);
}
protected Response getResponse(URL url, CrawlDatum datum, boolean
redirect)
throws ProtocolException, IOException {
return new HttpResponse(this, url, datum, redirect);
}
private void configureClient() {
// Set up an HTTPS socket factory that accepts self-signed certs.
//Protocol dummyhttps = new Protocol("https", new
DummySSLProtocolSocketFactory(), 443);
//Protocol.registerProtocol("https", dummyhttps);
HttpConnectionManagerParams params = connectionManager.getParams();
params.setConnectionTimeout(timeout);
params.setSoTimeout(timeout);
params.setSendBufferSize(BUFFER_SIZE);
params.setReceiveBufferSize(BUFFER_SIZE);
params.setMaxTotalConnections(maxThreadsTotal);
if (maxThreadsTotal > maxThreadsPerHost) {
params.setDefaultMaxConnectionsPerHost(maxThreadsPerHost);
} else {
params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
}
HostConfiguration hostConf = client.getHostConfiguration();
ArrayList headers = new ArrayList();
// prefer English
headers.add(new Header("Accept-Language",
"en-us,en-gb,en;q=0.7,*;q=0.3"));
// prefer UTF-8
headers.add(new Header("Accept-Charset",
"utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
// prefer understandable formats
headers.add(new Header("Accept",
"text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/p
lain;q=0.8,image/png,*/*;q=0.5"));
// accept gzipped content
headers.add(new Header("Accept-Encoding", "x-gzip, gzip"));
hostConf.getParams().setParameter("http.default-headers", headers);
if (useProxy) {
hostConf.setProxy(proxyHost, proxyPort);
// add support for proxy authentication
if (proxyuser.length() > 0 ) {
Credentials proxyCreds = new
UsernamePasswordCredentials(proxyuser,proxypass);
client.getState().setProxyCredentials(new
AuthScope(proxyHost,AuthScope.ANY_PORT), proxyCreds);
}
}
if (ntlmUsername.length() > 0) {
Credentials ntCreds = new NTCredentials(ntlmUsername, ntlmPassword,
ntlmHost, ntlmDomain);
client.getState().setCredentials(new AuthScope(ntlmHost,
AuthScope.ANY_PORT), ntCreds);
if (LOG.isInfoEnabled()) {
LOG.info("Added NTLM credentials for " + ntlmUsername);
}
}
if (LOG.isInfoEnabled()) { LOG.info("Configured Client"); }
}
}
-----Ursprüngliche Nachricht-----
Von: ekoje ekoje [mailto:[EMAIL PROTECTED]
Gesendet: Donnerstag, 8. Februar 2007 15:36
An: [email protected]
Betreff: Web Proxy
Hi Guys,
I would like to run Nutch, but I'm behind a web proxy with authentication.
I use nutch-0.8.1 under Windows XP. I've configured nutch-site.xml to specify
my proxy host and port, but how do I specify the username and password?
Could you please help me?
Thanks
smime.p7s
Description: S/MIME cryptographic signature
------------------------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier. Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________ Nutch-general mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/nutch-general
