Hi 

I was having the same problem running Nutch behind a web proxy.
With a few small changes to the protocol-httpclient plugin, it now works for me.

See the source below for my changes.


public class Http extends HttpBase {

  public static final Log LOG = LogFactory.getLog(Http.class);

  private static MultiThreadedHttpConnectionManager connectionManager =
          new MultiThreadedHttpConnectionManager();

  // Since the Configuration has not yet been setted,
  // then an unconfigured client is returned.
  private static HttpClient client = new HttpClient(connectionManager);

  static synchronized HttpClient getClient() {
    return client;
  }

  boolean verbose = false;
  int maxThreadsTotal = 10;
  String ntlmUsername = "";
  String ntlmPassword = "";
  String ntlmDomain = "";
  String ntlmHost = "";

  String proxyuser = "";
  String proxypass = "";
  
  public Http() {
    super(LOG);
  }

  public void setConf(Configuration conf) {
    super.setConf(conf);
    this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
    this.ntlmUsername = conf.get("http.auth.ntlm.username", "");
    this.ntlmPassword = conf.get("http.auth.ntlm.password", "");
    this.ntlmDomain = conf.get("http.auth.ntlm.domain", "");
    this.ntlmHost = conf.get("http.auth.ntlm.host", "");
    
    
    // add config for auth proxy
    this.proxyuser = conf.get("http.auth.proxy.username", "");
    this.proxypass = conf.get("http.auth.proxy.password", "");
    
    
    //Level logLevel = Level.WARNING;
    //if (conf.getBoolean("http.verbose", false)) {
    //  logLevel = Level.FINE;
    //}
    //LOG.setLevel(logLevel);
    //Logger.getLogger("org.apache.commons.httpclient.HttpMethodDirector")
    //      .setLevel(logLevel);
    configureClient();
  }

  public static void main(String[] args) throws Exception {
    Http http = new Http();
    http.setConf(NutchConfiguration.create());
    main(http, args);
  }

  protected Response getResponse(URL url, CrawlDatum datum, boolean
redirect)
    throws ProtocolException, IOException {
    return new HttpResponse(this, url, datum, redirect);
  }
  
  private void configureClient() {

    // Set up an HTTPS socket factory that accepts self-signed certs.
    //Protocol dummyhttps = new Protocol("https", new
DummySSLProtocolSocketFactory(), 443);
    //Protocol.registerProtocol("https", dummyhttps);
    
    HttpConnectionManagerParams params = connectionManager.getParams();
    params.setConnectionTimeout(timeout);
    params.setSoTimeout(timeout);
    params.setSendBufferSize(BUFFER_SIZE);
    params.setReceiveBufferSize(BUFFER_SIZE);
    params.setMaxTotalConnections(maxThreadsTotal);
    if (maxThreadsTotal > maxThreadsPerHost) {
      params.setDefaultMaxConnectionsPerHost(maxThreadsPerHost);
    } else {
      params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
    }

    HostConfiguration hostConf = client.getHostConfiguration();
    ArrayList headers = new ArrayList();
    // prefer English
    headers.add(new Header("Accept-Language",
"en-us,en-gb,en;q=0.7,*;q=0.3"));
    // prefer UTF-8
    headers.add(new Header("Accept-Charset",
"utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
    // prefer understandable formats
    headers.add(new Header("Accept",
 
"text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/p
lain;q=0.8,image/png,*/*;q=0.5"));
    // accept gzipped content
    headers.add(new Header("Accept-Encoding", "x-gzip, gzip"));
    hostConf.getParams().setParameter("http.default-headers", headers);
    if (useProxy) {
      hostConf.setProxy(proxyHost, proxyPort);
      // add support for proxy authentication
      if (proxyuser.length() > 0 ) {
          Credentials proxyCreds = new
UsernamePasswordCredentials(proxyuser,proxypass);
          client.getState().setProxyCredentials(new
AuthScope(proxyHost,AuthScope.ANY_PORT), proxyCreds);
      }
    }
    if (ntlmUsername.length() > 0) {
      Credentials ntCreds = new NTCredentials(ntlmUsername, ntlmPassword,
ntlmHost, ntlmDomain);
      client.getState().setCredentials(new AuthScope(ntlmHost,
AuthScope.ANY_PORT), ntCreds);

      if (LOG.isInfoEnabled()) {
        LOG.info("Added NTLM credentials for " + ntlmUsername);
      }
    }
    if (LOG.isInfoEnabled()) { LOG.info("Configured Client"); }
  }
}


-----Ursprüngliche Nachricht-----
Von: ekoje ekoje [mailto:[EMAIL PROTECTED] 
Gesendet: Donnerstag, 8. Februar 2007 15:36
An: [email protected]
Betreff: Web Proxy

Hi Guys,

I would like to run nutch but I'm behind a web proxy with authentication.

I use nutch-0.8.1 under Windows XP. I've configured nutch-site.xml to specify
my proxy host and port, but how do I specify the username and password?

Could you please help me ?

Thanks

Attachment: smime.p7s
Description: S/MIME cryptographic signature

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier.
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Nutch-general mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-general

Reply via email to