Author: lewismc
Date: Sat Nov  2 14:03:57 2013
New Revision: 1538193

URL: http://svn.apache.org/r1538193
Log:
NUTCH-1413 Record response time

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/nutch-default.xml
    
nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Nov  2 14:03:57 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1413 Record response time (Yasin KILINC, Talat UYARER, snagel via lewismc)
+
 * NUTCH-1125 JUnit test for tld (Sertac TURKEL via lewismc)
 
 * NUTCH-1124 JUnit test for scoring-opic (Talat UYARER via lewismc)

Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Sat Nov  2 14:03:57 2013
@@ -248,6 +248,14 @@
   </description>
 </property>
 
+<property>
+  <name>http.store.responsetime</name>
+  <value>true</value>
+  <description>Enables us to record the response time of the 
+  host which is the time period between start connection to end 
+  connection of a pages host.</description>
+</property>
+
 <!-- FTP properties -->
 
 <property>

Modified: nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Sat Nov  2 14:03:57 2013
@@ -19,11 +19,10 @@ package org.apache.nutch.protocol.http.a
 // JDK imports
 import java.io.IOException;
 import java.net.URL;
-import java.util.HashMap;
-import java.util.LinkedList;
-
+import java.nio.ByteBuffer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.avro.util.Utf8;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
@@ -33,6 +32,7 @@ import org.apache.nutch.protocol.Protoco
 import org.apache.nutch.protocol.ProtocolStatusCodes;
 import org.apache.nutch.protocol.ProtocolStatusUtils;
 import org.apache.nutch.storage.WebPage;
+import org.apache.nutch.util.Bytes;
 import org.apache.nutch.util.GZIPUtils;
 import org.apache.nutch.util.DeflateUtils;
 import org.apache.nutch.util.MimeUtil;
@@ -41,6 +41,8 @@ import org.apache.nutch.util.MimeUtil;
 import crawlercommons.robots.BaseRobotRules;
 
 public abstract class HttpBase implements Protocol {
+  
+  private final static Utf8 RESPONSE_TIME = new Utf8("_rs_");
 
   public static final int BUFFER_SIZE = 8 * 1024;
 
@@ -66,8 +68,8 @@ public abstract class HttpBase implement
   /** The Nutch 'User-Agent' request header */
   protected String userAgent = getAgentString(
       "NutchCVS", null, "Nutch",
-      "http://lucene.apache.org/nutch/bot.html",
-  "[email protected]");
+      "http://nutch.apache.org/bot.html",
+      "[email protected]");
 
 
   /** The "Accept-Language" request header value. */
@@ -90,6 +92,9 @@ public abstract class HttpBase implement
   /** Do we use HTTP/1.1? */
   protected boolean useHttp11 = false;
 
+  /** Response Time */
+  protected boolean responseTime = true;
+  
   /** Creates a new instance of HttpBase */
   public HttpBase() {
     this(null);
@@ -117,6 +122,7 @@ public abstract class HttpBase implement
     this.accept = conf.get("http.accept", accept);
     this.mimeTypes = new MimeUtil(conf);
     this.useHttp11 = conf.getBoolean("http.useHttp11", false);
+    this.responseTime = conf.getBoolean("http.store.responsetime", true);
     this.robots.setConf(conf);
     logConf();
   }
@@ -130,7 +136,15 @@ public abstract class HttpBase implement
 
     try {
       URL u = new URL(url);
+      
+      long startTime = System.currentTimeMillis();
       Response response = getResponse(u, page, false); // make a request
+      int elapsedTime =(int) (System.currentTimeMillis() - startTime);
+      
+      if(this.responseTime) {
+        page.putToMetadata(RESPONSE_TIME, ByteBuffer.wrap(Bytes.toBytes(elapsedTime)));
+      }
+      
       int code = response.getCode();
       byte[] content = response.getContent();
       Content c = new Content(u.toString(), u.toString(),


Reply via email to