Author: lewismc
Date: Sat Nov 2 14:03:57 2013
New Revision: 1538193
URL: http://svn.apache.org/r1538193
Log:
NUTCH-1413 Record response time
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/nutch-default.xml
nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Nov 2 14:03:57 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1413 Record response time (Yasin KILINC, Talat UYARER, snagel via lewismc)
+
* NUTCH-1125 JUnit test for tld (Sertac TURKEL via lewismc)
* NUTCH-1124 JUnit test for scoring-opic (Talat UYARER via lewismc)
Modified: nutch/branches/2.x/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Sat Nov 2 14:03:57 2013
@@ -248,6 +248,14 @@
</description>
</property>
+<property>
+ <name>http.store.responsetime</name>
+ <value>true</value>
+ <description>Enables us to record the response time of the
+ host which is the time period between start connection to end
+ connection of a pages host.</description>
+</property>
+
<!-- FTP properties -->
<property>
Modified: nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1538193&r1=1538192&r2=1538193&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Sat Nov 2 14:03:57 2013
@@ -19,11 +19,10 @@ package org.apache.nutch.protocol.http.a
// JDK imports
import java.io.IOException;
import java.net.URL;
-import java.util.HashMap;
-import java.util.LinkedList;
-
+import java.nio.ByteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.Content;
@@ -33,6 +32,7 @@ import org.apache.nutch.protocol.Protoco
import org.apache.nutch.protocol.ProtocolStatusCodes;
import org.apache.nutch.protocol.ProtocolStatusUtils;
import org.apache.nutch.storage.WebPage;
+import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.GZIPUtils;
import org.apache.nutch.util.DeflateUtils;
import org.apache.nutch.util.MimeUtil;
@@ -41,6 +41,8 @@ import org.apache.nutch.util.MimeUtil;
import crawlercommons.robots.BaseRobotRules;
public abstract class HttpBase implements Protocol {
+
+ private final static Utf8 RESPONSE_TIME = new Utf8("_rs_");
public static final int BUFFER_SIZE = 8 * 1024;
@@ -66,8 +68,8 @@ public abstract class HttpBase implement
/** The Nutch 'User-Agent' request header */
protected String userAgent = getAgentString(
"NutchCVS", null, "Nutch",
- "http://lucene.apache.org/nutch/bot.html",
- "nutch-agent@lucene.apache.org");
+ "http://nutch.apache.org/bot.html",
+ "agent@nutch.apache.org");
/** The "Accept-Language" request header value. */
@@ -90,6 +92,9 @@ public abstract class HttpBase implement
/** Do we use HTTP/1.1? */
protected boolean useHttp11 = false;
+ /** Response Time */
+ protected boolean responseTime = true;
+
/** Creates a new instance of HttpBase */
public HttpBase() {
this(null);
@@ -117,6 +122,7 @@ public abstract class HttpBase implement
this.accept = conf.get("http.accept", accept);
this.mimeTypes = new MimeUtil(conf);
this.useHttp11 = conf.getBoolean("http.useHttp11", false);
+ this.responseTime = conf.getBoolean("http.store.responsetime", true);
this.robots.setConf(conf);
logConf();
}
@@ -130,7 +136,15 @@ public abstract class HttpBase implement
try {
URL u = new URL(url);
+
+ long startTime = System.currentTimeMillis();
Response response = getResponse(u, page, false); // make a request
+ int elapsedTime =(int) (System.currentTimeMillis() - startTime);
+
+ if(this.responseTime) {
+ page.putToMetadata(RESPONSE_TIME, ByteBuffer.wrap(Bytes.toBytes(elapsedTime)));
+ }
+
int code = response.getCode();
byte[] content = response.getContent();
Content c = new Content(u.toString(), u.toString(),