Repository: nutch
Updated Branches:
  refs/heads/master 15c583e22 -> a3e742049


NUTCH-2213 : do not store the headers verbatim if the response was compressed


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/ba86ddfc
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/ba86ddfc
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/ba86ddfc

Branch: refs/heads/master
Commit: ba86ddfcc2431ae41accd8f56cc4ffcba16d1cc3
Parents: 72ad761
Author: Julien Nioche <[email protected]>
Authored: Wed Feb 10 15:38:26 2016 +0000
Committer: Julien Nioche <[email protected]>
Committed: Wed Feb 10 15:38:26 2016 +0000

----------------------------------------------------------------------
 .../java/org/apache/nutch/protocol/http/HttpResponse.java   | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/ba86ddfc/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
index 77772f0..f6d7e4d 100644
--- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
+++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
@@ -238,10 +238,6 @@ public class HttpResponse implements Response {
         haveSeenNonContinueStatus = code != 100; // 100 is "Continue"
       }
 
-      if (httpHeaders != null) {
-        headers.add("_response.headers_", httpHeaders.toString());
-      }
-
       String transferEncoding = getHeader(Response.TRANSFER_ENCODING);
       if (transferEncoding != null && "chunked"
           .equalsIgnoreCase(transferEncoding.trim())) {
@@ -256,6 +252,11 @@ public class HttpResponse implements Response {
       } else if ("deflate".equals(contentEncoding)) {
         content = http.processDeflateEncoded(content, url);
       } else {
+        // store the headers verbatim only if the response was not compressed
+        // as the content length reported will not match otherwise
+        if (httpHeaders != null) {
+          headers.add("_response.headers_", httpHeaders.toString());
+        }
         if (Http.LOG.isTraceEnabled()) {
           Http.LOG.trace("fetched " + content.length + " bytes from " + url);
         }

Reply via email to