Repository: nutch Updated Branches: refs/heads/master 15c583e22 -> a3e742049
NUTCH-2213 : do not store the headers verbatim if the response was compressed Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/ba86ddfc Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/ba86ddfc Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/ba86ddfc Branch: refs/heads/master Commit: ba86ddfcc2431ae41accd8f56cc4ffcba16d1cc3 Parents: 72ad761 Author: Julien Nioche <[email protected]> Authored: Wed Feb 10 15:38:26 2016 +0000 Committer: Julien Nioche <[email protected]> Committed: Wed Feb 10 15:38:26 2016 +0000 ---------------------------------------------------------------------- .../java/org/apache/nutch/protocol/http/HttpResponse.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/ba86ddfc/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java index 77772f0..f6d7e4d 100644 --- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java +++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java @@ -238,10 +238,6 @@ public class HttpResponse implements Response { haveSeenNonContinueStatus = code != 100; // 100 is "Continue" } - if (httpHeaders != null) { - headers.add("_response.headers_", httpHeaders.toString()); - } - String transferEncoding = getHeader(Response.TRANSFER_ENCODING); if (transferEncoding != null && "chunked" .equalsIgnoreCase(transferEncoding.trim())) { @@ -256,6 +252,11 @@ public class HttpResponse implements Response { } else if ("deflate".equals(contentEncoding)) { content = 
http.processDeflateEncoded(content, url); } else { + // store the headers verbatim only if the response was not compressed + // as the content length reported will not match otherwise + if (httpHeaders != null) { + headers.add("_response.headers_", httpHeaders.toString()); + } if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("fetched " + content.length + " bytes from " + url); }
