This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new b6f645a4d NUTCH-3001 - fix logic for grabbing bytes if there's no content type in the header
     new f078a88df Merge pull request #774 from tballison/NUTCH-3001
b6f645a4d is described below

commit b6f645a4d025fa136f557dd37e9aba611b425fbb
Author: tallison <talli...@apache.org>
AuthorDate: Wed Sep 13 10:37:17 2023 -0400

    NUTCH-3001 - fix logic for grabbing bytes if there's no content type in the header
---
 .../nutch/protocol/selenium/HttpResponse.java      | 78 ++++++++++------------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
index bb3bf6357..750677374 100644
--- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
+++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
@@ -269,55 +269,51 @@ public class HttpResponse implements Response {
       String contentType = getHeader(Response.CONTENT_TYPE);
 
       // handle with Selenium only if content type in HTML or XHTML
-      if (contentType != null) {
-        if (contentType.contains("text/html")
-            || contentType.contains("application/xhtml")) {
-          readPlainContent(url);
-        } else {
-          try {
-            int contentLength = Integer.MAX_VALUE;
-            String contentLengthString = headers.get(Response.CONTENT_LENGTH);
-            if (contentLengthString != null) {
-              try {
-                contentLength = Integer.parseInt(contentLengthString.trim());
-              } catch (NumberFormatException ex) {
-                throw new HttpException(
-                    "bad content length: " + contentLengthString);
-              }
+      if (contentType != null &&
+              (contentType.contains("text/html") || contentType.contains("application/xhtml"))) {
+        readPlainContent(url);
+      } else {
+        try {
+          int contentLength = Integer.MAX_VALUE;
+          String contentLengthString = headers.get(Response.CONTENT_LENGTH);
+          if (contentLengthString != null) {
+            try {
+              contentLength = Integer.parseInt(contentLengthString.trim());
+            } catch (NumberFormatException ex) {
+              throw new HttpException("bad content length: " + contentLengthString);
             }
+          }
 
-            if (http.getMaxContent() >= 0
-                && contentLength > http.getMaxContent()) {
-              contentLength = http.getMaxContent();
-            }
+          if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
+            contentLength = http.getMaxContent();
+          }
 
-            byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
-            int bufferFilled = 0;
-            int totalRead = 0;
-            ByteArrayOutputStream out = new ByteArrayOutputStream();
-            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1
-                && totalRead + bufferFilled <= contentLength) {
-              totalRead += bufferFilled;
-              out.write(buffer, 0, bufferFilled);
-            }
+          byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
+          int bufferFilled = 0;
+          int totalRead = 0;
+          ByteArrayOutputStream out = new ByteArrayOutputStream();
+          while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 &&
+                  totalRead + bufferFilled <= contentLength) {
+            totalRead += bufferFilled;
+            out.write(buffer, 0, bufferFilled);
+          }
 
-            content = out.toByteArray();
+          content = out.toByteArray();
 
-          } catch (Exception e) {
-            if (code == 200)
-              throw new IOException(e.toString());
-            // for codes other than 200 OK, we are fine with empty content
-          } finally {
-            if (in != null) {
-              in.close();
-            }
+        } catch (Exception e) {
+          if (code == 200) {
+            throw new IOException(e.toString());
+          }
+          // for codes other than 200 OK, we are fine with empty content
+        } finally {
+          if (in != null) {
+            in.close();
           }
-        }
-        if (httpHeaders != null) {
-          headers.add(Response.RESPONSE_HEADERS, httpHeaders.toString());
         }
       }
-
+      if (httpHeaders != null) {
+        headers.add(Response.RESPONSE_HEADERS, httpHeaders.toString());
+      }
     } catch(KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
         throw new ProtocolException(e);
     } finally {

Reply via email to