This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch tika-grpc-3x-features
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/tika-grpc-3x-features by this push:
     new fba92792c TIKA-4252: fix defaults. fix header parsing.
fba92792c is described below

commit fba92792cfabcabd434453d03244661adfc9285a
Author: Nicholas DiPiazza <ndipia...@apache.org>
AuthorDate: Thu May 9 09:32:29 2024 -0500

    TIKA-4252: fix defaults. fix header parsing.
---
 .../apache/tika/pipes/fetcher/http/HttpFetcher.java | 14 +++++++++-----
 .../fetcher/http/config/HttpFetcherConfig.java      | 21 +++++++++++----------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 168622a87..f5b8cba70 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -129,7 +129,6 @@ public class HttpFetcher extends AbstractFetcher implements 
Initializable, Range
 
     @Override
     public InputStream fetch(String fetchKey, Metadata metadata) throws 
IOException, TikaException {
-        LOG.info("Fetching HTTP key: {}", fetchKey);
         HttpGet get = new HttpGet(fetchKey);
         RequestConfig requestConfig = RequestConfig
                 .custom()
@@ -149,9 +148,12 @@ public class HttpFetcher extends AbstractFetcher 
implements Initializable, Range
         String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
         if (httpRequestHeaders != null) {
             for (String httpRequestHeader : httpRequestHeaders) {
-                int idxOfEquals = httpRequestHeader.indexOf('=');
-                String headerKey = httpRequestHeader.substring(0, idxOfEquals);
-                String headerValue = httpRequestHeader.substring(idxOfEquals + 
1);
+                int idxOfEquals = httpRequestHeader.indexOf(':');
+                if (idxOfEquals == -1) {
+                    continue;
+                }
+                String headerKey = httpRequestHeader.substring(0, 
idxOfEquals).trim();
+                String headerValue = httpRequestHeader.substring(idxOfEquals + 
1).trim();
                 get.setHeader(headerKey, headerValue);
             }
         }
@@ -165,11 +167,12 @@ public class HttpFetcher extends AbstractFetcher 
implements Initializable, Range
     }
 
     @Override
-    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata metadata) throws IOException {
+    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata metadata) throws IOException, TikaException {
         HttpGet get = new HttpGet(fetchKey);
         if (!StringUtils.isBlank(httpFetcherConfig.getUserAgent())) {
             get.setHeader(USER_AGENT, httpFetcherConfig.getUserAgent());
         }
+        setHttpRequestHeaders(metadata, get);
         get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
         return execute(get, metadata, httpClient, true);
     }
@@ -204,6 +207,7 @@ public class HttpFetcher extends AbstractFetcher implements 
Initializable, Range
             int code = response
                     .getStatusLine()
                     .getStatusCode();
+            LOG.info("Fetch id {} status code {}", get.getURI(), code);
             if (code < 200 || code > 299) {
                 throw new IOException("bad status code: " + code + " :: " + 
responseToString(response));
             }
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
index 7713c7ca4..ce2a3b3ab 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.pipes.fetcher.http.config;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.tika.pipes.fetcher.config.AbstractConfig;
@@ -27,16 +28,16 @@ public class HttpFetcherConfig extends AbstractConfig {
     private String authScheme;
     private String proxyHost;
     private Integer proxyPort;
-    private Integer connectTimeout;
-    private Integer requestTimeout;
-    private Integer socketTimeout;
-    private Integer maxConnections;
-    private Integer maxConnectionsPerRoute;
-    private Long maxSpoolSize;
-    private Integer maxRedirects;
-    private List<String> httpHeaders;
-    private Long overallTimeout;
-    private Integer maxErrMsgSize;
+    private Integer maxConnectionsPerRoute = 1000;
+    private Integer maxConnections = 2000;
+    private Integer requestTimeout = 120000;
+    private Integer connectTimeout = 120000;
+    private Integer socketTimeout = 120000;
+    private Long maxSpoolSize = -1L;
+    private Integer maxRedirects = 0;
+    private List<String> httpHeaders = new ArrayList<>();
+    private Long overallTimeout = 120000L;
+    private Integer maxErrMsgSize = 10000000;
     private String userAgent;
     private String jwtIssuer;
     private String jwtSubject;

Reply via email to