This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch tika-grpc-3x-features
in repository https://gitbox.apache.org/repos/asf/tika.git

The following commit(s) were added to refs/heads/tika-grpc-3x-features by this push:
     new fba92792c TIKA-4252: fix defaults. fix header parsing.
fba92792c is described below

commit fba92792cfabcabd434453d03244661adfc9285a
Author: Nicholas DiPiazza <ndipia...@apache.org>
AuthorDate: Thu May 9 09:32:29 2024 -0500

    TIKA-4252: fix defaults. fix header parsing.
---
 .../apache/tika/pipes/fetcher/http/HttpFetcher.java | 14 +++++++++-----
 .../fetcher/http/config/HttpFetcherConfig.java      | 21 +++++++++++----------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 168622a87..f5b8cba70 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -129,7 +129,6 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
 
     @Override
     public InputStream fetch(String fetchKey, Metadata metadata) throws IOException, TikaException {
-        LOG.info("Fetching HTTP key: {}", fetchKey);
         HttpGet get = new HttpGet(fetchKey);
         RequestConfig requestConfig = RequestConfig
                 .custom()
@@ -149,9 +148,12 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
         String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
         if (httpRequestHeaders != null) {
             for (String httpRequestHeader : httpRequestHeaders) {
-                int idxOfEquals = httpRequestHeader.indexOf('=');
-                String headerKey = httpRequestHeader.substring(0, idxOfEquals);
-                String headerValue = httpRequestHeader.substring(idxOfEquals + 1);
+                int idxOfEquals = httpRequestHeader.indexOf(':');
+                if (idxOfEquals == -1) {
+                    continue;
+                }
+                String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
+                String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
                 get.setHeader(headerKey, headerValue);
             }
         }
@@ -165,11 +167,12 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
     }
 
     @Override
-    public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException {
+    public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException, TikaException {
         HttpGet get = new HttpGet(fetchKey);
         if (!StringUtils.isBlank(httpFetcherConfig.getUserAgent())) {
             get.setHeader(USER_AGENT, httpFetcherConfig.getUserAgent());
         }
+        setHttpRequestHeaders(metadata, get);
         get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
         return execute(get, metadata, httpClient, true);
     }
@@ -204,6 +207,7 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
             int code = response
                     .getStatusLine()
                     .getStatusCode();
+            LOG.info("Fetch id {} status code {}", get.getURI(), code);
             if (code < 200 || code > 299) {
                 throw new IOException("bad status code: " + code + " :: " + responseToString(response));
             }
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
index 7713c7ca4..ce2a3b3ab 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.pipes.fetcher.http.config;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.tika.pipes.fetcher.config.AbstractConfig;
@@ -27,16 +28,16 @@ public class HttpFetcherConfig extends AbstractConfig {
     private String authScheme;
     private String proxyHost;
     private Integer proxyPort;
-    private Integer connectTimeout;
-    private Integer requestTimeout;
-    private Integer socketTimeout;
-    private Integer maxConnections;
-    private Integer maxConnectionsPerRoute;
-    private Long maxSpoolSize;
-    private Integer maxRedirects;
-    private List<String> httpHeaders;
-    private Long overallTimeout;
-    private Integer maxErrMsgSize;
+    private Integer maxConnectionsPerRoute = 1000;
+    private Integer maxConnections = 2000;
+    private Integer requestTimeout = 120000;
+    private Integer connectTimeout = 120000;
+    private Integer socketTimeout = 120000;
+    private Long maxSpoolSize = -1L;
+    private Integer maxRedirects = 0;
+    private List<String> httpHeaders = new ArrayList<>();
+    private Long overallTimeout = 120000L;
+    private Integer maxErrMsgSize = 10000000;
     private String userAgent;
     private String jwtIssuer;
     private String jwtSubject;