This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch tika-grpc-3x-features
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/tika-grpc-3x-features by this push:
new fba92792c TIKA-4252: fix defaults. fix header parsing.
fba92792c is described below
commit fba92792cfabcabd434453d03244661adfc9285a
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Thu May 9 09:32:29 2024 -0500
TIKA-4252: fix defaults. fix header parsing.
---
.../apache/tika/pipes/fetcher/http/HttpFetcher.java | 14 +++++++++-----
.../fetcher/http/config/HttpFetcherConfig.java | 21 +++++++++++----------
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 168622a87..f5b8cba70 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -129,7 +129,6 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
@Override
public InputStream fetch(String fetchKey, Metadata metadata) throws IOException, TikaException {
- LOG.info("Fetching HTTP key: {}", fetchKey);
HttpGet get = new HttpGet(fetchKey);
RequestConfig requestConfig = RequestConfig
.custom()
@@ -149,9 +148,12 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
if (httpRequestHeaders != null) {
for (String httpRequestHeader : httpRequestHeaders) {
- int idxOfEquals = httpRequestHeader.indexOf('=');
- String headerKey = httpRequestHeader.substring(0, idxOfEquals);
- String headerValue = httpRequestHeader.substring(idxOfEquals + 1);
+ int idxOfEquals = httpRequestHeader.indexOf(':');
+ if (idxOfEquals == -1) {
+ continue;
+ }
+ String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
+ String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
get.setHeader(headerKey, headerValue);
}
}
@@ -165,11 +167,12 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
}
@Override
- public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException {
+ public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException, TikaException {
HttpGet get = new HttpGet(fetchKey);
if (!StringUtils.isBlank(httpFetcherConfig.getUserAgent())) {
get.setHeader(USER_AGENT, httpFetcherConfig.getUserAgent());
}
+ setHttpRequestHeaders(metadata, get);
get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
return execute(get, metadata, httpClient, true);
}
@@ -204,6 +207,7 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
int code = response
.getStatusLine()
.getStatusCode();
+ LOG.info("Fetch id {} status code {}", get.getURI(), code);
if (code < 200 || code > 299) {
throw new IOException("bad status code: " + code + " :: " + responseToString(response));
}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
index 7713c7ca4..ce2a3b3ab 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.pipes.fetcher.http.config;
+import java.util.ArrayList;
import java.util.List;
import org.apache.tika.pipes.fetcher.config.AbstractConfig;
@@ -27,16 +28,16 @@ public class HttpFetcherConfig extends AbstractConfig {
private String authScheme;
private String proxyHost;
private Integer proxyPort;
- private Integer connectTimeout;
- private Integer requestTimeout;
- private Integer socketTimeout;
- private Integer maxConnections;
- private Integer maxConnectionsPerRoute;
- private Long maxSpoolSize;
- private Integer maxRedirects;
- private List<String> httpHeaders;
- private Long overallTimeout;
- private Integer maxErrMsgSize;
+ private Integer maxConnectionsPerRoute = 1000;
+ private Integer maxConnections = 2000;
+ private Integer requestTimeout = 120000;
+ private Integer connectTimeout = 120000;
+ private Integer socketTimeout = 120000;
+ private Long maxSpoolSize = -1L;
+ private Integer maxRedirects = 0;
+ private List<String> httpHeaders = new ArrayList<>();
+ private Long overallTimeout = 120000L;
+ private Integer maxErrMsgSize = 10000000;
private String userAgent;
private String jwtIssuer;
private String jwtSubject;