This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch tika-grpc-3x-features
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/tika-grpc-3x-features by this
push:
new a99e59899 TIKA-4252: add http request headers at fetcher config level
a99e59899 is described below
commit a99e59899132b6f2dc8020402e0575922d544c0b
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri May 24 16:29:23 2024 -0500
TIKA-4252: add http request headers at fetcher config level
---
.../tika/pipes/fetcher/http/HttpFetcher.java | 39 ++++++++++++++++++----
.../fetcher/http/config/HttpFetcherConfig.java | 9 +++++
.../tika/pipes/fetcher/http/HttpFetcherTest.java | 4 ++-
.../src/test/resources/tika-config-http.xml | 5 ++-
4 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index f5b8cba70..575e9bd7a 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -144,17 +144,19 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
if (!StringUtils.isBlank(httpFetcherConfig.getUserAgent())) {
get.setHeader(USER_AGENT, httpFetcherConfig.getUserAgent());
}
+
// additional http request headers can be sent in here.
+ // Add the headers from the Fetcher configuration.
+ if (httpFetcherConfig.getHttpRequestHeaders() != null) {
+ for (String httpRequestHeader : httpFetcherConfig.getHttpRequestHeaders()) {
+ placeHeaderOnGetRequest(get, httpRequestHeader);
+ }
+ }
+ // Additionally, headers can be specified per-fetch via the metadata.
String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
if (httpRequestHeaders != null) {
for (String httpRequestHeader : httpRequestHeaders) {
- int idxOfEquals = httpRequestHeader.indexOf(':');
- if (idxOfEquals == -1) {
- continue;
- }
- String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
- String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
- get.setHeader(headerKey, headerValue);
+ placeHeaderOnGetRequest(get, httpRequestHeader);
}
}
if (jwtGenerator != null) {
@@ -166,6 +168,16 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
}
}
+ private static void placeHeaderOnGetRequest(HttpGet get, String httpRequestHeader) {
+ int idxOfEquals = httpRequestHeader.indexOf(':');
+ if (idxOfEquals == -1) {
+ return;
+ }
+ String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
+ String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
+ get.setHeader(headerKey, headerValue);
+ }
+
@Override
public InputStream fetch(String fetchKey, long startRange, long endRange,
Metadata metadata) throws IOException, TikaException {
HttpGet get = new HttpGet(fetchKey);
@@ -430,6 +442,19 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
httpFetcherConfig.setMaxRedirects(maxRedirects);
}
+ /**
+ * Which http request headers should we send in the http fetch requests.
+ *
+ * @param headers The headers to add to the HTTP GET requests.
+ */
+ @Field
+ public void setHttpRequestHeaders(List<String> headers) {
+ httpFetcherConfig.setHttpRequestHeaders(new ArrayList<>());
+ if (headers != null) {
+ httpFetcherConfig.getHttpRequestHeaders().addAll(headers);
+ }
+ }
+
/**
* Which http headers should we capture in the metadata.
* Keys will be prepended with {@link HttpFetcher#HTTP_HEADER_PREFIX}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
index ce2a3b3ab..1988529f6 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
@@ -36,6 +36,7 @@ public class HttpFetcherConfig extends AbstractConfig {
private Long maxSpoolSize = -1L;
private Integer maxRedirects = 0;
private List<String> httpHeaders = new ArrayList<>();
+ private List<String> httpRequestHeaders = new ArrayList<>();
private Long overallTimeout = 120000L;
private Integer maxErrMsgSize = 10000000;
private String userAgent;
@@ -172,6 +173,14 @@ public class HttpFetcherConfig extends AbstractConfig {
return this;
}
+ public List<String> getHttpRequestHeaders() {
+ return httpRequestHeaders;
+ }
+
+ public void setHttpRequestHeaders(List<String> httpRequestHeaders) {
+ this.httpRequestHeaders = httpRequestHeaders;
+ }
+
public Long getOverallTimeout() {
return overallTimeout;
}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index b5a49b763..56e83ea64 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -161,11 +161,13 @@ class HttpFetcherTest extends TikaTest {
when(response.getEntity()).thenReturn(new StringEntity("Hi"));
Metadata metadata = new Metadata();
- metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1=val1", "nick2=val2"});
+ metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1: val1", "nick2: val2"});
httpFetcher.fetch("http://localhost", metadata);
HttpGet httpGet = httpGetArgumentCaptor.getValue();
Assertions.assertEquals("val1", httpGet.getHeaders("nick1")[0].getValue());
Assertions.assertEquals("val2", httpGet.getHeaders("nick2")[0].getValue());
+ // also make sure the headers from the fetcher config level are specified - see src/test/resources/tika-config-http.xml
+ Assertions.assertEquals("headerValueFromFetcherConfig", httpGet.getHeaders("headerNameFromFetcherConfig")[0].getValue());
}
@Test
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
index bd77de4ba..5def8f5dc 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
@@ -24,6 +24,9 @@
<header>Expires</header>
<header>Content-Length</header>
</httpHeaders>
+ <httpRequestHeaders>
+ <header>headerNameFromFetcherConfig: headerValueFromFetcherConfig</header>
+ </httpRequestHeaders>
</fetcher>
</fetchers>
-</properties>
\ No newline at end of file
+</properties>