This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch tika-grpc-3x-features
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/tika-grpc-3x-features by this push:
     new a99e59899 TIKA-4252: add http request headers at fetcher config level
a99e59899 is described below

commit a99e59899132b6f2dc8020402e0575922d544c0b
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri May 24 16:29:23 2024 -0500

    TIKA-4252: add http request headers at fetcher config level
---
 .../tika/pipes/fetcher/http/HttpFetcher.java       | 39 ++++++++++++++++++----
 .../fetcher/http/config/HttpFetcherConfig.java     |  9 +++++
 .../tika/pipes/fetcher/http/HttpFetcherTest.java   |  4 ++-
 .../src/test/resources/tika-config-http.xml        |  5 ++-
 4 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index f5b8cba70..575e9bd7a 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -144,17 +144,19 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
         if (!StringUtils.isBlank(httpFetcherConfig.getUserAgent())) {
             get.setHeader(USER_AGENT, httpFetcherConfig.getUserAgent());
         }
+
         // additional http request headers can be sent in here.
+        // Add the headers from the Fetcher configuration.
+        if (httpFetcherConfig.getHttpRequestHeaders() != null) {
+            for (String httpRequestHeader : httpFetcherConfig.getHttpRequestHeaders()) {
+                placeHeaderOnGetRequest(get, httpRequestHeader);
+            }
+        }
+        // Additionally, headers can be specified per-fetch via the metadata.
         String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
         if (httpRequestHeaders != null) {
             for (String httpRequestHeader : httpRequestHeaders) {
-                int idxOfEquals = httpRequestHeader.indexOf(':');
-                if (idxOfEquals == -1) {
-                    continue;
-                }
-                String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
-                String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
-                get.setHeader(headerKey, headerValue);
+                placeHeaderOnGetRequest(get, httpRequestHeader);
             }
         }
         if (jwtGenerator != null) {
@@ -166,6 +168,16 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
         }
     }
 
+    private static void placeHeaderOnGetRequest(HttpGet get, String httpRequestHeader) {
+        int idxOfEquals = httpRequestHeader.indexOf(':');
+        if (idxOfEquals == -1) {
+            return;
+        }
+        String headerKey = httpRequestHeader.substring(0, idxOfEquals).trim();
+        String headerValue = httpRequestHeader.substring(idxOfEquals + 1).trim();
+        get.setHeader(headerKey, headerValue);
+    }
+
     @Override
     public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException, TikaException {
         HttpGet get = new HttpGet(fetchKey);
@@ -430,6 +442,19 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
         httpFetcherConfig.setMaxRedirects(maxRedirects);
     }
 
+    /**
+     * Which http request headers should we send in the http fetch requests.
+     *
+     * @param headers The headers to add to the HTTP GET requests.
+     */
+    @Field
+    public void setHttpRequestHeaders(List<String> headers) {
+        httpFetcherConfig.setHttpRequestHeaders(new ArrayList<>());
+        if (headers != null) {
+            httpFetcherConfig.getHttpRequestHeaders().addAll(headers);
+        }
+    }
+
     /**
      * Which http headers should we capture in the metadata.
      * Keys will be prepended with {@link HttpFetcher#HTTP_HEADER_PREFIX}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
index ce2a3b3ab..1988529f6 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java
@@ -36,6 +36,7 @@ public class HttpFetcherConfig extends AbstractConfig {
     private Long maxSpoolSize = -1L;
     private Integer maxRedirects = 0;
     private List<String> httpHeaders = new ArrayList<>();
+    private List<String> httpRequestHeaders = new ArrayList<>();
     private Long overallTimeout = 120000L;
     private Integer maxErrMsgSize = 10000000;
     private String userAgent;
@@ -172,6 +173,14 @@ public class HttpFetcherConfig extends AbstractConfig {
         return this;
     }
 
+    public List<String> getHttpRequestHeaders() {
+        return httpRequestHeaders;
+    }
+
+    public void setHttpRequestHeaders(List<String> httpRequestHeaders) {
+        this.httpRequestHeaders = httpRequestHeaders;
+    }
+
     public Long getOverallTimeout() {
         return overallTimeout;
     }
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index b5a49b763..56e83ea64 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -161,11 +161,13 @@ class HttpFetcherTest extends TikaTest {
         when(response.getEntity()).thenReturn(new StringEntity("Hi"));
 
         Metadata metadata = new Metadata();
-        metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1=val1", "nick2=val2"});
+        metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1: val1", "nick2: val2"});
         httpFetcher.fetch("http://localhost", metadata);
         HttpGet httpGet = httpGetArgumentCaptor.getValue();
         Assertions.assertEquals("val1", httpGet.getHeaders("nick1")[0].getValue());
         Assertions.assertEquals("val2", httpGet.getHeaders("nick2")[0].getValue());
+        // also make sure the headers from the fetcher config level are specified - see src/test/resources/tika-config-http.xml
+        Assertions.assertEquals("headerValueFromFetcherConfig", httpGet.getHeaders("headerNameFromFetcherConfig")[0].getValue());
     }
 
     @Test
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
index bd77de4ba..5def8f5dc 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
@@ -24,6 +24,9 @@
         <header>Expires</header>
         <header>Content-Length</header>
       </httpHeaders>
+      <httpRequestHeaders>
+        <header>headerNameFromFetcherConfig: headerValueFromFetcherConfig</header>
+      </httpRequestHeaders>
     </fetcher>
   </fetchers>
-</properties>
\ No newline at end of file
+</properties>

Reply via email to