This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4247-http-request-headers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7c2fb07a11bbbbd11ce6b43d27f1be79e63a0790
Author: Nicholas DiPiazza <ndipia...@apache.org>
AuthorDate: Mon Apr 29 13:01:36 2024 -0500

    TIKA-4247 HttpFetcher - add ability to send request headers
    
    Set headers in the "httpRequestHeaders" metadata value;
    they will be sent along with the HTTP request.
---
 .../tika/pipes/fetcher/http/HttpFetcher.java       | 21 +++++++++-
 .../tika/pipes/fetcher/http/HttpFetcherTest.java   | 48 +++++++++++++++++++++-
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 26b45f8bf..a8bea6f1b 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -143,10 +143,24 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
                         .setMaxRedirects(maxRedirects)
                         .setRedirectsEnabled(true).build();
         get.setConfig(requestConfig);
-        if (! StringUtils.isBlank(userAgent)) {
+        setHttpRequestHeaders(metadata, get);
+        return execute(get, metadata, httpClient, true);
+    }
+
+    /**
+     * Applies request headers to the outgoing GET request.
+     * <p>
+     * The configured user agent (if not blank) is set first. Then every value stored
+     * under the {@code "httpRequestHeaders"} metadata key is parsed as
+     * {@code name=value}, split on the first {@code '='}, and set on the request.
+     *
+     * @param metadata metadata that may carry {@code "httpRequestHeaders"} values
+     * @param get the request to decorate
+     * @throws IllegalArgumentException if an entry has no {@code '='} or an empty name
+     */
+    private void setHttpRequestHeaders(Metadata metadata, HttpGet get) {
+        if (!StringUtils.isBlank(userAgent)) {
             get.setHeader(USER_AGENT, userAgent);
         }
-        return execute(get, metadata, httpClient, true);
+        // Additional per-request headers arrive as "name=value" strings.
+        String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
+        if (httpRequestHeaders == null) {
+            return;
+        }
+        for (String httpRequestHeader : httpRequestHeaders) {
+            // Split on the first '=' only, so the value itself may contain '='.
+            int idxOfEquals = httpRequestHeader.indexOf('=');
+            if (idxOfEquals <= 0) {
+                // indexOf returns -1 when '=' is missing, which would make
+                // substring(0, -1) fail with an opaque StringIndexOutOfBoundsException;
+                // index 0 would yield an empty header name. The entry is not echoed
+                // in the message because header values may carry credentials.
+                throw new IllegalArgumentException(
+                        "Malformed httpRequestHeaders entry: expected name=value");
+            }
+            String headerKey = httpRequestHeader.substring(0, idxOfEquals);
+            String headerValue = httpRequestHeader.substring(idxOfEquals + 1);
+            get.setHeader(headerKey, headerValue);
+        }
     }
 
     @Override
@@ -455,4 +469,7 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
         this.httpClientFactory = httpClientFactory;
     }
 
+    /**
+     * Replaces the {@link HttpClient} stored in this fetcher's {@code httpClient} field.
+     * <p>
+     * Declared without an access modifier, so it is package-private; the unit test in
+     * the same package uses it to inject a mocked client.
+     *
+     * @param httpClient the client to store
+     */
+    void setHttpClient(HttpClient httpClient) {
+        this.httpClient = httpClient;
+    }
 }
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index e26e6cfcb..970ec58c2 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -37,24 +37,31 @@ import org.apache.commons.io.IOUtils;
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.http.HttpStatus;
+import org.apache.http.ProtocolVersion;
 import org.apache.http.StatusLine;
 import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.entity.StringEntity;
 import org.apache.http.protocol.HttpContext;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.client.HttpClientFactory;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.pipes.fetcher.FetcherManager;
 
-public class HttpFetcherTest extends TikaTest {
-
+class HttpFetcherTest extends TikaTest {
     private static final String TEST_URL = "wontbecalled";
     private static final String CONTENT = "request content";
 
@@ -62,6 +69,7 @@ public class HttpFetcherTest extends TikaTest {
 
     @BeforeEach
     public void before() throws Exception {
+        httpFetcher = new HttpFetcher();
         final HttpResponse mockResponse = buildMockResponse(HttpStatus.SC_OK,
                 IOUtils.toInputStream(CONTENT, Charset.defaultCharset()));
 
@@ -98,6 +106,42 @@ public class HttpFetcherTest extends TikaTest {
         assertEquals(TEST_URL, meta.get("http-connection:target-url"));
     }
 
+    /**
+     * Verifies that "name=value" entries stored under the "httpRequestHeaders"
+     * metadata key end up as headers on the HttpGet handed to the HTTP client.
+     */
+    @Test
+    public void testHttpRequestHeaders() throws Exception {
+        HttpClient httpClient = Mockito.mock(HttpClient.class);
+        httpFetcher.setHttpClient(httpClient);
+        CloseableHttpResponse response = mock(CloseableHttpResponse.class);
+        // Captures the request passed to the mocked client so its headers can be inspected.
+        ArgumentCaptor<HttpGet> httpGetArgumentCaptor = ArgumentCaptor.forClass(HttpGet.class);
+
+        when(httpClient.execute(httpGetArgumentCaptor.capture(), any(HttpContext.class)))
+                .thenReturn(response);
+        when(response.getStatusLine()).thenReturn(new StatusLine() {
+            @Override
+            public ProtocolVersion getProtocolVersion() {
+                return new HttpGet("http://localhost").getProtocolVersion();
+            }
+
+            @Override
+            public int getStatusCode() {
+                return 200;
+            }
+
+            @Override
+            public String getReasonPhrase() {
+                return null;
+            }
+        });
+
+        when(response.getEntity()).thenReturn(new StringEntity("Hi"));
+
+        Metadata metadata = new Metadata();
+        metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1=val1", "nick2=val2"});
+        httpFetcher.fetch("http://localhost", metadata);
+        HttpGet httpGet = httpGetArgumentCaptor.getValue();
+        Assertions.assertEquals("val1", httpGet.getHeaders("nick1")[0].getValue());
+        Assertions.assertEquals("val2", httpGet.getHeaders("nick2")[0].getValue());
+    }
+
     @Test
     @Disabled("requires network connectivity")
     public void testRedirect() throws Exception {

Reply via email to