This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4247-http-request-headers in repository https://gitbox.apache.org/repos/asf/tika.git
commit 7c2fb07a11bbbbd11ce6b43d27f1be79e63a0790 Author: Nicholas DiPiazza <ndipia...@apache.org> AuthorDate: Mon Apr 29 13:01:36 2024 -0500 TIKA-4247 HttpFetcher - add ability to send request headers set headers in a metadata value for "httpRequestHeaders" those will be sent along with http request. --- .../tika/pipes/fetcher/http/HttpFetcher.java | 21 +++++++++- .../tika/pipes/fetcher/http/HttpFetcherTest.java | 48 +++++++++++++++++++++- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java index 26b45f8bf..a8bea6f1b 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java @@ -143,10 +143,24 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range .setMaxRedirects(maxRedirects) .setRedirectsEnabled(true).build(); get.setConfig(requestConfig); - if (! StringUtils.isBlank(userAgent)) { + setHttpRequestHeaders(metadata, get); + return execute(get, metadata, httpClient, true); + } + + private void setHttpRequestHeaders(Metadata metadata, HttpGet get) { + if (!StringUtils.isBlank(userAgent)) { get.setHeader(USER_AGENT, userAgent); } - return execute(get, metadata, httpClient, true); + // additional http request headers can be sent in here. + String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders"); + if (httpRequestHeaders != null) { + for (String httpRequestHeader : httpRequestHeaders) { + int idxOfEquals = httpRequestHeader.indexOf('='); + String headerKey = httpRequestHeader.substring(0, idxOfEquals); + String headerValue = httpRequestHeader.substring(idxOfEquals + 1); + get.setHeader(headerKey, headerValue); + } + } } @Override @@ -455,4 +469,7 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range this.httpClientFactory = httpClientFactory; } + void setHttpClient(HttpClient httpClient) { + this.httpClient = httpClient; + } } diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java index e26e6cfcb..970ec58c2 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java @@ -37,24 +37,31 @@ import org.apache.commons.io.IOUtils; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; +import org.apache.http.ProtocolVersion; import org.apache.http.StatusLine; import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.entity.StringEntity; import org.apache.http.protocol.HttpContext; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; import org.apache.tika.TikaTest; import org.apache.tika.client.HttpClientFactory; import org.apache.tika.exception.TikaException; import org.apache.tika.io.TemporaryResources; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.pipes.fetcher.FetcherManager; -public class HttpFetcherTest extends TikaTest { - +class HttpFetcherTest extends TikaTest { private static final String TEST_URL = "wontbecalled"; private static final String CONTENT = "request content"; @@ -62,6 +69,7 @@ public class HttpFetcherTest extends TikaTest { @BeforeEach public void before() throws Exception { + httpFetcher = new HttpFetcher(); final HttpResponse mockResponse = buildMockResponse(HttpStatus.SC_OK, IOUtils.toInputStream(CONTENT, Charset.defaultCharset())); @@ -98,6 +106,42 @@ public class HttpFetcherTest extends TikaTest { assertEquals(TEST_URL, meta.get("http-connection:target-url")); } + @Test + public void testHttpRequestHeaders() throws Exception { + HttpClient httpClient = Mockito.mock(HttpClient.class); + httpFetcher.setHttpClient(httpClient); + CloseableHttpResponse response = mock(CloseableHttpResponse.class); + ArgumentCaptor<HttpGet> httpGetArgumentCaptor = ArgumentCaptor.forClass(HttpGet.class); + + when(httpClient.execute(httpGetArgumentCaptor.capture(), any(HttpContext.class))) + .thenReturn(response); + when(response.getStatusLine()).thenReturn(new StatusLine() { + @Override + public ProtocolVersion getProtocolVersion() { + return new HttpGet("http://localhost").getProtocolVersion(); + } + + @Override + public int getStatusCode() { + return 200; + } + + @Override + public String getReasonPhrase() { + return null; + } + }); + + when(response.getEntity()).thenReturn(new StringEntity("Hi")); + + Metadata metadata = new Metadata(); + metadata.set(Property.externalText("httpRequestHeaders"), new String[] {"nick1=val1", "nick2=val2"}); + httpFetcher.fetch("http://localhost", metadata); + HttpGet httpGet = httpGetArgumentCaptor.getValue(); + Assertions.assertEquals("val1", httpGet.getHeaders("nick1")[0].getValue()); + Assertions.assertEquals("val2", httpGet.getHeaders("nick2")[0].getValue()); + } + @Test @Disabled("requires network connectivity") public void testRedirect() throws Exception {