This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4252-fetch-tuple-missing-metadata-4
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to 
refs/heads/TIKA-4252-fetch-tuple-missing-metadata-4 by this push:
     new e1282bfa6 TIKA-4252: add some setters, hash, equals, etc
e1282bfa6 is described below

commit e1282bfa68163bce7e770d1b50ff9469ffad8d86
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed May 22 22:46:57 2024 -0500

    TIKA-4252: add some setters, hash, equals, etc
---
 .../java/org/apache/tika/pipes/PipesServer.java    |  6 +-
 .../apache/tika/pipes/fetcher/EmptyFetcher.java    |  2 +-
 .../org/apache/tika/pipes/fetcher/Fetcher.java     |  6 +-
 .../apache/tika/pipes/fetcher/RangeFetcher.java    |  7 +-
 .../tika/pipes/fetcher/fs/FileSystemFetcher.java   |  8 +--
 .../apache/tika/pipes/fetcher/url/UrlFetcher.java  |  4 +-
 .../org/apache/tika/pipes/async/MockFetcher.java   |  2 +-
 .../org/apache/tika/pipes/fetcher/MockFetcher.java |  2 +-
 .../tika/pipes/fetcher/azblob/AZBlobFetcher.java   |  6 +-
 .../apache/tika/pipes/fetcher/gcs/GCSFetcher.java  |  6 +-
 .../tika/pipes/fetcher/http/HttpFetcher.java       | 33 +++++++--
 .../tika/pipes/fetcher/http/HttpFetcherTest.java   | 78 +++++++++++++++++-----
 .../apache/tika/pipes/fetcher/s3/S3Fetcher.java    |  8 +--
 .../tika/server/core/FetcherStreamFactory.java     |  6 +-
 14 files changed, 125 insertions(+), 49 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java 
b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index 59f870781..24287b513 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -462,10 +462,10 @@ public class PipesServer implements Runnable {
                 throw new IllegalArgumentException(
                         "fetch key has a range, but the fetcher is not a range 
fetcher");
             }
-            Metadata metadata = new Metadata();
+            Metadata responseMetadata = new Metadata();
             try (InputStream stream = ((RangeFetcher) 
fetcher).fetch(fetchKey.getFetchKey(),
-                    fetchKey.getRangeStart(), fetchKey.getRangeEnd(), 
metadata)) {
-                return parseWithStream(t, stream, metadata);
+                    fetchKey.getRangeStart(), fetchKey.getRangeEnd(), 
responseMetadata, t.getFetchRequestMetadata())) {
+                return parseWithStream(t, stream, responseMetadata);
             } catch (SecurityException e) {
                 LOG.error("security exception " + t.getId(), e);
                 throw e;
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
index 022d00a8c..0439e4ab6 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
@@ -30,7 +30,7 @@ public class EmptyFetcher implements Fetcher {
     }
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
         return null;
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
index 1b3fa2a24..a4cd8839b 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
@@ -33,5 +33,9 @@ public interface Fetcher {
 
     String getName();
 
-    InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException;
+    default InputStream fetch(String fetchKey, Metadata userMetadata) throws 
TikaException, IOException {
+        return fetch(fetchKey, userMetadata, new Metadata());
+    }
+
+    InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException;
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
index 0a3ceae7f..e1aa39bce 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
@@ -28,7 +28,12 @@ import org.apache.tika.metadata.Metadata;
 public interface RangeFetcher extends Fetcher {
     //At some point, Tika 3.x?, we may want to add optional ranges to the 
fetchKey?
 
-    InputStream fetch(String fetchKey, long startOffset, long endOffset, 
Metadata metadata)
+    default InputStream fetch(String fetchKey, long startOffset, long 
endOffset, Metadata userMetadata)
+            throws TikaException, IOException {
+        return fetch(fetchKey, startOffset, endOffset, userMetadata, new 
Metadata());
+    }
+
+    InputStream fetch(String fetchKey, long startOffset, long endOffset, 
Metadata userMetadata, Metadata fetchRequestMetadata)
             throws TikaException, IOException;
 
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index d926e3ca6..5255089f6 100644
--- 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++ 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -58,7 +58,7 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
     }
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
IOException, TikaException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws IOException, TikaException {
 
         if (fetchKey.contains("\u0000")) {
             throw new IllegalArgumentException("Path must not contain \u0000. 
" +
@@ -76,8 +76,8 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
             p = Paths.get(fetchKey);
         }
 
-        metadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey);
-        updateFileSystemMetadata(p, metadata);
+        userMetadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey);
+        updateFileSystemMetadata(p, userMetadata);
 
         if (!Files.isRegularFile(p)) {
             if (basePath != null && !Files.isDirectory(basePath)) {
@@ -87,7 +87,7 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
             }
         }
 
-        return TikaInputStream.get(p, metadata);
+        return TikaInputStream.get(p, userMetadata);
     }
 
     private void updateFileSystemMetadata(Path p, Metadata metadata) throws 
IOException {
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
index f415a3560..0aaf74eb2 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
@@ -35,7 +35,7 @@ import org.apache.tika.pipes.fetcher.AbstractFetcher;
 public class UrlFetcher extends AbstractFetcher {
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
IOException, TikaException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws IOException, TikaException {
         if (fetchKey.contains("\u0000")) {
             throw new IllegalArgumentException("URL must not contain \u0000. " 
+
                     "Please review the life decisions that led you to 
requesting " +
@@ -46,7 +46,7 @@ public class UrlFetcher extends AbstractFetcher {
                     "The UrlFetcher does not fetch from file shares; " +
                     "please use the FileSystemFetcher");
         }
-        return TikaInputStream.get(new URL(fetchKey), metadata);
+        return TikaInputStream.get(new URL(fetchKey), userMetadata);
     }
 
 }
diff --git 
a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java 
b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
index 10af275e3..636d6d9f8 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
@@ -37,7 +37,7 @@ public class MockFetcher implements Fetcher {
     }
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
         return new ByteArrayInputStream(BYTES);
     }
 }
diff --git 
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java 
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
index 060432724..30c6c59e0 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
@@ -64,7 +64,7 @@ public class MockFetcher extends AbstractFetcher implements 
Initializable {
 
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
         return byteString == null ? new ByteArrayInputStream(new byte[0]) :
                 new 
ByteArrayInputStream(byteString.getBytes(StandardCharsets.UTF_8));
     }
diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
 
b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
index dee903040..49dfe06ed 100644
--- 
a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
+++ 
b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
@@ -70,7 +70,7 @@ public class AZBlobFetcher extends AbstractFetcher implements 
Initializable {
     private boolean spoolToTemp = true;
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
 
         LOGGER.debug("about to fetch fetchkey={} from endpoint ({})", 
fetchKey, endpoint);
 
@@ -81,7 +81,7 @@ public class AZBlobFetcher extends AbstractFetcher implements 
Initializable {
                 BlobProperties properties = blobClient.getProperties();
                 if (properties.getMetadata() != null) {
                     for (Map.Entry<String, String> e : 
properties.getMetadata().entrySet()) {
-                        metadata.add(PREFIX + ":" + e.getKey(), e.getValue());
+                        userMetadata.add(PREFIX + ":" + e.getKey(), 
e.getValue());
                     }
                 }
             }
@@ -94,7 +94,7 @@ public class AZBlobFetcher extends AbstractFetcher implements 
Initializable {
                 try (OutputStream os = Files.newOutputStream(tmp)) {
                     blobClient.download(os);
                 }
-                TikaInputStream tis = TikaInputStream.get(tmp, metadata, 
tmpResources);
+                TikaInputStream tis = TikaInputStream.get(tmp, userMetadata, 
tmpResources);
                 long elapsed = System.currentTimeMillis() - start;
                 LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
                 return tis;
diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
 
b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
index 6881c5a66..79175bb79 100644
--- 
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
+++ 
b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
@@ -55,7 +55,7 @@ public class GCSFetcher extends AbstractFetcher implements 
Initializable {
     private boolean spoolToTemp = true;
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
 
         LOGGER.debug("about to fetch fetchkey={} from bucket ({})", fetchKey, 
bucket);
 
@@ -65,7 +65,7 @@ public class GCSFetcher extends AbstractFetcher implements 
Initializable {
             if (extractUserMetadata) {
                 if (blob.getMetadata() != null) {
                     for (Map.Entry<String, String> e : 
blob.getMetadata().entrySet()) {
-                        metadata.add(PREFIX + ":" + e.getKey(), e.getValue());
+                        userMetadata.add(PREFIX + ":" + e.getKey(), 
e.getValue());
                     }
                 }
             }
@@ -76,7 +76,7 @@ public class GCSFetcher extends AbstractFetcher implements 
Initializable {
                 TemporaryResources tmpResources = new TemporaryResources();
                 Path tmp = tmpResources.createTempFile();
                 blob.downloadTo(tmp);
-                TikaInputStream tis = TikaInputStream.get(tmp, metadata, 
tmpResources);
+                TikaInputStream tis = TikaInputStream.get(tmp, userMetadata, 
tmpResources);
                 long elapsed = System.currentTimeMillis() - start;
                 LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
                 return tis;
diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
 
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 26b45f8bf..d6bc472ae 100644
--- 
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ 
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -60,7 +60,6 @@ import org.apache.tika.config.Initializable;
 import org.apache.tika.config.InitializableProblemHandler;
 import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.TikaTimeoutException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
@@ -134,30 +133,47 @@ public class HttpFetcher extends AbstractFetcher 
implements Initializable, Range
     //By default httpclient adds e.g. "Apache-HttpClient/4.5.13 (Java/x.y.z)"
     private String userAgent = null;
 
-
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
IOException, TikaException {
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws IOException {
         HttpGet get = new HttpGet(fetchKey);
         RequestConfig requestConfig =
                 RequestConfig.custom()
                         .setMaxRedirects(maxRedirects)
                         .setRedirectsEnabled(true).build();
         get.setConfig(requestConfig);
-        if (! StringUtils.isBlank(userAgent)) {
+        setHttpRequestHeaders(fetchRequestMetadata, get);
+
+        return execute(get, userMetadata, httpClient, true);
+    }
+
+    private void setHttpRequestHeaders(Metadata fetchRequestMetadata, HttpGet 
get) {
+        if (!StringUtils.isBlank(userAgent)) {
             get.setHeader(USER_AGENT, userAgent);
         }
-        return execute(get, metadata, httpClient, true);
+        // additional http request headers can be sent in here.
+        String[] httpRequestHeaders = 
fetchRequestMetadata.getValues("httpRequestHeaders");
+        if (httpRequestHeaders != null) {
+            for (String httpRequestHeader : httpRequestHeaders) {
+                int idxOfEquals = httpRequestHeader.indexOf(':');
+                if (idxOfEquals == -1) {
+                    continue;
+                }
+                String headerKey = httpRequestHeader.substring(0, 
idxOfEquals).trim();
+                String headerValue = httpRequestHeader.substring(idxOfEquals + 
1).trim();
+                get.setHeader(headerKey, headerValue);
+            }
+        }
     }
 
     @Override
-    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata metadata)
+    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata userMetadata, Metadata fetchRequestMetadata)
             throws IOException {
         HttpGet get = new HttpGet(fetchKey);
         if (! StringUtils.isBlank(userAgent)) {
             get.setHeader(USER_AGENT, userAgent);
         }
         get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
-        return execute(get, metadata, httpClient, true);
+        return execute(get, userMetadata, httpClient, true);
     }
 
     private InputStream execute(HttpGet get, Metadata metadata, HttpClient 
client,
@@ -455,4 +471,7 @@ public class HttpFetcher extends AbstractFetcher implements 
Initializable, Range
         this.httpClientFactory = httpClientFactory;
     }
 
+    void setHttpClient(HttpClient httpClient) {
+        this.httpClient = httpClient;
+    }
 }
diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
 
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index e26e6cfcb..9bec52925 100644
--- 
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++ 
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -37,19 +37,27 @@ import org.apache.commons.io.IOUtils;
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.http.HttpStatus;
+import org.apache.http.ProtocolVersion;
 import org.apache.http.StatusLine;
 import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.entity.StringEntity;
 import org.apache.http.protocol.HttpContext;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.client.HttpClientFactory;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.pipes.fetcher.FetcherManager;
 
@@ -70,18 +78,19 @@ public class HttpFetcherTest extends TikaTest {
 
     @Test
     public void test2xxResponse() throws TikaException, IOException {
-        final Metadata meta = new Metadata();
-        meta.set(TikaCoreProperties.RESOURCE_NAME_KEY, "fileName");
+        Metadata userMetadata = new Metadata();
+        Metadata fetchRequestMetadata = new Metadata();
+        userMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "fileName");
 
-        try (final InputStream ignored = httpFetcher.fetch(TEST_URL, meta)) {
+        try (final InputStream ignored = httpFetcher.fetch(TEST_URL, 
userMetadata, fetchRequestMetadata)) {
             // HTTP headers added into meta
-            assertEquals("200", meta.get("http-header:status-code"));
-            assertEquals(TEST_URL, meta.get("http-connection:target-url"));
+            assertEquals("200", userMetadata.get("http-header:status-code"));
+            assertEquals(TEST_URL, 
userMetadata.get("http-connection:target-url"));
             // Content size included in meta
-            assertEquals("15", meta.get("Content-Length"));
+            assertEquals("15", userMetadata.get("Content-Length"));
 
             // Filename passed in should be preserved
-            assertEquals("fileName", 
meta.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+            assertEquals("fileName", 
userMetadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
         }
     }
 
@@ -90,12 +99,13 @@ public class HttpFetcherTest extends TikaTest {
         // Setup client to respond with 403
         mockClientResponse(buildMockResponse(HttpStatus.SC_FORBIDDEN, null));
 
-        final Metadata meta = new Metadata();
-        assertThrows(IOException.class, () -> httpFetcher.fetch(TEST_URL, 
meta));
+        Metadata userMetadata = new Metadata();
+        Metadata fetchRequestMetadata = new Metadata();
+        assertThrows(IOException.class, () -> httpFetcher.fetch(TEST_URL, 
userMetadata, fetchRequestMetadata));
 
         // Meta still populated
-        assertEquals("403", meta.get("http-header:status-code"));
-        assertEquals(TEST_URL, meta.get("http-connection:target-url"));
+        assertEquals("403", userMetadata.get("http-header:status-code"));
+        assertEquals(TEST_URL, userMetadata.get("http-connection:target-url"));
     }
 
     @Test
@@ -104,9 +114,10 @@ public class HttpFetcherTest extends TikaTest {
         String url = "https://t.co/cvfkWAEIxw?amp=1";
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         Metadata metadata = new Metadata();
+        Metadata fetchRequestMetadata = new Metadata();
         HttpFetcher httpFetcher =
                 (HttpFetcher) 
getFetcherManager("tika-config-http.xml").getFetcher("http");
-        try (InputStream is = httpFetcher.fetch(url, metadata)) {
+        try (InputStream is = httpFetcher.fetch(url, metadata, 
fetchRequestMetadata)) {
             IOUtils.copy(is, bos);
         }
         //debug(metadata);
@@ -119,18 +130,55 @@ public class HttpFetcherTest extends TikaTest {
                 
"https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2020-45/segments/1603107869785.9/warc/CC-MAIN-20201020021700-20201020051700-00529.warc.gz";
         long start = 969596307;
         long end = start + 1408 - 1;
-        Metadata metadata = new Metadata();
+        Metadata responseMetadata = new Metadata();
         HttpFetcher httpFetcher =
                 (HttpFetcher) 
getFetcherManager("tika-config-http.xml").getFetcher("http");
         try (TemporaryResources tmp = new TemporaryResources()) {
-            Path tmpPath = tmp.createTempFile(metadata);
-            try (InputStream is = httpFetcher.fetch(url, start, end, 
metadata)) {
+            Path tmpPath = tmp.createTempFile(responseMetadata);
+            try (InputStream is = httpFetcher.fetch(url, start, end, 
responseMetadata, new Metadata())) {
                 Files.copy(new GZIPInputStream(is), tmpPath, 
StandardCopyOption.REPLACE_EXISTING);
             }
             assertEquals(2461, Files.size(tmpPath));
         }
     }
 
+    @Test
+    public void testHttpRequestHeaders() throws Exception {
+        HttpClient httpClient = Mockito.mock(HttpClient.class);
+        CloseableHttpResponse response = mock(CloseableHttpResponse.class);
+        ArgumentCaptor<HttpGet> httpGetArgumentCaptor = 
ArgumentCaptor.forClass(HttpGet.class);
+
+        when(response.getStatusLine()).thenReturn(new StatusLine() {
+            @Override
+            public ProtocolVersion getProtocolVersion() {
+                return new HttpGet("http://localhost").getProtocolVersion();
+            }
+
+            @Override
+            public int getStatusCode() {
+                return 200;
+            }
+
+            @Override
+            public String getReasonPhrase() {
+                return null;
+            }
+        });
+
+        when(httpClient.execute(httpGetArgumentCaptor.capture(), 
any(HttpContext.class)))
+                .thenReturn(response);
+        when(response.getEntity()).thenReturn(new StringEntity("Hi"));
+
+        Metadata userMetadata = new Metadata();
+        userMetadata.set(Property.externalText("customPropName"), 
"customPropVal");
+        Metadata fetchRequestMetadata = new Metadata();
+        fetchRequestMetadata.set(Property.externalText("httpRequestHeaders"), 
new String[] {"nick1: val1", "nick2: val2"});
+        httpFetcher.setHttpClient(httpClient);
+        httpFetcher.fetch("http://localhost", userMetadata, fetchRequestMetadata);
+        HttpGet httpGet = httpGetArgumentCaptor.getValue();
+        Assertions.assertEquals("val1", 
httpGet.getHeaders("nick1")[0].getValue());
+        Assertions.assertEquals("val2", 
httpGet.getHeaders("nick2")[0].getValue());
+    }
 
     FetcherManager getFetcherManager(String path) throws Exception {
         return FetcherManager.load(
diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
 
b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
index b57c361b9..35c0faacf 100644
--- 
a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
+++ 
b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
@@ -106,12 +106,12 @@ public class S3Fetcher extends AbstractFetcher implements 
Initializable, RangeFe
     private boolean pathStyleAccessEnabled = false;
 
     @Override
-    public InputStream fetch(String fetchKey, Metadata metadata) throws 
TikaException, IOException {
-        return fetch(fetchKey, -1, -1, metadata);
+    public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata 
fetchRequestMetadata) throws TikaException, IOException {
+        return fetch(fetchKey, -1, -1, userMetadata, fetchRequestMetadata);
     }
 
     @Override
-    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata metadata)
+    public InputStream fetch(String fetchKey, long startRange, long endRange, 
Metadata userMetadata, Metadata fetchRequestMetadata)
             throws TikaException, IOException {
         String theFetchKey = StringUtils.isBlank(prefix) ? fetchKey : prefix + 
fetchKey;
 
@@ -129,7 +129,7 @@ public class S3Fetcher extends AbstractFetcher implements 
Initializable, RangeFe
         do {
             try {
                 long start = System.currentTimeMillis();
-                InputStream is = _fetch(theFetchKey, metadata, startRange, 
endRange);
+                InputStream is = _fetch(theFetchKey, userMetadata, startRange, 
endRange);
                 long elapsed = System.currentTimeMillis() - start;
                 LOGGER.debug("total to fetch {}", elapsed);
                 return is;
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
index f173808d6..6b57467b4 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
@@ -63,7 +63,7 @@ public class FetcherStreamFactory implements 
InputStreamFactory {
     }
 
     @Override
-    public InputStream getInputStream(InputStream is, Metadata metadata, 
HttpHeaders httpHeaders,
+    public InputStream getInputStream(InputStream is, Metadata 
responseMetadata, HttpHeaders httpHeaders,
                                       UriInfo uriInfo) throws IOException {
         MultivaluedMap params = (uriInfo == null) ? null : 
uriInfo.getQueryParameters();
         String fetcherName = getParam("fetcherName", httpHeaders, params);
@@ -100,9 +100,9 @@ public class FetcherStreamFactory implements 
InputStreamFactory {
                                         "that doesn't support range 
fetching?!");
                     }
                     return ((RangeFetcher) fetcher).fetch(fetchKey, 
fetchRangeStart, fetchRangeEnd,
-                            metadata);
+                            responseMetadata, new Metadata());
                 } else {
-                    return fetcher.fetch(fetchKey, metadata);
+                    return fetcher.fetch(fetchKey, responseMetadata);
                 }
             } catch (TikaException e) {
                 throw new IOException(e);

Reply via email to