This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch TIKA-4252-fetch-tuple-missing-metadata-4
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to
refs/heads/TIKA-4252-fetch-tuple-missing-metadata-4 by this push:
new e1282bfa6 TIKA-4252: add some setters, hash, equals, etc
e1282bfa6 is described below
commit e1282bfa68163bce7e770d1b50ff9469ffad8d86
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed May 22 22:46:57 2024 -0500
TIKA-4252: add some setters, hash, equals, etc
---
.../java/org/apache/tika/pipes/PipesServer.java | 6 +-
.../apache/tika/pipes/fetcher/EmptyFetcher.java | 2 +-
.../org/apache/tika/pipes/fetcher/Fetcher.java | 6 +-
.../apache/tika/pipes/fetcher/RangeFetcher.java | 7 +-
.../tika/pipes/fetcher/fs/FileSystemFetcher.java | 8 +--
.../apache/tika/pipes/fetcher/url/UrlFetcher.java | 4 +-
.../org/apache/tika/pipes/async/MockFetcher.java | 2 +-
.../org/apache/tika/pipes/fetcher/MockFetcher.java | 2 +-
.../tika/pipes/fetcher/azblob/AZBlobFetcher.java | 6 +-
.../apache/tika/pipes/fetcher/gcs/GCSFetcher.java | 6 +-
.../tika/pipes/fetcher/http/HttpFetcher.java | 33 +++++++--
.../tika/pipes/fetcher/http/HttpFetcherTest.java | 78 +++++++++++++++++-----
.../apache/tika/pipes/fetcher/s3/S3Fetcher.java | 8 +--
.../tika/server/core/FetcherStreamFactory.java | 6 +-
14 files changed, 125 insertions(+), 49 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index 59f870781..24287b513 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -462,10 +462,10 @@ public class PipesServer implements Runnable {
throw new IllegalArgumentException(
"fetch key has a range, but the fetcher is not a range
fetcher");
}
- Metadata metadata = new Metadata();
+ Metadata responseMetadata = new Metadata();
try (InputStream stream = ((RangeFetcher)
fetcher).fetch(fetchKey.getFetchKey(),
- fetchKey.getRangeStart(), fetchKey.getRangeEnd(),
metadata)) {
- return parseWithStream(t, stream, metadata);
+ fetchKey.getRangeStart(), fetchKey.getRangeEnd(),
responseMetadata, t.getFetchRequestMetadata())) {
+ return parseWithStream(t, stream, responseMetadata);
} catch (SecurityException e) {
LOG.error("security exception " + t.getId(), e);
throw e;
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
index 022d00a8c..0439e4ab6 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
@@ -30,7 +30,7 @@ public class EmptyFetcher implements Fetcher {
}
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
return null;
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
index 1b3fa2a24..a4cd8839b 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
@@ -33,5 +33,9 @@ public interface Fetcher {
String getName();
- InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException;
+ default InputStream fetch(String fetchKey, Metadata userMetadata) throws
TikaException, IOException {
+ return fetch(fetchKey, userMetadata, new Metadata());
+ }
+
+ InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException;
}
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
index 0a3ceae7f..e1aa39bce 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java
@@ -28,7 +28,12 @@ import org.apache.tika.metadata.Metadata;
public interface RangeFetcher extends Fetcher {
//At some point, Tika 3.x?, we may want to add optional ranges to the
fetchKey?
- InputStream fetch(String fetchKey, long startOffset, long endOffset,
Metadata metadata)
+ default InputStream fetch(String fetchKey, long startOffset, long
endOffset, Metadata userMetadata)
+ throws TikaException, IOException {
+ return fetch(fetchKey, startOffset, endOffset, userMetadata, new
Metadata());
+ }
+
+ InputStream fetch(String fetchKey, long startOffset, long endOffset,
Metadata userMetadata, Metadata fetchRequestMetadata)
throws TikaException, IOException;
}
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index d926e3ca6..5255089f6 100644
---
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -58,7 +58,7 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
}
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
IOException, TikaException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws IOException, TikaException {
if (fetchKey.contains("\u0000")) {
throw new IllegalArgumentException("Path must not contain \u0000.
" +
@@ -76,8 +76,8 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
p = Paths.get(fetchKey);
}
- metadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey);
- updateFileSystemMetadata(p, metadata);
+ userMetadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey);
+ updateFileSystemMetadata(p, userMetadata);
if (!Files.isRegularFile(p)) {
if (basePath != null && !Files.isDirectory(basePath)) {
@@ -87,7 +87,7 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
}
}
- return TikaInputStream.get(p, metadata);
+ return TikaInputStream.get(p, userMetadata);
}
private void updateFileSystemMetadata(Path p, Metadata metadata) throws
IOException {
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
index f415a3560..0aaf74eb2 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java
@@ -35,7 +35,7 @@ import org.apache.tika.pipes.fetcher.AbstractFetcher;
public class UrlFetcher extends AbstractFetcher {
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
IOException, TikaException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws IOException, TikaException {
if (fetchKey.contains("\u0000")) {
throw new IllegalArgumentException("URL must not contain \u0000. "
+
"Please review the life decisions that led you to
requesting " +
@@ -46,7 +46,7 @@ public class UrlFetcher extends AbstractFetcher {
"The UrlFetcher does not fetch from file shares; " +
"please use the FileSystemFetcher");
}
- return TikaInputStream.get(new URL(fetchKey), metadata);
+ return TikaInputStream.get(new URL(fetchKey), userMetadata);
}
}
diff --git
a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
index 10af275e3..636d6d9f8 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java
@@ -37,7 +37,7 @@ public class MockFetcher implements Fetcher {
}
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
return new ByteArrayInputStream(BYTES);
}
}
diff --git
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
index 060432724..30c6c59e0 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java
@@ -64,7 +64,7 @@ public class MockFetcher extends AbstractFetcher implements
Initializable {
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
return byteString == null ? new ByteArrayInputStream(new byte[0]) :
new
ByteArrayInputStream(byteString.getBytes(StandardCharsets.UTF_8));
}
diff --git
a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
index dee903040..49dfe06ed 100644
---
a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
+++
b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java
@@ -70,7 +70,7 @@ public class AZBlobFetcher extends AbstractFetcher implements
Initializable {
private boolean spoolToTemp = true;
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
LOGGER.debug("about to fetch fetchkey={} from endpoint ({})",
fetchKey, endpoint);
@@ -81,7 +81,7 @@ public class AZBlobFetcher extends AbstractFetcher implements
Initializable {
BlobProperties properties = blobClient.getProperties();
if (properties.getMetadata() != null) {
for (Map.Entry<String, String> e :
properties.getMetadata().entrySet()) {
- metadata.add(PREFIX + ":" + e.getKey(), e.getValue());
+ userMetadata.add(PREFIX + ":" + e.getKey(),
e.getValue());
}
}
}
@@ -94,7 +94,7 @@ public class AZBlobFetcher extends AbstractFetcher implements
Initializable {
try (OutputStream os = Files.newOutputStream(tmp)) {
blobClient.download(os);
}
- TikaInputStream tis = TikaInputStream.get(tmp, metadata,
tmpResources);
+ TikaInputStream tis = TikaInputStream.get(tmp, userMetadata,
tmpResources);
long elapsed = System.currentTimeMillis() - start;
LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
return tis;
diff --git
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
index 6881c5a66..79175bb79 100644
---
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
+++
b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
@@ -55,7 +55,7 @@ public class GCSFetcher extends AbstractFetcher implements
Initializable {
private boolean spoolToTemp = true;
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
LOGGER.debug("about to fetch fetchkey={} from bucket ({})", fetchKey,
bucket);
@@ -65,7 +65,7 @@ public class GCSFetcher extends AbstractFetcher implements
Initializable {
if (extractUserMetadata) {
if (blob.getMetadata() != null) {
for (Map.Entry<String, String> e :
blob.getMetadata().entrySet()) {
- metadata.add(PREFIX + ":" + e.getKey(), e.getValue());
+ userMetadata.add(PREFIX + ":" + e.getKey(),
e.getValue());
}
}
}
@@ -76,7 +76,7 @@ public class GCSFetcher extends AbstractFetcher implements
Initializable {
TemporaryResources tmpResources = new TemporaryResources();
Path tmp = tmpResources.createTempFile();
blob.downloadTo(tmp);
- TikaInputStream tis = TikaInputStream.get(tmp, metadata,
tmpResources);
+ TikaInputStream tis = TikaInputStream.get(tmp, userMetadata,
tmpResources);
long elapsed = System.currentTimeMillis() - start;
LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
return tis;
diff --git
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 26b45f8bf..d6bc472ae 100644
---
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -60,7 +60,6 @@ import org.apache.tika.config.Initializable;
import org.apache.tika.config.InitializableProblemHandler;
import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaTimeoutException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
@@ -134,30 +133,47 @@ public class HttpFetcher extends AbstractFetcher
implements Initializable, Range
//By default httpclient adds e.g. "Apache-HttpClient/4.5.13 (Java/x.y.z)"
private String userAgent = null;
-
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
IOException, TikaException {
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws IOException {
HttpGet get = new HttpGet(fetchKey);
RequestConfig requestConfig =
RequestConfig.custom()
.setMaxRedirects(maxRedirects)
.setRedirectsEnabled(true).build();
get.setConfig(requestConfig);
- if (! StringUtils.isBlank(userAgent)) {
+ setHttpRequestHeaders(fetchRequestMetadata, get);
+
+ return execute(get, userMetadata, httpClient, true);
+ }
+
+ private void setHttpRequestHeaders(Metadata fetchRequestMetadata, HttpGet
get) {
+ if (!StringUtils.isBlank(userAgent)) {
get.setHeader(USER_AGENT, userAgent);
}
- return execute(get, metadata, httpClient, true);
+ // additional http request headers can be sent in here.
+ String[] httpRequestHeaders =
fetchRequestMetadata.getValues("httpRequestHeaders");
+ if (httpRequestHeaders != null) {
+ for (String httpRequestHeader : httpRequestHeaders) {
+ int idxOfEquals = httpRequestHeader.indexOf(':');
+ if (idxOfEquals == -1) {
+ continue;
+ }
+ String headerKey = httpRequestHeader.substring(0,
idxOfEquals).trim();
+ String headerValue = httpRequestHeader.substring(idxOfEquals +
1).trim();
+ get.setHeader(headerKey, headerValue);
+ }
+ }
}
@Override
- public InputStream fetch(String fetchKey, long startRange, long endRange,
Metadata metadata)
+ public InputStream fetch(String fetchKey, long startRange, long endRange,
Metadata userMetadata, Metadata fetchRequestMetadata)
throws IOException {
HttpGet get = new HttpGet(fetchKey);
if (! StringUtils.isBlank(userAgent)) {
get.setHeader(USER_AGENT, userAgent);
}
get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
- return execute(get, metadata, httpClient, true);
+ return execute(get, userMetadata, httpClient, true);
}
private InputStream execute(HttpGet get, Metadata metadata, HttpClient
client,
@@ -455,4 +471,7 @@ public class HttpFetcher extends AbstractFetcher implements
Initializable, Range
this.httpClientFactory = httpClientFactory;
}
+ void setHttpClient(HttpClient httpClient) {
+ this.httpClient = httpClient;
+ }
}
diff --git
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index e26e6cfcb..9bec52925 100644
---
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++
b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -37,19 +37,27 @@ import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
+import org.apache.http.ProtocolVersion;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.entity.StringEntity;
import org.apache.http.protocol.HttpContext;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
import org.apache.tika.TikaTest;
import org.apache.tika.client.HttpClientFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.pipes.fetcher.FetcherManager;
@@ -70,18 +78,19 @@ public class HttpFetcherTest extends TikaTest {
@Test
public void test2xxResponse() throws TikaException, IOException {
- final Metadata meta = new Metadata();
- meta.set(TikaCoreProperties.RESOURCE_NAME_KEY, "fileName");
+ Metadata userMetadata = new Metadata();
+ Metadata fetchRequestMetadata = new Metadata();
+ userMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "fileName");
- try (final InputStream ignored = httpFetcher.fetch(TEST_URL, meta)) {
+ try (final InputStream ignored = httpFetcher.fetch(TEST_URL,
userMetadata, fetchRequestMetadata)) {
// HTTP headers added into meta
- assertEquals("200", meta.get("http-header:status-code"));
- assertEquals(TEST_URL, meta.get("http-connection:target-url"));
+ assertEquals("200", userMetadata.get("http-header:status-code"));
+ assertEquals(TEST_URL,
userMetadata.get("http-connection:target-url"));
// Content size included in meta
- assertEquals("15", meta.get("Content-Length"));
+ assertEquals("15", userMetadata.get("Content-Length"));
// Filename passed in should be preserved
- assertEquals("fileName",
meta.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ assertEquals("fileName",
userMetadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
}
}
@@ -90,12 +99,13 @@ public class HttpFetcherTest extends TikaTest {
// Setup client to respond with 403
mockClientResponse(buildMockResponse(HttpStatus.SC_FORBIDDEN, null));
- final Metadata meta = new Metadata();
- assertThrows(IOException.class, () -> httpFetcher.fetch(TEST_URL,
meta));
+ Metadata userMetadata = new Metadata();
+ Metadata fetchRequestMetadata = new Metadata();
+ assertThrows(IOException.class, () -> httpFetcher.fetch(TEST_URL,
userMetadata, fetchRequestMetadata));
// Meta still populated
- assertEquals("403", meta.get("http-header:status-code"));
- assertEquals(TEST_URL, meta.get("http-connection:target-url"));
+ assertEquals("403", userMetadata.get("http-header:status-code"));
+ assertEquals(TEST_URL, userMetadata.get("http-connection:target-url"));
}
@Test
@@ -104,9 +114,10 @@ public class HttpFetcherTest extends TikaTest {
String url = "https://t.co/cvfkWAEIxw?amp=1";
ByteArrayOutputStream bos = new ByteArrayOutputStream();
Metadata metadata = new Metadata();
+ Metadata fetchRequestMetadata = new Metadata();
HttpFetcher httpFetcher =
(HttpFetcher)
getFetcherManager("tika-config-http.xml").getFetcher("http");
- try (InputStream is = httpFetcher.fetch(url, metadata)) {
+ try (InputStream is = httpFetcher.fetch(url, metadata,
fetchRequestMetadata)) {
IOUtils.copy(is, bos);
}
//debug(metadata);
@@ -119,18 +130,55 @@ public class HttpFetcherTest extends TikaTest {
"https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2020-45/segments/1603107869785.9/warc/CC-MAIN-20201020021700-20201020051700-00529.warc.gz";
long start = 969596307;
long end = start + 1408 - 1;
- Metadata metadata = new Metadata();
+ Metadata responseMetadata = new Metadata();
HttpFetcher httpFetcher =
(HttpFetcher)
getFetcherManager("tika-config-http.xml").getFetcher("http");
try (TemporaryResources tmp = new TemporaryResources()) {
- Path tmpPath = tmp.createTempFile(metadata);
- try (InputStream is = httpFetcher.fetch(url, start, end,
metadata)) {
+ Path tmpPath = tmp.createTempFile(responseMetadata);
+ try (InputStream is = httpFetcher.fetch(url, start, end,
responseMetadata, new Metadata())) {
Files.copy(new GZIPInputStream(is), tmpPath,
StandardCopyOption.REPLACE_EXISTING);
}
assertEquals(2461, Files.size(tmpPath));
}
}
+ @Test
+ public void testHttpRequestHeaders() throws Exception {
+ HttpClient httpClient = Mockito.mock(HttpClient.class);
+ CloseableHttpResponse response = mock(CloseableHttpResponse.class);
+ ArgumentCaptor<HttpGet> httpGetArgumentCaptor =
ArgumentCaptor.forClass(HttpGet.class);
+
+ when(response.getStatusLine()).thenReturn(new StatusLine() {
+ @Override
+ public ProtocolVersion getProtocolVersion() {
+ return new HttpGet("http://localhost").getProtocolVersion();
+ }
+
+ @Override
+ public int getStatusCode() {
+ return 200;
+ }
+
+ @Override
+ public String getReasonPhrase() {
+ return null;
+ }
+ });
+
+ when(httpClient.execute(httpGetArgumentCaptor.capture(),
any(HttpContext.class)))
+ .thenReturn(response);
+ when(response.getEntity()).thenReturn(new StringEntity("Hi"));
+
+ Metadata userMetadata = new Metadata();
+ userMetadata.set(Property.externalText("customPropName"),
"customPropVal");
+ Metadata fetchRequestMetadata = new Metadata();
+ fetchRequestMetadata.set(Property.externalText("httpRequestHeaders"),
new String[] {"nick1: val1", "nick2: val2"});
+ httpFetcher.setHttpClient(httpClient);
+ httpFetcher.fetch("http://localhost", userMetadata,
fetchRequestMetadata);
+ HttpGet httpGet = httpGetArgumentCaptor.getValue();
+ Assertions.assertEquals("val1",
httpGet.getHeaders("nick1")[0].getValue());
+ Assertions.assertEquals("val2",
httpGet.getHeaders("nick2")[0].getValue());
+ }
FetcherManager getFetcherManager(String path) throws Exception {
return FetcherManager.load(
diff --git
a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
index b57c361b9..35c0faacf 100644
---
a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
+++
b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
@@ -106,12 +106,12 @@ public class S3Fetcher extends AbstractFetcher implements
Initializable, RangeFe
private boolean pathStyleAccessEnabled = false;
@Override
- public InputStream fetch(String fetchKey, Metadata metadata) throws
TikaException, IOException {
- return fetch(fetchKey, -1, -1, metadata);
+ public InputStream fetch(String fetchKey, Metadata userMetadata, Metadata
fetchRequestMetadata) throws TikaException, IOException {
+ return fetch(fetchKey, -1, -1, userMetadata, fetchRequestMetadata);
}
@Override
- public InputStream fetch(String fetchKey, long startRange, long endRange,
Metadata metadata)
+ public InputStream fetch(String fetchKey, long startRange, long endRange,
Metadata userMetadata, Metadata fetchRequestMetadata)
throws TikaException, IOException {
String theFetchKey = StringUtils.isBlank(prefix) ? fetchKey : prefix +
fetchKey;
@@ -129,7 +129,7 @@ public class S3Fetcher extends AbstractFetcher implements
Initializable, RangeFe
do {
try {
long start = System.currentTimeMillis();
- InputStream is = _fetch(theFetchKey, metadata, startRange,
endRange);
+ InputStream is = _fetch(theFetchKey, userMetadata, startRange,
endRange);
long elapsed = System.currentTimeMillis() - start;
LOGGER.debug("total to fetch {}", elapsed);
return is;
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
index f173808d6..6b57467b4 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
@@ -63,7 +63,7 @@ public class FetcherStreamFactory implements
InputStreamFactory {
}
@Override
- public InputStream getInputStream(InputStream is, Metadata metadata,
HttpHeaders httpHeaders,
+ public InputStream getInputStream(InputStream is, Metadata
responseMetadata, HttpHeaders httpHeaders,
UriInfo uriInfo) throws IOException {
MultivaluedMap params = (uriInfo == null) ? null :
uriInfo.getQueryParameters();
String fetcherName = getParam("fetcherName", httpHeaders, params);
@@ -100,9 +100,9 @@ public class FetcherStreamFactory implements
InputStreamFactory {
"that doesn't support range
fetching?!");
}
return ((RangeFetcher) fetcher).fetch(fetchKey,
fetchRangeStart, fetchRangeEnd,
- metadata);
+ responseMetadata, new Metadata());
} else {
- return fetcher.fetch(fetchKey, metadata);
+ return fetcher.fetch(fetchKey, responseMetadata);
}
} catch (TikaException e) {
throw new IOException(e);