This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 89c6b7229 TIKA-3864 - url decode fetchkey when sent in via a header.
89c6b7229 is described below
commit 89c6b7229d481eba9d4afcad40456796d27b304f
Author: tallison <[email protected]>
AuthorDate: Tue Oct 4 18:04:09 2022 -0400
TIKA-3864 - url decode fetchkey when sent in via a header.
---
CHANGES.txt | 5 +++++
.../tika/server/core/FetcherStreamFactory.java | 21 +++++++++++++++++++--
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 17446f489..472ab79df 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,8 @@
+Release 2.5.1 - ???
+
+ * Fetch keys sent via headers to tika server are now URL decoded
(TIKA-3864).
+
+
Release 2.5.0 - 09/30/2022
* Improved extraction of PDF subset info for PDF/UA, PDF/VT, and PDF/X.
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
index e410f5243..8eeec1162 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
@@ -18,6 +18,9 @@ package org.apache.tika.server.core;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.UriInfo;
@@ -36,10 +39,10 @@ import org.apache.tika.pipes.fetcher.RangeFetcher;
* This class looks for "fetcherName" in the http header. If it is
not null
* and not empty, this will return a new TikaInputStream from the fetch key
* and the base path as set in the definition of the named fetcher.
+ * As of Tika > 2.5.0, the "fetchKey" is URL decoded.
* <p>
* Users may also specify the "fetcherName" and "fetchKey" in
- * query parameters with in the request. This is the only option if there are
- * non-ASCII characters in the "fetcherName" or "fetchKey".
+ * query parameters with in the request.
* <p>
* <em>WARNING:</em> Unless you carefully lock down access to the server,
* whoever has access to this service will have the read access of the server.
@@ -65,6 +68,8 @@ public class FetcherStreamFactory implements
InputStreamFactory {
MultivaluedMap params = (uriInfo == null) ? null :
uriInfo.getQueryParameters();
String fetcherName = getParam("fetcherName", httpHeaders, params);
String fetchKey = getParam("fetchKey", httpHeaders, params);
+ fetchKey = urlDecode(fetchKey);
+
long fetchRangeStart = getLong(getParam("fetchRangeStart",
httpHeaders, params));
long fetchRangeEnd = getLong(getParam("fetchRangeEnd", httpHeaders,
params));
if (StringUtils.isBlank(fetcherName) != StringUtils.isBlank(fetchKey))
{
@@ -103,6 +108,18 @@ public class FetcherStreamFactory implements
InputStreamFactory {
return is;
}
+    /**
+     * URL-decodes the given fetch key as UTF-8.
+     *
+     * @param fetchKey the raw fetch key; may be {@code null}
+     * @return the decoded key, {@code null} if {@code fetchKey} is
+     *         {@code null}, or the unmodified {@code fetchKey} if it
+     *         cannot be decoded
+     */
+    private String urlDecode(String fetchKey) {
+        if (fetchKey == null) {
+            return null;
+        }
+        try {
+            return URLDecoder.decode(fetchKey, StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException | IllegalArgumentException e) {
+            // Best effort: keep the raw key rather than failing the request.
+            // The "{}" placeholder is required; without it the logger drops
+            // the fetchKey argument and the warning is not actionable.
+            LOG.warn("couldn't decode fetch key: {}", fetchKey);
+            return fetchKey;
+        }
+    }
+
private String getParam(String paramName, HttpHeaders httpHeaders,
MultivaluedMap uriParams) {
if (uriParams == null || ! uriParams.containsKey(paramName)) {
return httpHeaders.getHeaderString(paramName);