This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 89c6b7229 TIKA-3864 - url decode fetchkey when sent in via a header.
89c6b7229 is described below

commit 89c6b7229d481eba9d4afcad40456796d27b304f
Author: tallison <[email protected]>
AuthorDate: Tue Oct 4 18:04:09 2022 -0400

    TIKA-3864 - url decode fetchkey when sent in via a header.
---
 CHANGES.txt                                         |  5 +++++
 .../tika/server/core/FetcherStreamFactory.java      | 21 +++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 17446f489..472ab79df 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,8 @@
+Release 2.5.1 - ???
+
+   * Fetch keys sent via headers to tika server are now URL decoded 
(TIKA-3864).
+
+
 Release 2.5.0 - 09/30/2022
 
    * Improved extraction of PDF subset info for PDF/UA, PDF/VT, and PDF/X.
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
index e410f5243..8eeec1162 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
@@ -18,6 +18,9 @@ package org.apache.tika.server.core;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
 import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MultivaluedMap;
 import javax.ws.rs.core.UriInfo;
@@ -36,10 +39,10 @@ import org.apache.tika.pipes.fetcher.RangeFetcher;
  * This class looks for &quot;fetcherName&quot; in the http header.  If it is 
not null
  * and not empty, this will return a new TikaInputStream from the fetch key
  * and the base path as set in the definition of the named fetcher.
+ * In Tika versions after 2.5.0, the &quot;fetchKey&quot; is URL decoded.
  * <p>
  * Users may also specify the &quot;fetcherName&quote; and 
&quot;fetchKey&quot; in
- * query parameters with in the request.  This is the only option if there are
- * non-ASCII characters in the &quot;fetcherName&quote; or 
&quot;fetchKey&quot;.
+ * query parameters with in the request.
  * <p>
  * <em>WARNING:</em> Unless you carefully lock down access to the server,
  * whoever has access to this service will have the read access of the server.
@@ -65,6 +68,8 @@ public class FetcherStreamFactory implements 
InputStreamFactory {
         MultivaluedMap params = (uriInfo == null) ? null : 
uriInfo.getQueryParameters();
         String fetcherName = getParam("fetcherName", httpHeaders, params);
         String fetchKey = getParam("fetchKey", httpHeaders, params);
+        fetchKey = urlDecode(fetchKey);
+
         long fetchRangeStart = getLong(getParam("fetchRangeStart", 
httpHeaders, params));
         long fetchRangeEnd = getLong(getParam("fetchRangeEnd", httpHeaders, 
params));
         if (StringUtils.isBlank(fetcherName) != StringUtils.isBlank(fetchKey)) 
{
@@ -103,6 +108,18 @@ public class FetcherStreamFactory implements 
InputStreamFactory {
         return is;
     }
 
+    /**
+     * URL decodes the given fetch key as UTF-8.
+     *
+     * @param fetchKey the raw fetch key; may be {@code null}
+     * @return the decoded key, or the value as given if it is {@code null} or cannot be decoded
+     */
+    private String urlDecode(String fetchKey) {
+        if (fetchKey == null) {
+            return null;
+        }
+        try {
+            return URLDecoder.decode(fetchKey, StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException | IllegalArgumentException e) {
+            // Best effort: fall back to the raw key, but record which key failed and why.
+            // The "{}" placeholder is required for the key to appear in the log output.
+            LOG.warn("couldn't decode fetch key: {}", fetchKey, e);
+            return fetchKey;
+        }
+    }
+
     private String getParam(String paramName, HttpHeaders httpHeaders, 
MultivaluedMap uriParams) {
         if (uriParams == null || ! uriParams.containsKey(paramName)) {
             return httpHeaders.getHeaderString(paramName);

Reply via email to