This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 89de688213 improve unpack endpoint (#2851)
89de688213 is described below
commit 89de6882133c5d821a8a05d76d453f473b73e4b3
Author: Tim Allison <[email protected]>
AuthorDate: Fri May 29 14:27:27 2026 -0400
improve unpack endpoint (#2851)
---
.../apache/tika/server/core/TikaServerProcess.java | 3 +-
.../tika/server/core/resource/TikaResource.java | 20 ++++-
.../org/apache/tika/server/core/CXFTestBase.java | 11 ++-
.../tika/server/standard/MetadataResourceTest.java | 5 ++
.../standard/RecursiveMetadataResourceTest.java | 5 ++
.../tika/server/standard/TikaResourceTest.java | 5 ++
.../UnpackerResourceConfigDisabledTest.java | 100 +++++++++++++++++++++
.../tika/server/standard/UnpackerResourceTest.java | 5 ++
.../standard/UnpackerResourceWithConfigTest.java | 5 ++
9 files changed, 156 insertions(+), 3 deletions(-)
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
index 3111d2f529..36685f112a 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
@@ -185,7 +185,8 @@ public class TikaServerProcess {
LOG.info("Pipes-based parsing enabled for /tika and /rmeta
endpoints");
}
- TikaResource.init(tikaLoader, serverStatus, pipesParsingHelper);
+ TikaResource.init(tikaLoader, serverStatus, pipesParsingHelper,
+ tikaServerConfig.isEnableUnsecureFeatures());
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
List<ResourceProvider> resourceProviders = new ArrayList<>();
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 24b8549063..a0ea80b80e 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -37,9 +37,12 @@ import jakarta.ws.rs.PUT;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.PathParam;
import jakarta.ws.rs.Produces;
+import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.Context;
import jakarta.ws.rs.core.HttpHeaders;
+import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.MultivaluedMap;
+import jakarta.ws.rs.core.Response;
import jakarta.ws.rs.core.StreamingOutput;
import org.apache.cxf.attachment.ContentDisposition;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
@@ -85,6 +88,9 @@ public class TikaResource {
private static ServerStatus SERVER_STATUS = null;
private static PipesParsingHelper PIPES_PARSING_HELPER = null;
private static MetadataWriteLimiterFactory
DEFAULT_METADATA_WRITE_LIMITER_FACTORY = null;
+ // Whether per-request config injection (multipart "config" parts) is permitted.
+ // Enforced in setupMultipartConfig so every config-consuming endpoint honors it.
+ private static boolean ENABLE_UNSECURE_FEATURES = false;
/**
* Initialize TikaResource with pipes-based parsing for process isolation.
@@ -92,12 +98,14 @@ public class TikaResource {
* @param tikaLoader the Tika loader
* @param serverStatus server status tracker
* @param pipesParsingHelper helper for pipes-based parsing, may be null if /tika endpoint is not enabled
+ * @param enableUnsecureFeatures whether per-request config injection is permitted
*/
public static void init(TikaLoader tikaLoader, ServerStatus serverStatus,
- PipesParsingHelper pipesParsingHelper) {
+ PipesParsingHelper pipesParsingHelper, boolean
enableUnsecureFeatures) {
TIKA_LOADER = tikaLoader;
SERVER_STATUS = serverStatus;
PIPES_PARSING_HELPER = pipesParsingHelper;
+ ENABLE_UNSECURE_FEATURES = enableUnsecureFeatures;
// MetadataWriteLimiterFactory is now loaded dynamically via loadParseContext()
}
@@ -264,6 +272,16 @@ public class TikaResource {
}
}
+ // Enforce the per-request config gate where the config part is actually
+ // consumed, so every endpoint that accepts a config part honors
+ // enableUnsecureFeatures uniformly.
+ if (configAtt != null && !ENABLE_UNSECURE_FEATURES) {
+ throw new
WebApplicationException(Response.status(Response.Status.FORBIDDEN)
+ .entity("Per-request configuration is disabled. Set
enableUnsecureFeatures=true in server config.")
+ .type(MediaType.TEXT_PLAIN)
+ .build());
+ }
+
if (fileAtt == null) {
throw new IOException("Missing file attachment (use name='file' or
send single unnamed attachment)");
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 69c14521af..87d5fe2bd3 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -216,7 +216,7 @@ public abstract class CXFTestBase {
PipesParsingHelper pipesParsingHelper = new
PipesParsingHelper(this.pipesParser, pipesConfig,
inputTempDirectory, getUnpackEmitterBasePath());
- TikaResource.init(tika, new ServerStatus(), pipesParsingHelper);
+ TikaResource.init(tika, new ServerStatus(), pipesParsingHelper,
isEnableUnsecureFeatures());
} finally {
// Only delete tika config, keep pipes config for child processes
Files.deleteIfExists(tmp);
@@ -367,6 +367,15 @@ public abstract class CXFTestBase {
return null;
}
+ /**
+ * Whether per-request config injection is permitted. Defaults to false, matching
+ * the production default. Tests that POST a multipart "config" part must override
+ * this to return true, otherwise the config part is rejected with 403.
+ */
+ protected boolean isEnableUnsecureFeatures() {
+ return false;
+ }
+
protected InputStream getPipesConfigInputStream() throws IOException {
if (getPipesInputPath() == null) {
return null;
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java
index 331a31253b..8ae59274c3 100644
---
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java
@@ -58,6 +58,11 @@ public class MetadataResourceTest extends CXFTestBase {
private static final String META_PATH = "/meta";
+ @Override
+ protected boolean isEnableUnsecureFeatures() {
+ return true; // exercises per-request config injection
+ }
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(MetadataResource.class,
XMPMetadataResource.class);
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
index a03d8055fa..d17ca0f889 100644
---
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
@@ -63,6 +63,11 @@ public class RecursiveMetadataResourceTest extends
CXFTestBase {
private static final String TEST_RECURSIVE_DOC =
"test-documents/test_recursive_embedded.docx";
+ @Override
+ protected boolean isEnableUnsecureFeatures() {
+ return true; // exercises per-request config injection
+ }
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(RecursiveMetadataResource.class);
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
index 3f81fc354b..68f9eed897 100644
---
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
@@ -64,6 +64,11 @@ public class TikaResourceTest extends CXFTestBase {
private static final String TIKA_PATH = "/tika";
private static final int UNPROCESSEABLE = 422;
+ @Override
+ protected boolean isEnableUnsecureFeatures() {
+ return true; // exercises per-request config injection
+ }
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(TikaResource.class);
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java
new file mode 100644
index 0000000000..5e96ff7604
--- /dev/null
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.standard;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import jakarta.ws.rs.core.Response;
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
+import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.resource.UnpackerResource;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.writer.ZipWriter;
+
+/**
+ * Verifies that /unpack and /unpack/all honor enableUnsecureFeatures: when per-request
+ * config injection is disabled (the default), a multipart "config" part is rejected
+ * with 403. Counterpart to {@link UnpackerResourceWithConfigTest}, which covers the
+ * enabled path.
+ */
+public class UnpackerResourceConfigDisabledTest extends CXFTestBase {
+
+ private static final String BASE_PATH = "/unpack";
+ private static final String ALL_PATH = BASE_PATH + "/all";
+
+ // isEnableUnsecureFeatures() is intentionally NOT overridden; it defaults to false
+ // so a config part must be rejected.
+
+ @Override
+ protected void setUpResources(JAXRSServerFactoryBean sf) {
+ sf.setResourceClasses(UnpackerResource.class);
+ sf.setResourceProvider(UnpackerResource.class, new
SingletonResourceProvider(new UnpackerResource()));
+ }
+
+ @Override
+ protected void setUpProviders(JAXRSServerFactoryBean sf) {
+ List<Object> providers = new ArrayList<>();
+ providers.add(new TarWriter());
+ providers.add(new ZipWriter());
+ providers.add(new TikaServerParseExceptionMapper(false));
+ sf.setProviders(providers);
+ }
+
+ private Response postWithConfig(String path) {
+ ContentDisposition fileCd = new ContentDisposition("form-data;
name=\"file\"; filename=\"test.txt\"");
+ Attachment fileAtt = new Attachment("file",
+ new
ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8)), fileCd);
+ Attachment configAtt = new Attachment("config", "application/json",
+ new
ByteArrayInputStream("{\"pdf-parser\":{}}".getBytes(StandardCharsets.UTF_8)));
+ return WebClient
+ .create(endPoint + path)
+ .type("multipart/form-data")
+ .accept("application/zip")
+ .post(new MultipartBody(Arrays.asList(fileAtt, configAtt)));
+ }
+
+ @Test
+ public void testConfigPartRejectedOnUnpackWhenDisabled() throws Exception {
+ Response response = postWithConfig(BASE_PATH);
+ assertEquals(403, response.getStatus());
+ String msg = getStringFromInputStream((InputStream)
response.getEntity());
+ assertTrue(msg.contains("Per-request configuration is disabled"),
+ "expected the config-disabled message, got: " + msg);
+ }
+
+ @Test
+ public void testConfigPartRejectedOnUnpackAllWhenDisabled() throws
Exception {
+ Response response = postWithConfig(ALL_PATH);
+ assertEquals(403, response.getStatus());
+ }
+}
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
index b8c62b17ae..497dfd812c 100644
---
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
@@ -98,6 +98,11 @@ public class UnpackerResourceTest extends CXFTestBase {
private Path unpackTempDir;
+ @Override
+ protected boolean isEnableUnsecureFeatures() {
+ return true; // exercises per-request config injection
+ }
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(UnpackerResource.class);
diff --git
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java
index 2c216fca28..09fd301f61 100644
---
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java
+++
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java
@@ -72,6 +72,11 @@ public class UnpackerResourceWithConfigTest extends
CXFTestBase {
private Path unpackTempDir;
+ @Override
+ protected boolean isEnableUnsecureFeatures() {
+ return true; // exercises per-request config injection
+ }
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(UnpackerResource.class);