This is an automated email from the ASF dual-hosted git repository. tballison pushed a commit to branch unpack-improvements in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4725aab4cc5e3c20fae39870bec4a45da774562e Author: tallison <[email protected]> AuthorDate: Fri May 29 13:42:15 2026 -0400 improve unpack endpoint --- .../apache/tika/server/core/TikaServerProcess.java | 3 +- .../tika/server/core/resource/TikaResource.java | 20 ++++- .../org/apache/tika/server/core/CXFTestBase.java | 11 ++- .../tika/server/standard/MetadataResourceTest.java | 5 ++ .../standard/RecursiveMetadataResourceTest.java | 5 ++ .../tika/server/standard/TikaResourceTest.java | 5 ++ .../UnpackerResourceConfigDisabledTest.java | 100 +++++++++++++++++++++ .../tika/server/standard/UnpackerResourceTest.java | 5 ++ .../standard/UnpackerResourceWithConfigTest.java | 5 ++ 9 files changed, 156 insertions(+), 3 deletions(-) diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java index 3111d2f529..36685f112a 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java @@ -185,7 +185,8 @@ public class TikaServerProcess { LOG.info("Pipes-based parsing enabled for /tika and /rmeta endpoints"); } - TikaResource.init(tikaLoader, serverStatus, pipesParsingHelper); + TikaResource.init(tikaLoader, serverStatus, pipesParsingHelper, + tikaServerConfig.isEnableUnsecureFeatures()); JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); List<ResourceProvider> resourceProviders = new ArrayList<>(); diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java index 24b8549063..a0ea80b80e 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java @@ -37,9 +37,12 @@ import jakarta.ws.rs.PUT; import jakarta.ws.rs.Path; import jakarta.ws.rs.PathParam; import jakarta.ws.rs.Produces; +import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.HttpHeaders; +import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.MultivaluedMap; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.StreamingOutput; import org.apache.cxf.attachment.ContentDisposition; import org.apache.cxf.jaxrs.ext.multipart.Attachment; @@ -85,6 +88,9 @@ public class TikaResource { private static ServerStatus SERVER_STATUS = null; private static PipesParsingHelper PIPES_PARSING_HELPER = null; private static MetadataWriteLimiterFactory DEFAULT_METADATA_WRITE_LIMITER_FACTORY = null; + // Whether per-request config injection (multipart "config" parts) is permitted. + // Enforced in setupMultipartConfig so every config-consuming endpoint honors it. + private static boolean ENABLE_UNSECURE_FEATURES = false; /** * Initialize TikaResource with pipes-based parsing for process isolation. @@ -92,12 +98,14 @@ public class TikaResource { * @param tikaLoader the Tika loader * @param serverStatus server status tracker * @param pipesParsingHelper helper for pipes-based parsing, may be null if /tika endpoint is not enabled + * @param enableUnsecureFeatures whether per-request config injection is permitted */ public static void init(TikaLoader tikaLoader, ServerStatus serverStatus, - PipesParsingHelper pipesParsingHelper) { + PipesParsingHelper pipesParsingHelper, boolean enableUnsecureFeatures) { TIKA_LOADER = tikaLoader; SERVER_STATUS = serverStatus; PIPES_PARSING_HELPER = pipesParsingHelper; + ENABLE_UNSECURE_FEATURES = enableUnsecureFeatures; // MetadataWriteLimiterFactory is now loaded dynamically via loadParseContext() } @@ -264,6 +272,16 @@ public class TikaResource { } } + // Enforce the per-request config gate where the config part is actually + // consumed, so every endpoint that accepts a config part honors + // enableUnsecureFeatures uniformly. + if (configAtt != null && !ENABLE_UNSECURE_FEATURES) { + throw new WebApplicationException(Response.status(Response.Status.FORBIDDEN) + .entity("Per-request configuration is disabled. Set enableUnsecureFeatures=true in server config.") + .type(MediaType.TEXT_PLAIN) + .build()); + } + if (fileAtt == null) { throw new IOException("Missing file attachment (use name='file' or send single unnamed attachment)"); } diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java index 69c14521af..87d5fe2bd3 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java @@ -216,7 +216,7 @@ public abstract class CXFTestBase { PipesParsingHelper pipesParsingHelper = new PipesParsingHelper(this.pipesParser, pipesConfig, inputTempDirectory, getUnpackEmitterBasePath()); - TikaResource.init(tika, new ServerStatus(), pipesParsingHelper); + TikaResource.init(tika, new ServerStatus(), pipesParsingHelper, isEnableUnsecureFeatures()); } finally { // Only delete tika config, keep pipes config for child processes Files.deleteIfExists(tmp); @@ -367,6 +367,15 @@ public abstract class CXFTestBase { return null; } + /** + * Whether per-request config injection is permitted. Defaults to false, matching + * the production default. Tests that POST a multipart "config" part must override + * this to return true, otherwise the config part is rejected with 403. + */ + protected boolean isEnableUnsecureFeatures() { + return false; + } + protected InputStream getPipesConfigInputStream() throws IOException { if (getPipesInputPath() == null) { return null; diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java index 331a31253b..8ae59274c3 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/MetadataResourceTest.java @@ -58,6 +58,11 @@ public class MetadataResourceTest extends CXFTestBase { private static final String META_PATH = "/meta"; + @Override + protected boolean isEnableUnsecureFeatures() { + return true; // exercises per-request config injection + } + @Override protected void setUpResources(JAXRSServerFactoryBean sf) { sf.setResourceClasses(MetadataResource.class, XMPMetadataResource.class); diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java index a03d8055fa..d17ca0f889 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java @@ -63,6 +63,11 @@ public class RecursiveMetadataResourceTest extends CXFTestBase { private static final String TEST_RECURSIVE_DOC = "test-documents/test_recursive_embedded.docx"; + @Override + protected boolean isEnableUnsecureFeatures() { + return true; // exercises per-request config injection + } + @Override protected void setUpResources(JAXRSServerFactoryBean sf) { sf.setResourceClasses(RecursiveMetadataResource.class); diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java index 3f81fc354b..68f9eed897 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java @@ -64,6 +64,11 @@ public class TikaResourceTest extends CXFTestBase { private static final String TIKA_PATH = "/tika"; private static final int UNPROCESSEABLE = 422; + @Override + protected boolean isEnableUnsecureFeatures() { + return true; // exercises per-request config injection + } + @Override protected void setUpResources(JAXRSServerFactoryBean sf) { sf.setResourceClasses(TikaResource.class); diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java new file mode 100644 index 0000000000..5e96ff7604 --- /dev/null +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceConfigDisabledTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.server.standard; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import jakarta.ws.rs.core.Response; +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.ext.multipart.Attachment; +import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition; +import org.apache.cxf.jaxrs.ext.multipart.MultipartBody; +import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; +import org.junit.jupiter.api.Test; + +import org.apache.tika.server.core.CXFTestBase; +import org.apache.tika.server.core.TikaServerParseExceptionMapper; +import org.apache.tika.server.core.resource.UnpackerResource; +import org.apache.tika.server.core.writer.TarWriter; +import org.apache.tika.server.core.writer.ZipWriter; + +/** + * Verifies that /unpack and /unpack/all honor enableUnsecureFeatures: when per-request + * config injection is disabled (the default), a multipart "config" part is rejected + * with 403. Counterpart to {@link UnpackerResourceWithConfigTest}, which covers the + * enabled path. + */ +public class UnpackerResourceConfigDisabledTest extends CXFTestBase { + + private static final String BASE_PATH = "/unpack"; + private static final String ALL_PATH = BASE_PATH + "/all"; + + // isEnableUnsecureFeatures() is intentionally NOT overridden; it defaults to false + // so a config part must be rejected. + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + sf.setResourceClasses(UnpackerResource.class); + sf.setResourceProvider(UnpackerResource.class, new SingletonResourceProvider(new UnpackerResource())); + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + } + + private Response postWithConfig(String path) { + ContentDisposition fileCd = new ContentDisposition("form-data; name=\"file\"; filename=\"test.txt\""); + Attachment fileAtt = new Attachment("file", + new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8)), fileCd); + Attachment configAtt = new Attachment("config", "application/json", + new ByteArrayInputStream("{\"pdf-parser\":{}}".getBytes(StandardCharsets.UTF_8))); + return WebClient + .create(endPoint + path) + .type("multipart/form-data") + .accept("application/zip") + .post(new MultipartBody(Arrays.asList(fileAtt, configAtt))); + } + + @Test + public void testConfigPartRejectedOnUnpackWhenDisabled() throws Exception { + Response response = postWithConfig(BASE_PATH); + assertEquals(403, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response.getEntity()); + assertTrue(msg.contains("Per-request configuration is disabled"), + "expected the config-disabled message, got: " + msg); + } + + @Test + public void testConfigPartRejectedOnUnpackAllWhenDisabled() throws Exception { + Response response = postWithConfig(ALL_PATH); + assertEquals(403, response.getStatus()); + } +} diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java index b8c62b17ae..497dfd812c 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java @@ -98,6 +98,11 @@ public class UnpackerResourceTest extends CXFTestBase { private Path unpackTempDir; + @Override + protected boolean isEnableUnsecureFeatures() { + return true; // exercises per-request config injection + } + @Override protected void setUpResources(JAXRSServerFactoryBean sf) { sf.setResourceClasses(UnpackerResource.class); diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java index 2c216fca28..09fd301f61 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceWithConfigTest.java @@ -72,6 +72,11 @@ public class UnpackerResourceWithConfigTest extends CXFTestBase { private Path unpackTempDir; + @Override + protected boolean isEnableUnsecureFeatures() { + return true; // exercises per-request config injection + } + @Override protected void setUpResources(JAXRSServerFactoryBean sf) { sf.setResourceClasses(UnpackerResource.class);
