This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4649-require-opt-in-absolute-paths in repository https://gitbox.apache.org/repos/asf/tika.git
commit 79c2c6dc1b180a65ce4a6ebdc569b626d2e68e75 Author: tallison <[email protected]> AuthorDate: Wed Feb 4 09:55:52 2026 -0500 TIKA-4649 --- CHANGES.txt | 3 ++ .../tika/pipes/fetcher/fs/FileSystemFetcher.java | 31 ++++++++++++++++---- .../fetcher/fs/config/FileSystemFetcherConfig.java | 15 ++++++++++ .../apache/tika/config/TikaPipesConfigTest.java | 16 +++++++--- .../pipes/fetcher/fs/FileSystemFetcherTest.java | 34 ++++++++++++++++++++++ .../fetchers-nobasepath-allowabsolute-config.xml | 29 ++++++++++++++++++ 6 files changed, 119 insertions(+), 9 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 2bf3d55683..f890bcfdef 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,8 @@ Release 3.3.0 - ??? + * Users need to add "allowAbsolutePaths=true" for the FileSystemFetcher to fetch + an absolute path (TIKA-4649). + * Add detection of compressed bmp (TIKA-4511). * Allow per file timeouts in tika-pipes (TIKA-4497). diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java index 7289aa673b..d062310315 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java @@ -52,6 +52,7 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable public FileSystemFetcher(FileSystemFetcherConfig fileSystemFetcherConfig) { setBasePath(fileSystemFetcherConfig.getBasePath()); setExtractFileSystemMetadata(fileSystemFetcherConfig.isExtractFileSystemMetadata()); + setAllowAbsolutePaths(fileSystemFetcherConfig.isAllowAbsolutePaths()); } private static final Logger LOG = LoggerFactory.getLogger(FileSystemFetcher.class); @@ -61,6 +62,8 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable private boolean extractFileSystemMetadata = false; + private boolean allowAbsolutePaths = false; + static boolean 
isDescendant(Path root, Path descendant) { return descendant.toAbsolutePath().normalize() .startsWith(root.toAbsolutePath().normalize()); @@ -147,6 +150,22 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable this.extractFileSystemMetadata = extractFileSystemMetadata; } + /** + * If true, allows fetchKey to be an absolute path when basePath is not set. + * This explicitly acknowledges the security risk of unrestricted file access. + * The default is <code>false</code>. + * + * @param allowAbsolutePaths + */ + @Field + public void setAllowAbsolutePaths(boolean allowAbsolutePaths) { + this.allowAbsolutePaths = allowAbsolutePaths; + } + + public boolean isAllowAbsolutePaths() { + return allowAbsolutePaths; + } + @Override public void initialize(Map<String, Param> params) throws TikaConfigException { //no-op @@ -156,11 +175,13 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException { if (basePath == null || basePath.toString().isBlank()) { - LOG.warn("'basePath' has not been set. " + - "This means that client code or clients can read from any file that this " + - "process has permissions to read. If you are running tika-server, make " + - "absolutely certain that you've locked down " + - "access to tika-server and file-permissions for the tika-server process."); + if (!allowAbsolutePaths) { + throw new TikaConfigException( + "'basePath' must be set, or 'allowAbsolutePaths' must be true. " + + "Without basePath, clients can read any file this process " + + "has access to. 
Set 'allowAbsolutePaths' to 'true' to explicitly " + + "allow this behavior and accept the security risks."); + } return; } if (basePath.toString().startsWith("http://")) { diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java index b9f155fbd7..cf3d1a6369 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -21,6 +21,7 @@ import org.apache.tika.pipes.fetcher.config.AbstractConfig; public class FileSystemFetcherConfig extends AbstractConfig { private String basePath; private boolean extractFileSystemMetadata; + private boolean allowAbsolutePaths; public String getBasePath() { return basePath; @@ -39,4 +40,18 @@ public class FileSystemFetcherConfig extends AbstractConfig { this.extractFileSystemMetadata = extractFileSystemMetadata; return this; } + + /** + * If true, allows fetchKey to be an absolute path when basePath is not set. + * This explicitly acknowledges the security risk of unrestricted file access. + * The default is <code>false</code>. 
+ */ + public boolean isAllowAbsolutePaths() { + return allowAbsolutePaths; + } + + public FileSystemFetcherConfig setAllowAbsolutePaths(boolean allowAbsolutePaths) { + this.allowAbsolutePaths = allowAbsolutePaths; + return this; + } } diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java index 486388c1c2..5a3ed9a3c7 100644 --- a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java +++ b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java @@ -69,12 +69,20 @@ public class TikaPipesConfigTest extends AbstractTikaConfigTest { } @Test - public void testNoBasePathFetchers() throws Exception { - //no basepath is allowed as of > 2.3.0 - //test that this does not throw an exception. + public void testNoBasePathFetchersRequiresAllowAbsolutePaths() throws Exception { + //no basepath requires allowAbsolutePaths=true as of 3.x + assertThrows(TikaConfigException.class, () -> { + FetcherManager.load(getConfigFilePath("fetchers-nobasepath-config.xml")); + }); + } + @Test + public void testNoBasePathFetchersWithAllowAbsolutePaths() throws Exception { + //no basepath is allowed if allowAbsolutePaths=true FetcherManager fetcherManager = FetcherManager.load( - getConfigFilePath("fetchers-nobasepath-config.xml")); + getConfigFilePath("fetchers-nobasepath-allowabsolute-config.xml")); + Fetcher f = fetcherManager.getFetcher("fs2"); + assertTrue(((FileSystemFetcher) f).isAllowAbsolutePaths()); } @Test diff --git a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java index 7e29ac20ad..b46ddcda9e 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java +++ b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java @@ -16,6 +16,7 @@ */ package 
org.apache.tika.pipes.fetcher.fs; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -27,6 +28,7 @@ import java.nio.file.Paths; import org.junit.jupiter.api.Test; import org.apache.tika.config.InitializableProblemHandler; +import org.apache.tika.exception.TikaConfigException; public class FileSystemFetcherTest { @@ -54,4 +56,36 @@ public class FileSystemFetcherTest { f.checkInitialization(InitializableProblemHandler.IGNORE); }); } + + @Test + public void testNoBasePathWithoutAllowAbsolutePathsThrows() { + FileSystemFetcher f = new FileSystemFetcher(); + f.setName("fs"); + // No basePath set, allowAbsolutePaths defaults to false + assertThrows(TikaConfigException.class, () -> { + f.checkInitialization(InitializableProblemHandler.IGNORE); + }); + } + + @Test + public void testNoBasePathWithAllowAbsolutePathsSucceeds() { + FileSystemFetcher f = new FileSystemFetcher(); + f.setName("fs"); + f.setAllowAbsolutePaths(true); + // No basePath set, but allowAbsolutePaths is true + assertDoesNotThrow(() -> { + f.checkInitialization(InitializableProblemHandler.IGNORE); + }); + } + + @Test + public void testWithBasePathSucceeds() { + FileSystemFetcher f = new FileSystemFetcher(); + f.setName("fs"); + f.setBasePath("/some/path"); + // basePath is set, so allowAbsolutePaths doesn't matter + assertDoesNotThrow(() -> { + f.checkInitialization(InitializableProblemHandler.IGNORE); + }); + } } diff --git a/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml new file mode 100644 index 0000000000..da92587d0a --- /dev/null +++ b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml @@ -0,0 +1,29 @@ +<?xml 
version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<properties> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> + <name>fs1</name> + <basePath>/my/base/path1</basePath> + </fetcher> + <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> + <name>fs2</name> + <allowAbsolutePaths>true</allowAbsolutePaths> + </fetcher> + </fetchers> +</properties>
