This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4649-require-opt-in-absolute-paths
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 79c2c6dc1b180a65ce4a6ebdc569b626d2e68e75
Author: tallison <[email protected]>
AuthorDate: Wed Feb 4 09:55:52 2026 -0500

    TIKA-4649
---
 CHANGES.txt                                        |  3 ++
 .../tika/pipes/fetcher/fs/FileSystemFetcher.java   | 31 ++++++++++++++++----
 .../fetcher/fs/config/FileSystemFetcherConfig.java | 15 ++++++++++
 .../apache/tika/config/TikaPipesConfigTest.java    | 16 +++++++---
 .../pipes/fetcher/fs/FileSystemFetcherTest.java    | 34 ++++++++++++++++++++++
 .../fetchers-nobasepath-allowabsolute-config.xml   | 29 ++++++++++++++++++
 6 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 2bf3d55683..f890bcfdef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 3.3.0 - ???
 
+  * Users need to add "allowAbsolutePaths=true" for the FileSystemFetcher to fetch
+    an absolute path when no basePath is set (TIKA-4649).
+
   * Add detection of compressed bmp (TIKA-4511).
 
   * Allow per file timeouts in tika-pipes (TIKA-4497).
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index 7289aa673b..d062310315 100644
--- 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++ 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -52,6 +52,7 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
     public FileSystemFetcher(FileSystemFetcherConfig fileSystemFetcherConfig) {
         setBasePath(fileSystemFetcherConfig.getBasePath());
         
setExtractFileSystemMetadata(fileSystemFetcherConfig.isExtractFileSystemMetadata());
+        setAllowAbsolutePaths(fileSystemFetcherConfig.isAllowAbsolutePaths());
     }
 
     private static final Logger LOG = 
LoggerFactory.getLogger(FileSystemFetcher.class);
@@ -61,6 +62,8 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
 
     private boolean extractFileSystemMetadata = false;
 
+    private boolean allowAbsolutePaths = false;
+
     static boolean isDescendant(Path root, Path descendant) {
         return descendant.toAbsolutePath().normalize()
                 .startsWith(root.toAbsolutePath().normalize());
@@ -147,6 +150,22 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
         this.extractFileSystemMetadata = extractFileSystemMetadata;
     }
 
+    /**
+     * If true, allows fetchKey to be an absolute path when basePath is not 
set.
+     * This explicitly acknowledges the security risk of unrestricted file 
access.
+     * The default is <code>false</code>.
+     *
+     * @param allowAbsolutePaths true to permit absolute fetchKeys when basePath is not set
+     */
+    @Field
+    public void setAllowAbsolutePaths(boolean allowAbsolutePaths) {
+        this.allowAbsolutePaths = allowAbsolutePaths;
+    }
+
+    public boolean isAllowAbsolutePaths() {
+        return allowAbsolutePaths;
+    }
+
     @Override
     public void initialize(Map<String, Param> params) throws 
TikaConfigException {
         //no-op
@@ -156,11 +175,13 @@ public class FileSystemFetcher extends AbstractFetcher 
implements Initializable
     public void checkInitialization(InitializableProblemHandler problemHandler)
             throws TikaConfigException {
         if (basePath == null || basePath.toString().isBlank()) {
-            LOG.warn("'basePath' has not been set. " +
-                    "This means that client code or clients can read from any 
file that this " +
-                    "process has permissions to read. If you are running 
tika-server, make " +
-                    "absolutely certain that you've locked down " +
-                    "access to tika-server and file-permissions for the 
tika-server process.");
+            if (!allowAbsolutePaths) {
+                throw new TikaConfigException(
+                        "'basePath' must be set, or 'allowAbsolutePaths' must 
be true. "
+                                + "Without basePath, clients can read any file 
this process "
+                                + "has access to. Set 'allowAbsolutePaths' to 
'true' to explicitly "
+                                + "allow this behavior and accept the security 
risks.");
+            }
             return;
         }
         if (basePath.toString().startsWith("http://")) {
diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
index b9f155fbd7..cf3d1a6369 100644
--- 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
+++ 
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
@@ -21,6 +21,7 @@ import org.apache.tika.pipes.fetcher.config.AbstractConfig;
 public class FileSystemFetcherConfig extends AbstractConfig {
     private String basePath;
     private boolean extractFileSystemMetadata;
+    private boolean allowAbsolutePaths;
 
     public String getBasePath() {
         return basePath;
@@ -39,4 +40,18 @@ public class FileSystemFetcherConfig extends AbstractConfig {
         this.extractFileSystemMetadata = extractFileSystemMetadata;
         return this;
     }
+
+    /**
+     * If true, allows fetchKey to be an absolute path when basePath is not 
set.
+     * This explicitly acknowledges the security risk of unrestricted file 
access.
+     * The default is <code>false</code>.
+     */
+    public boolean isAllowAbsolutePaths() {
+        return allowAbsolutePaths;
+    }
+
+    public FileSystemFetcherConfig setAllowAbsolutePaths(boolean 
allowAbsolutePaths) {
+        this.allowAbsolutePaths = allowAbsolutePaths;
+        return this;
+    }
 }
diff --git 
a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java 
b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
index 486388c1c2..5a3ed9a3c7 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
@@ -69,12 +69,20 @@ public class TikaPipesConfigTest extends 
AbstractTikaConfigTest {
     }
 
     @Test
-    public void testNoBasePathFetchers() throws Exception {
-        //no basepath is allowed as of > 2.3.0
-        //test that this does not throw an exception.
+    public void testNoBasePathFetchersRequiresAllowAbsolutePaths() throws 
Exception {
+        //no basepath requires allowAbsolutePaths=true as of 3.x
+        assertThrows(TikaConfigException.class, () -> {
+            
FetcherManager.load(getConfigFilePath("fetchers-nobasepath-config.xml"));
+        });
+    }
 
+    @Test
+    public void testNoBasePathFetchersWithAllowAbsolutePaths() throws 
Exception {
+        //no basepath is allowed if allowAbsolutePaths=true
         FetcherManager fetcherManager = FetcherManager.load(
-                getConfigFilePath("fetchers-nobasepath-config.xml"));
+                
getConfigFilePath("fetchers-nobasepath-allowabsolute-config.xml"));
+        Fetcher f = fetcherManager.getFetcher("fs2");
+        assertTrue(((FileSystemFetcher) f).isAllowAbsolutePaths());
     }
 
     @Test
diff --git 
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
 
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
index 7e29ac20ad..b46ddcda9e 100644
--- 
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
+++ 
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.pipes.fetcher.fs;
 
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -27,6 +28,7 @@ import java.nio.file.Paths;
 import org.junit.jupiter.api.Test;
 
 import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.exception.TikaConfigException;
 
 
 public class FileSystemFetcherTest {
@@ -54,4 +56,36 @@ public class FileSystemFetcherTest {
             f.checkInitialization(InitializableProblemHandler.IGNORE);
         });
     }
+
+    @Test
+    public void testNoBasePathWithoutAllowAbsolutePathsThrows() {
+        FileSystemFetcher f = new FileSystemFetcher();
+        f.setName("fs");
+        // No basePath set, allowAbsolutePaths defaults to false
+        assertThrows(TikaConfigException.class, () -> {
+            f.checkInitialization(InitializableProblemHandler.IGNORE);
+        });
+    }
+
+    @Test
+    public void testNoBasePathWithAllowAbsolutePathsSucceeds() {
+        FileSystemFetcher f = new FileSystemFetcher();
+        f.setName("fs");
+        f.setAllowAbsolutePaths(true);
+        // No basePath set, but allowAbsolutePaths is true
+        assertDoesNotThrow(() -> {
+            f.checkInitialization(InitializableProblemHandler.IGNORE);
+        });
+    }
+
+    @Test
+    public void testWithBasePathSucceeds() {
+        FileSystemFetcher f = new FileSystemFetcher();
+        f.setName("fs");
+        f.setBasePath("/some/path");
+        // basePath is set, so allowAbsolutePaths doesn't matter
+        assertDoesNotThrow(() -> {
+            f.checkInitialization(InitializableProblemHandler.IGNORE);
+        });
+    }
 }
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
 
b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
new file mode 100644
index 0000000000..da92587d0a
--- /dev/null
+++ 
b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <fetchers>
+    <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+      <name>fs1</name>
+      <basePath>/my/base/path1</basePath>
+    </fetcher>
+    <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+      <name>fs2</name>
+      <allowAbsolutePaths>true</allowAbsolutePaths>
+    </fetcher>
+  </fetchers>
+</properties>

Reply via email to