This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new c226e367ba TIKA-4649 (#2583)
c226e367ba is described below
commit c226e367ba7c3c9d09c9a54591ff393d17f5a153
Author: Tim Allison <[email protected]>
AuthorDate: Wed Feb 4 10:43:00 2026 -0500
TIKA-4649 (#2583)
---
CHANGES.txt | 3 ++
.../tika/pipes/fetcher/fs/FileSystemFetcher.java | 31 ++++++++++++++++----
.../fetcher/fs/config/FileSystemFetcherConfig.java | 15 ++++++++++
.../apache/tika/config/TikaPipesConfigTest.java | 16 +++++++---
.../pipes/fetcher/fs/FileSystemFetcherTest.java | 34 ++++++++++++++++++++++
.../fetchers-nobasepath-allowabsolute-config.xml | 29 ++++++++++++++++++
6 files changed, 119 insertions(+), 9 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 2bf3d55683..f890bcfdef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
Release 3.3.0 - ???
+ * Users need to add "allowAbsolutePaths=true" for the FileSystemFetcher to
fetch
+ an absolute path (TIKA-4649).
+
* Add detection of compressed bmp (TIKA-4511).
* Allow per file timeouts in tika-pipes (TIKA-4497).
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index 7289aa673b..d062310315 100644
---
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -52,6 +52,7 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
public FileSystemFetcher(FileSystemFetcherConfig fileSystemFetcherConfig) {
setBasePath(fileSystemFetcherConfig.getBasePath());
setExtractFileSystemMetadata(fileSystemFetcherConfig.isExtractFileSystemMetadata());
+ setAllowAbsolutePaths(fileSystemFetcherConfig.isAllowAbsolutePaths());
}
private static final Logger LOG =
LoggerFactory.getLogger(FileSystemFetcher.class);
@@ -61,6 +62,8 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
private boolean extractFileSystemMetadata = false;
+ private boolean allowAbsolutePaths = false;
+
static boolean isDescendant(Path root, Path descendant) {
return descendant.toAbsolutePath().normalize()
.startsWith(root.toAbsolutePath().normalize());
@@ -147,6 +150,22 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
this.extractFileSystemMetadata = extractFileSystemMetadata;
}
+ /**
+ * If true, allows fetchKey to be an absolute path when basePath is not
set.
+ * This explicitly acknowledges the security risk of unrestricted file
access.
+ * The default is <code>false</code>.
+ *
+ * @param allowAbsolutePaths whether an absolute fetchKey is allowed when basePath is not set
+ */
+ @Field
+ public void setAllowAbsolutePaths(boolean allowAbsolutePaths) {
+ this.allowAbsolutePaths = allowAbsolutePaths;
+ }
+
+ public boolean isAllowAbsolutePaths() {
+ return allowAbsolutePaths;
+ }
+
@Override
public void initialize(Map<String, Param> params) throws
TikaConfigException {
//no-op
@@ -156,11 +175,13 @@ public class FileSystemFetcher extends AbstractFetcher
implements Initializable
public void checkInitialization(InitializableProblemHandler problemHandler)
throws TikaConfigException {
if (basePath == null || basePath.toString().isBlank()) {
- LOG.warn("'basePath' has not been set. " +
- "This means that client code or clients can read from any
file that this " +
- "process has permissions to read. If you are running
tika-server, make " +
- "absolutely certain that you've locked down " +
- "access to tika-server and file-permissions for the
tika-server process.");
+ if (!allowAbsolutePaths) {
+ throw new TikaConfigException(
+ "'basePath' must be set, or 'allowAbsolutePaths' must
be true. "
+ + "Without basePath, clients can read any file
this process "
+ + "has access to. Set 'allowAbsolutePaths' to
'true' to explicitly "
+ + "allow this behavior and accept the security
risks.");
+ }
return;
}
if (basePath.toString().startsWith("http://")) {
diff --git
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
index b9f155fbd7..cf3d1a6369 100644
---
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
+++
b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java
@@ -21,6 +21,7 @@ import org.apache.tika.pipes.fetcher.config.AbstractConfig;
public class FileSystemFetcherConfig extends AbstractConfig {
private String basePath;
private boolean extractFileSystemMetadata;
+ private boolean allowAbsolutePaths;
public String getBasePath() {
return basePath;
@@ -39,4 +40,18 @@ public class FileSystemFetcherConfig extends AbstractConfig {
this.extractFileSystemMetadata = extractFileSystemMetadata;
return this;
}
+
+ /**
+ * If true, allows fetchKey to be an absolute path when basePath is not
set.
+ * This explicitly acknowledges the security risk of unrestricted file
access.
+ * The default is <code>false</code>.
+ */
+ public boolean isAllowAbsolutePaths() {
+ return allowAbsolutePaths;
+ }
+
+ public FileSystemFetcherConfig setAllowAbsolutePaths(boolean
allowAbsolutePaths) {
+ this.allowAbsolutePaths = allowAbsolutePaths;
+ return this;
+ }
}
diff --git
a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
index 486388c1c2..5a3ed9a3c7 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java
@@ -69,12 +69,20 @@ public class TikaPipesConfigTest extends
AbstractTikaConfigTest {
}
@Test
- public void testNoBasePathFetchers() throws Exception {
- //no basepath is allowed as of > 2.3.0
- //test that this does not throw an exception.
+ public void testNoBasePathFetchersRequiresAllowAbsolutePaths() throws
Exception {
+ //no basepath requires allowAbsolutePaths=true as of 3.x
+ assertThrows(TikaConfigException.class, () -> {
+
FetcherManager.load(getConfigFilePath("fetchers-nobasepath-config.xml"));
+ });
+ }
+ @Test
+ public void testNoBasePathFetchersWithAllowAbsolutePaths() throws
Exception {
+ //no basepath is allowed if allowAbsolutePaths=true
FetcherManager fetcherManager = FetcherManager.load(
- getConfigFilePath("fetchers-nobasepath-config.xml"));
+
getConfigFilePath("fetchers-nobasepath-allowabsolute-config.xml"));
+ Fetcher f = fetcherManager.getFetcher("fs2");
+ assertTrue(((FileSystemFetcher) f).isAllowAbsolutePaths());
}
@Test
diff --git
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
index 7e29ac20ad..b46ddcda9e 100644
---
a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
+++
b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.pipes.fetcher.fs;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -27,6 +28,7 @@ import java.nio.file.Paths;
import org.junit.jupiter.api.Test;
import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.exception.TikaConfigException;
public class FileSystemFetcherTest {
@@ -54,4 +56,36 @@ public class FileSystemFetcherTest {
f.checkInitialization(InitializableProblemHandler.IGNORE);
});
}
+
+ @Test
+ public void testNoBasePathWithoutAllowAbsolutePathsThrows() {
+ FileSystemFetcher f = new FileSystemFetcher();
+ f.setName("fs");
+ // No basePath set, allowAbsolutePaths defaults to false
+ assertThrows(TikaConfigException.class, () -> {
+ f.checkInitialization(InitializableProblemHandler.IGNORE);
+ });
+ }
+
+ @Test
+ public void testNoBasePathWithAllowAbsolutePathsSucceeds() {
+ FileSystemFetcher f = new FileSystemFetcher();
+ f.setName("fs");
+ f.setAllowAbsolutePaths(true);
+ // No basePath set, but allowAbsolutePaths is true
+ assertDoesNotThrow(() -> {
+ f.checkInitialization(InitializableProblemHandler.IGNORE);
+ });
+ }
+
+ @Test
+ public void testWithBasePathSucceeds() {
+ FileSystemFetcher f = new FileSystemFetcher();
+ f.setName("fs");
+ f.setBasePath("/some/path");
+ // basePath is set, so allowAbsolutePaths doesn't matter
+ assertDoesNotThrow(() -> {
+ f.checkInitialization(InitializableProblemHandler.IGNORE);
+ });
+ }
}
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
new file mode 100644
index 0000000000..da92587d0a
--- /dev/null
+++
b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-allowabsolute-config.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <fetchers>
+ <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+ <name>fs1</name>
+ <basePath>/my/base/path1</basePath>
+ </fetcher>
+ <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+ <name>fs2</name>
+ <allowAbsolutePaths>true</allowAbsolutePaths>
+ </fetcher>
+ </fetchers>
+</properties>