This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new e4da37eba TIKA-4518 -- escape quotes in filenames for better cross platform robustness
e4da37eba is described below

commit e4da37eba0bb9c8ba4a9e58ea846d2216ecb5ed3
Author: tallison <[email protected]>
AuthorDate: Wed Oct 15 15:01:46 2025 -0400

    TIKA-4518 -- escape quotes in filenames for better cross platform robustness
---
 tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java       | 4 ++--
 tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java     | 2 +-
 tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 94ccfd96c..391fffd61 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -299,10 +299,10 @@ public class TikaCLITest {
                 "testPST.pst-embed/00000008-First email.msg",
                 "testPST.pst-embed/00000004-[jira] [Resolved] (TIKA-1249) 
Vcard files detection.msg",
                 "testPST.pst-embed/00000003-Feature Generators.msg",
-                "testPST.pst-embed/00000002-putstatic\".msg",
+                "testPST.pst-embed/00000002-putstatic%22.msg",
                 "testPST.pst-embed/00000005-[jira] [Commented] (TIKA-1250) 
Process loops infintely processing a CHM file.msg",
                 "testPST.pst-embed/00000009-attachment.docx",
-                "testPST.pst-embed/00000006-[WEBINAR] - \"Introducing 
Couchbase Server 2.5\".msg"};
+                "testPST.pst-embed/00000006-[WEBINAR] - %22Introducing 
Couchbase Server 2.5%22.msg"};
         testRecursiveUnpack("testPST.pst", expectedChildren, 2);
         try (Reader reader = 
Files.newBufferedReader(extractDir.resolve("testPST.pst.json"))) {
             List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index cf250e934..d4230d441 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -39,7 +39,7 @@ public class FilenameUtils {
     public final static char[] RESERVED_FILENAME_CHARACTERS =
             {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 
0x0B, 0x0C, 0x0D,
                     0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
0x17, 0x18, 0x19, 0x1A,
-                    0x1B, 0x1C, 0x1D, 0x1E, 0x1F, '?', ':', '*', '<', '>', 
'|'};
+                    0x1B, 0x1C, 0x1D, 0x1E, 0x1F, '?', ':', '*', '<', '>', 
'|', '"', '\''};
 
     private final static HashSet<Character> RESERVED = new HashSet<>(38);
 
diff --git a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
index c670bac83..bbcfe0889 100644
--- a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
@@ -152,7 +152,7 @@ public class FilenameUtilsTest {
         assertEquals("_the quick brown fox.xlsx", 
sanitizeFilename("C:\\a/b/c/..the quick brown fox.xlsx"));
         assertEquals("_the quick brown fox.xlsx", 
sanitizeFilename("~/a/b/c/.the quick brown fox.xlsx"));
         assertEquals("the quick%3Ebrown fox.xlsx", sanitizeFilename("the 
quick>brown fox.xlsx"));
-        assertEquals("the quick\"brown fox.xlsx", sanitizeFilename("the 
quick\"brown fox.xlsx"));
+        assertEquals("the quick%22brown fox.xlsx", sanitizeFilename("the 
quick\"brown fox.xlsx"));
         assertEquals("the quick brown fox.xlsx", sanitizeFilename("\"the quick 
brown fox.xlsx\""));
 
         assertEquals("_.docx", sanitizeFilename("..................docx"));

Reply via email to