This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4147 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 54bf8880e859bb09c5c99ab73cd15761752647ff Author: tallison <[email protected]> AuthorDate: Tue Oct 3 12:51:36 2023 -0400 TIKA-4147 -- look for custom-mimetypes.xml directly on the class path --- CHANGES.txt | 3 +++ .../java/org/apache/tika/mime/MimeTypesFactory.java | 17 ++++++++++++++++- .../java/org/apache/tika/config/TikaConfigTest.java | 2 +- .../org/apache/tika/fork/ForkParserTikaBinTest.java | 4 ++-- .../java/org/apache/tika/mime/CustomReaderTest.java | 4 ++-- .../{org/apache/tika/mime => }/custom-mimetypes.xml | 0 .../{org/apache/tika/mime => }/custom-mimetypes2.xml | 0 7 files changed, 24 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4ede898b6..9abc95149 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -14,6 +14,9 @@ Release 3.0.0-BETA - ?? * Removed xerces2 as a dependency (TIKA-4135). + * Tika will look for "custom-mimetypes.xml" directly on the classpath, NOT + under "/org/apache/tika/mime/". (TIKA-4147). + Other Changes/Updates * Fix bug in DateUtils that stripped timezone information from diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java index 973a9dac8..05a1aca6a 100644 --- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java +++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java @@ -24,13 +24,20 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.w3c.dom.Document; +import org.apache.tika.config.TikaConfig; + /** * Creates instances of MimeTypes. */ public class MimeTypesFactory { + private static final Logger LOG = LoggerFactory.getLogger(MimeTypesFactory.class); + + /** * System property to set a path to an additional external custom mimetypes * XML file to be loaded. @@ -170,12 +177,17 @@ public class MimeTypesFactory { // Get the core URL, and all the extensions URLs URL coreURL = classLoader.getResource(classPrefix + coreFilePath); List<URL> extensionURLs = - Collections.list(classLoader.getResources(classPrefix + extensionFilePath)); + Collections.list(classLoader.getResources(extensionFilePath)); // Swap that into an Array, and process List<URL> urls = new ArrayList<>(); urls.add(coreURL); urls.addAll(extensionURLs); + if (LOG.isDebugEnabled()) { + urls.stream().forEach( u -> + LOG.debug("Loaded custom mimes file: {}", u) + ); + } String customMimesPath = System.getProperty(CUSTOM_MIMES_SYS_PROP); if (customMimesPath != null) { @@ -186,6 +198,9 @@ public class MimeTypesFactory { } URL externalURL = externalFile.toURI().toURL(); urls.add(externalURL); + if (LOG.isDebugEnabled()) { + LOG.debug("Loaded external custom mimetypes file: {}", externalFile.getAbsolutePath()); + } } return create(urls.toArray(new URL[0])); diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java index 90742d54c..7fa021729 100644 --- a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java +++ b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java @@ -173,7 +173,7 @@ public class TikaConfigTest extends AbstractTikaConfigTest { // - Built-In Mimetypes assertNotNull(resources.get("org/apache/tika/mime/tika-mimetypes.xml")); // - Custom Mimetypes - assertNotNull(resources.get("org/apache/tika/mime/custom-mimetypes.xml")); + assertNotNull(resources.get("custom-mimetypes.xml")); } /** diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java index e484e8f39..4756f00ab 100644 --- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java +++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java @@ -87,8 +87,8 @@ public class ForkParserTikaBinTest extends TikaTest { IOUtils.copy(input, jarOs); } try (InputStream input = ForkParserTikaBinTest.class - .getResourceAsStream("/org/apache/tika/mime/custom-mimetypes.xml")) { - jarOs.putNextEntry(new JarEntry("org/apache/tika/mime/custom-mimetypes.xml")); + .getResourceAsStream("/custom-mimetypes.xml")) { + jarOs.putNextEntry(new JarEntry("custom-mimetypes.xml")); IOUtils.copy(input, jarOs); } diff --git a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java index 988bf5841..6c5774087 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java @@ -36,7 +36,7 @@ public class CustomReaderTest { public void testCustomReader() throws Exception { MimeTypes mimeTypes = new MimeTypes(); CustomMimeTypesReader reader = new CustomMimeTypesReader(mimeTypes); - try (InputStream is = getClass().getResourceAsStream("custom-mimetypes.xml")) { + try (InputStream is = getClass().getResourceAsStream("/custom-mimetypes.xml")) { reader.read(is); } String key = "hello/world-file"; @@ -47,7 +47,7 @@ public class CustomReaderTest { assertEquals(0, reader.ignorePatterns.size()); // Now add another resource with conflicting regex - try (InputStream is = getClass().getResourceAsStream("custom-mimetypes2.xml")) { + try (InputStream is = getClass().getResourceAsStream("/custom-mimetypes2.xml")) { reader.read(is); } key = "another/world-file"; diff --git a/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml b/tika-core/src/test/resources/custom-mimetypes.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml rename to tika-core/src/test/resources/custom-mimetypes.xml diff --git a/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml b/tika-core/src/test/resources/custom-mimetypes2.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml rename to tika-core/src/test/resources/custom-mimetypes2.xml
