This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_2x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 722aaaf006f02d2e5c6deaaae1dcbf08a2e331e4 Author: Tim Allison <talli...@apache.org> AuthorDate: Mon Mar 25 17:06:45 2024 -0400 TIKA-4224 -- add detection for 3mf (#1689) (cherry picked from commit 3ffbc04f7a1023aa8e6d5ea22d19feb2a7e61a8f) --- .../org/apache/tika/mime/tika-mimetypes.xml | 6 +++ .../detect/microsoft/ooxml/OPCPackageDetector.java | 47 +++++++++++++-------- .../tika/detect/TestContainerAwareDetector.java | 5 +++ .../src/test/resources/test-documents/test3mf.3mf | Bin 0 -> 28243 bytes 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 81a0af3c9..de95917bb 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -2062,6 +2062,12 @@ <glob pattern="*.ost"/> </mime-type> + <mime-type type="application/vnd.ms-package.3dmanufacturing-3dmodel+xml"> + <tika:link>https://en.wikipedia.org/wiki/3D_Manufacturing_Format</tika:link> + <_comment>3D manufacturing format</_comment> + <glob pattern="*.3mf"/> + </mime-type> + <mime-type type="application/vnd.ms-pki.seccat"> <glob pattern="*.cat"/> </mime-type> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java index cdef864e0..369ba475c 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java @@ -88,6 +88,9 @@ public class OPCPackageDetector implements ZipContainerDetector { MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template"); static final MediaType XLAM = MediaType.application("vnd.ms-excel.addin.macroEnabled.12"); static final MediaType XPS = MediaType.application("vnd.ms-xpsdocument"); + + static final MediaType THREE_MF = MediaType.application("vnd.ms-package.3dmanufacturing-3dmodel+xml"); + static final Set<String> OOXML_HINTS = fillSet("word/document.xml", "_rels/.rels", "[Content_Types].xml", "ppt/presentation.xml", "ppt/slides/slide1.xml", "xl/workbook.xml", @@ -100,6 +103,8 @@ public class OPCPackageDetector implements ZipContainerDetector { "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation"; private static final String STAR_OFFICE_6_WRITER = "application/vnd.sun.xml.writer"; + private static final String THREE_MF_DOCUMENT = + "http://schemas.microsoft.com/3dmanufacturing/2013/01/3dmodel"; static Map<String, MediaType> OOXML_CONTENT_TYPES = new ConcurrentHashMap<>(); static { @@ -153,29 +158,37 @@ public class OPCPackageDetector implements ZipContainerDetector { // Check for the normal Office core document PackageRelationshipCollection core = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT); + // Otherwise check for some other Office core document types if (core.size() == 0) { core = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT); - } - if (core.size() == 0) { - core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT); - } - if (core.size() == 0) { - core = pkg.getRelationshipsByType(XPS_DOCUMENT); - if (core.size() == 1) { - return MediaType.application("vnd.ms-xpsdocument"); + + if (core.size() == 0) { + core = pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT); } - core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT); - if (core.size() == 1) { - return MediaType.application("vnd.ms-xpsdocument"); + if (core.size() == 0) { + core = pkg.getRelationshipsByType(XPS_DOCUMENT); + if (core.size() == 1) { + return MediaType.application("vnd.ms-xpsdocument"); + } + core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT); + if (core.size() == 1) { + return MediaType.application("vnd.ms-xpsdocument"); + } } - } - if (core.size() == 0) { - core = pkg.getRelationshipsByType( - "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence"); - if (core.size() == 1) { - return MediaType.parse("model/vnd.dwfx+xps"); + if (core.size() == 0) { + core = pkg.getRelationshipsByType( + "http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence"); + if (core.size() == 1) { + return MediaType.parse("model/vnd.dwfx+xps"); + } + } + if (core.size() == 0) { + core = pkg.getRelationshipsByType(THREE_MF_DOCUMENT); + if (core.size() == 1) { + return THREE_MF; + } } } // If we didn't find a single core document of any type, skip detection diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java index 9ad968b9c..d35df67bf 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java @@ -262,6 +262,11 @@ public class TestContainerAwareDetector extends MultiThreadedTikaTest { assertTypeByData("testODTnotaZipFile.odt", "text/plain"); } + @Test + public void test3MF() throws Exception { + assertTypeByData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml"); + assertTypeByNameAndData("test3mf.3mf", "application/vnd.ms-package.3dmanufacturing-3dmodel+xml"); + } @Test public void testODFDifferentOrder() throws Exception { //TIKA-3356 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf new file mode 100644 index 000000000..f7d0cf5a7 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf differ