This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4224
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 2a5a8266b3b78a5ea013353a7bec4010e02adcda
Author: tallison <talli...@apache.org>
AuthorDate: Mon Mar 25 15:00:07 2024 -0400

    TIKA-4224 -- add detection for 3mf
---
 .../org/apache/tika/mime/tika-mimetypes.xml        |   6 +++
 .../detect/microsoft/ooxml/OPCPackageDetector.java |  47 +++++++++++++--------
 .../tika/detect/TestContainerAwareDetector.java    |   5 +++
 .../src/test/resources/test-documents/test3mf.3mf  | Bin 0 -> 28243 bytes
 4 files changed, 41 insertions(+), 17 deletions(-)

diff --git 
a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 7176332ef..f6e974946 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -2065,6 +2065,12 @@
     <glob pattern="*.ost"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-package.3dmanufacturing-3dmodel+xml">
+    
<tika:link>https://en.wikipedia.org/wiki/3D_Manufacturing_Format</tika:link>
+    <_comment>3D manufacturing format</_comment>
+    <glob pattern="*.3mf"/>
+  </mime-type>
+
   <mime-type type="application/vnd.ms-pki.seccat">
     <glob pattern="*.cat"/>
   </mime-type>
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
index cdef864e0..369ba475c 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
@@ -88,6 +88,9 @@ public class OPCPackageDetector implements 
ZipContainerDetector {
             
MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template");
     static final MediaType XLAM = 
MediaType.application("vnd.ms-excel.addin.macroEnabled.12");
     static final MediaType XPS = MediaType.application("vnd.ms-xpsdocument");
+
+    static final MediaType THREE_MF = 
MediaType.application("vnd.ms-package.3dmanufacturing-3dmodel+xml");
+
     static final Set<String> OOXML_HINTS =
             fillSet("word/document.xml", "_rels/.rels", "[Content_Types].xml",
                     "ppt/presentation.xml", "ppt/slides/slide1.xml", 
"xl/workbook.xml",
@@ -100,6 +103,8 @@ public class OPCPackageDetector implements 
ZipContainerDetector {
             "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation";
     private static final String STAR_OFFICE_6_WRITER = 
"application/vnd.sun.xml.writer";
 
+    private static final String THREE_MF_DOCUMENT =
+            "http://schemas.microsoft.com/3dmanufacturing/2013/01/3dmodel";
     static Map<String, MediaType> OOXML_CONTENT_TYPES = new 
ConcurrentHashMap<>();
 
     static {
@@ -153,29 +158,37 @@ public class OPCPackageDetector implements 
ZipContainerDetector {
         // Check for the normal Office core document
         PackageRelationshipCollection core =
                 
pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
+
         // Otherwise check for some other Office core document types
         if (core.size() == 0) {
             core = 
pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT);
-        }
-        if (core.size() == 0) {
-            core = 
pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-        }
-        if (core.size() == 0) {
-            core = pkg.getRelationshipsByType(XPS_DOCUMENT);
-            if (core.size() == 1) {
-                return MediaType.application("vnd.ms-xpsdocument");
+
+            if (core.size() == 0) {
+                core = 
pkg.getRelationshipsByType(PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
             }
-            core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
-            if (core.size() == 1) {
-                return MediaType.application("vnd.ms-xpsdocument");
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(XPS_DOCUMENT);
+                if (core.size() == 1) {
+                    return MediaType.application("vnd.ms-xpsdocument");
+                }
+                core = pkg.getRelationshipsByType(OPEN_XPS_DOCUMENT);
+                if (core.size() == 1) {
+                    return MediaType.application("vnd.ms-xpsdocument");
+                }
             }
-        }
 
-        if (core.size() == 0) {
-            core = pkg.getRelationshipsByType(
-                    
"http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
-            if (core.size() == 1) {
-                return MediaType.parse("model/vnd.dwfx+xps");
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(
+                        
"http://schemas.autodesk.com/dwfx/2007/relationships/documentsequence");
+                if (core.size() == 1) {
+                    return MediaType.parse("model/vnd.dwfx+xps");
+                }
+            }
+            if (core.size() == 0) {
+                core = pkg.getRelationshipsByType(THREE_MF_DOCUMENT);
+                if (core.size() == 1) {
+                    return THREE_MF;
+                }
             }
         }
         // If we didn't find a single core document of any type, skip detection
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index 9ad968b9c..d35df67bf 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -262,6 +262,11 @@ public class TestContainerAwareDetector extends 
MultiThreadedTikaTest {
         assertTypeByData("testODTnotaZipFile.odt", "text/plain");
     }
 
+    @Test
+    public void test3MF() throws Exception {
+        assertTypeByData("test3mf.3mf", 
"application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+        assertTypeByNameAndData("test3mf.3mf", 
"application/vnd.ms-package.3dmanufacturing-3dmodel+xml");
+    }
     @Test
     public void testODFDifferentOrder() throws Exception {
         //TIKA-3356
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf
new file mode 100644
index 000000000..f7d0cf5a7
Binary files /dev/null and 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test3mf.3mf
 differ

Reply via email to