This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new ebbd895  TIKA-2950 -- add boolean metadata value to identify signed ooxml files
ebbd895 is described below

commit ebbd895749337f5bfaa4a653d245ba9356f3207c
Author: TALLISON <[email protected]>
AuthorDate: Fri Sep 27 11:31:32 2019 -0400

    TIKA-2950 -- add boolean metadata value to identify signed ooxml files
    
    # Conflicts:
    #   tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
---
 .../org/apache/tika/metadata/TikaCoreProperties.java    |   8 +++++---
 .../parser/microsoft/ooxml/OOXMLExtractorFactory.java   |   9 +++++++++
 .../apache/tika/parser/microsoft/ooxml/OOXMLParser.java |   3 +++
 .../tika/parser/microsoft/ooxml/OOXMLParserTest.java    |  13 +++++++++++++
 .../test/resources/test-documents/testEXCEL_signed.xlsx | Bin 0 -> 15221 bytes
 .../test/resources/test-documents/testPPT_signed.pptx   | Bin 0 -> 39761 bytes
 .../test/resources/test-documents/testWord_signed.docx  | Bin 0 -> 18245 bytes
 7 files changed, 30 insertions(+), 3 deletions(-)

diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java 
b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 24e3ae0..616e812 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -222,7 +222,7 @@ public interface TikaCoreProperties {
       */
       Property CREATED = Property.composite(DublinCore.CREATED,
              new Property[] { 
-                     Office.CREATION_DATE, 
+                     Office.CREATION_DATE,
              });
      
      /** 
@@ -230,7 +230,7 @@ public interface TikaCoreProperties {
       * @see Office#SAVE_DATE
       */
       Property MODIFIED = Property.composite(DublinCore.MODIFIED,
-             new Property[] { 
+             new Property[] {
                      Office.SAVE_DATE,
                      Property.internalText("Last-Modified")
              });
@@ -284,5 +284,7 @@ public interface TikaCoreProperties {
                                                                      
EmbeddedResourceType.ATTACHMENT.toString(),
                                                                      
EmbeddedResourceType.INLINE.toString());
 
-    
+
+
+    Property HAS_SIGNATURE = Property.internalBoolean("hasSignature");
 }
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
index 4ac436c..141dee3 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
@@ -46,6 +46,7 @@ import org.apache.poi.xwpf.usermodel.XWPFRelation;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ParseContext;
@@ -120,6 +121,14 @@ public class OOXMLExtractorFactory {
                 }
             }
 
+            if (pkg != null) {
+                PackageRelationshipCollection prc =
+                        pkg.getRelationshipsByType(OOXMLParser.SIGNATURE_RELATIONSHIP);
+                if (prc != null && prc.size() > 0) {
+                    metadata.set(TikaCoreProperties.HAS_SIGNATURE, "true");
+                }
+            }
+
             MediaType type = null;
             String mediaTypeString = metadata.get(Metadata.CONTENT_TYPE);
             if (mediaTypeString != null) {
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
index 81ec4b6..c18e500 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
@@ -41,6 +41,9 @@ public class OOXMLParser extends AbstractOfficeParser {
         ZipSecureFile.setMinInflateRatio(-1.0d);
     }
 
+    protected static final String SIGNATURE_RELATIONSHIP =
+            "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/origin";
+
     protected static final MediaType XPS = MediaType.application("vnd.ms-xpsdocument");
 
     protected static final Set<MediaType> SUPPORTED_TYPES =
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 1edd89b..542073c 100644
--- 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1878,6 +1878,19 @@ public class OOXMLParserTest extends TikaTest {
         //TIKA_2446
         getRecursiveMetadata("testZIP_corrupted_oom.zip");
     }
+
+    @Test
+    public void testSigned() throws Exception {
+        Metadata m = getXML("testWORD_signed.docx").metadata;
+        assertEquals("true", m.get(TikaCoreProperties.HAS_SIGNATURE));
+
+        m = getXML("testEXCEL_signed.xlsx").metadata;
+        assertEquals("true", m.get(TikaCoreProperties.HAS_SIGNATURE));
+
+        m = getXML("testPPT_signed.pptx").metadata;
+        assertEquals("true", m.get(TikaCoreProperties.HAS_SIGNATURE));
+
+    }
 }
 
 
diff --git 
a/tika-parsers/src/test/resources/test-documents/testEXCEL_signed.xlsx 
b/tika-parsers/src/test/resources/test-documents/testEXCEL_signed.xlsx
new file mode 100644
index 0000000..b7a0df2
Binary files /dev/null and 
b/tika-parsers/src/test/resources/test-documents/testEXCEL_signed.xlsx differ
diff --git a/tika-parsers/src/test/resources/test-documents/testPPT_signed.pptx 
b/tika-parsers/src/test/resources/test-documents/testPPT_signed.pptx
new file mode 100644
index 0000000..1de8f5d
Binary files /dev/null and 
b/tika-parsers/src/test/resources/test-documents/testPPT_signed.pptx differ
diff --git 
a/tika-parsers/src/test/resources/test-documents/testWord_signed.docx 
b/tika-parsers/src/test/resources/test-documents/testWord_signed.docx
new file mode 100644
index 0000000..cad54b3
Binary files /dev/null and 
b/tika-parsers/src/test/resources/test-documents/testWord_signed.docx differ

Reply via email to