This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-2224
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/TIKA-2224 by this push:
     new 490af05  add author support and last modified timestamp support (#301)
490af05 is described below

commit 490af05612449faa3370b924e214443bf46ee95c
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed Dec 11 15:46:43 2019 -0600

    add author support and last modified timestamp support (#301)
    
    * add author support
    
    * clean up some bad code
    
    * add last modified timestamp support
    
    * creation timestamp and modified timestamps correctly implemented.
    
    * remove incorrect dead code
    
    * add jcid to tostring
    
    * add jcid to tostring
    
    * fix the calculation
    
    * better comment.
---
 .../apache/tika/parser/microsoft/onenote/JCID.java |  43 +++----
 .../microsoft/onenote/JCIDPropertySetTypeEnum.java |  79 +++++++++++++
 .../parser/microsoft/onenote/OneNoteParser.java    |  20 ++++
 .../microsoft/onenote/OneNoteTreeWalker.java       | 126 ++++++++++++++++++++-
 .../microsoft/onenote/OneNoteParserTest.java       |  78 +++++++++++++
 5 files changed, 325 insertions(+), 21 deletions(-)

diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
index 175ed96..745ff55 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
@@ -46,6 +46,7 @@ package org.apache.tika.parser.microsoft.onenote;
  * reserved (11 bits): MUST be zero, and MUST be ignored.
  */
 class JCID {
+    long jcid;
     long index;
     boolean isBinary;
     boolean isPropertySet;
@@ -64,6 +65,7 @@ class JCID {
     }
 
     public void loadFrom32BitIndex(long fullIndex) {
+        jcid = fullIndex;
         index = fullIndex & 0xffff;
         isBinary = ((fullIndex >> 16) & 1) == 1;
         isPropertySet = ((fullIndex >> 17) & 1) == 1;
@@ -78,66 +80,69 @@ class JCID {
     @Override
     public String toString() {
         return "JCID{" +
-          "index=" + index +
-          ", isBinary=" + isBinary +
-          ", isPropertySet=" + isPropertySet +
-          ", isGraphNode=" + isGraphNode +
-          ", isFileData=" + isFileData +
-          ", isReadOnly=" + isReadOnly +
-          '}';
+            "jcid=" + JCIDPropertySetTypeEnum.of(jcid) + " (0x" + 
Long.toHexString(jcid) + ")" +
+            ", index=" + index +
+            ", isBinary=" + isBinary +
+            ", isPropertySet=" + isPropertySet +
+            ", isGraphNode=" + isGraphNode +
+            ", isFileData=" + isFileData +
+            ", isReadOnly=" + isReadOnly +
+            '}';
+    }
+
+    public long getJcid() {
+        return jcid;
+    }
+
+    public void setJcid(long jcid) {
+        this.jcid = jcid;
     }
 
     public long getIndex() {
         return index;
     }
 
-    public JCID setIndex(long index) {
+    public void setIndex(long index) {
         this.index = index;
-        return this;
     }
 
     public boolean isBinary() {
         return isBinary;
     }
 
-    public JCID setBinary(boolean binary) {
+    public void setBinary(boolean binary) {
         isBinary = binary;
-        return this;
     }
 
     public boolean isPropertySet() {
         return isPropertySet;
     }
 
-    public JCID setPropertySet(boolean propertySet) {
+    public void setPropertySet(boolean propertySet) {
         isPropertySet = propertySet;
-        return this;
     }
 
     public boolean isGraphNode() {
         return isGraphNode;
     }
 
-    public JCID setGraphNode(boolean graphNode) {
+    public void setGraphNode(boolean graphNode) {
         isGraphNode = graphNode;
-        return this;
     }
 
     public boolean isFileData() {
         return isFileData;
     }
 
-    public JCID setFileData(boolean fileData) {
+    public void setFileData(boolean fileData) {
         isFileData = fileData;
-        return this;
     }
 
     public boolean isReadOnly() {
         return isReadOnly;
     }
 
-    public JCID setReadOnly(boolean readOnly) {
+    public void setReadOnly(boolean readOnly) {
         isReadOnly = readOnly;
-        return this;
     }
 }
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
new file mode 100644
index 0000000..4b30da0
--- /dev/null
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The JCID property set type enum from section 2.1.13 of MS-ONE
+ * specification.
+ */
+enum JCIDPropertySetTypeEnum {
+  jcidReadOnlyPersistablePropertyContainerForAuthor(0x00120001),
+  jcidPersistablePropertyContainerForTOC(0x00020001),
+  jcidPersistablePropertyContainerForTOCSection(0x00020001),
+  jcidSectionNode(0x00060007),
+  jcidPageSeriesNode(0x00060008),
+  jcidPageNode(0x0006000B),
+  jcidOutlineNode(0x0006000C),
+  jcidOutlineElementNode(0x0006000D),
+  jcidRichTextOENode(0x0006000E),
+  jcidImageNode(0x00060011),
+  jcidNumberListNode(0x00060012),
+  jcidOutlineGroup(0x00060019),
+  jcidTableNode(0x00060022),
+  jcidTableRowNode(0x00060023),
+  jcidTableCellNode(0x00060024),
+  jcidTitleNode(0x0006002C),
+  jcidPageMetaData(0x00020030),
+  jcidSectionMetaData(0x00020031),
+  jcidEmbeddedFileNode(0x00060035),
+  jcidPageManifestNode(0x00060037),
+  jcidConflictPageMetaData(0x00020038),
+  jcidVersionHistoryContent(0x0006003C),
+  jcidVersionProxy(0x0006003D),
+  jcidNoteTagSharedDefinitionContainer(0x00120043),
+  jcidRevisionMetaData(0x00020044),
+  jcidVersionHistoryMetaData(0x00020046),
+  jcidParagraphStyleObject(0x0012004D),
+  jcidParagraphStyleObjectForText(0x0012004D),
+  unknown(0x0);
+
+  private long jcid;
+
+  JCIDPropertySetTypeEnum(long jcid) {
+    this.jcid = jcid;
+  }
+
+  private static final Map<Long, JCIDPropertySetTypeEnum> BY_ID = new 
HashMap<>();
+
+  static {
+    for (JCIDPropertySetTypeEnum e : values()) {
+      BY_ID.put(e.jcid, e);
+    }
+  }
+
+  public static JCIDPropertySetTypeEnum of(Long id) {
+    JCIDPropertySetTypeEnum result = BY_ID.get(id);
+    if (result == null) {
+      return unknown;
+    }
+    return result;
+  }
+}
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
index 32b8b23..22756e3 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
@@ -21,6 +21,7 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
@@ -30,6 +31,7 @@ import org.xml.sax.SAXException;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.time.Instant;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -102,6 +104,24 @@ public class OneNoteParser extends AbstractParser {
 
             oneNoteTreeWalker.walkTree();
 
+            if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
+                metadata.set(Property.externalTextBag("authors"), 
oneNoteTreeWalker.getAuthors().toArray(new String[] {}));
+            }
+            if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
+                metadata.set(Property.externalTextBag("mostRecentAuthors"), 
oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[] {}));
+            }
+            if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
+                metadata.set(Property.externalTextBag("originalAuthors"), 
oneNoteTreeWalker.getOriginalAuthors().toArray(new String[] {}));
+            }
+            if (!Instant.MAX.equals(oneNoteTreeWalker.getCreationTimestamp())) 
{
+                metadata.set("creationTimestamp", 
String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
+            }
+            if 
(!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) {
+                metadata.set("lastModifiedTimestamp", 
String.valueOf(oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
+            }
+            if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) {
+                metadata.set("lastModified", 
String.valueOf(oneNoteTreeWalker.getLastModified()));
+            }
             xhtml.endDocument();
         }
     }
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index ff2d192..14b3745 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -33,6 +33,10 @@ import org.xml.sax.helpers.AttributesImpl;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.Month;
+import java.time.ZoneOffset;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -51,6 +55,28 @@ class OneNoteTreeWalker {
 
     private static final String P = "p";
     private static Pattern HYPERLINK_PATTERN = 
Pattern.compile("\uFDDFHYPERLINK\\s+\"([^\"]+)\"([^\"]+)$");
+
+    /**
+     * See spec MS-ONE - 2.3.1 - TIME32, whose epoch is Jan 1 1980 UTC.
+     * This is the number of seconds between that epoch and Instant.EPOCH.
+     */
+    private static final long TIME32_EPOCH_DIFF_1980;
+    static {
+        LocalDateTime time32Epoch1980 = LocalDateTime.of(1980, Month.JANUARY, 
1, 0, 0);
+        Instant instant = time32Epoch1980.atZone(ZoneOffset.UTC).toInstant();
+        TIME32_EPOCH_DIFF_1980 = (instant.toEpochMilli() - 
Instant.EPOCH.toEpochMilli()) / 1000;
+    }
+    /**
+     * See spec MS-DTYP - 2.3.3 - DATETIME, whose epoch is Jan 1 1601 UTC.
+     * This is the (negative) number of seconds from Instant.EPOCH to that epoch.
+     */
+    private static final long DATETIME_EPOCH_DIFF_1601;
+    static {
+        LocalDateTime time32Epoch1601 = LocalDateTime.of(1601, Month.JANUARY, 
1, 0, 0);
+        Instant instant = time32Epoch1601.atZone(ZoneOffset.UTC).toInstant();
+        DATETIME_EPOCH_DIFF_1601 = (instant.toEpochMilli() - 
Instant.EPOCH.toEpochMilli()) / 1000;
+    }
+
     private OneNoteTreeWalkerOptions options;
     private OneNoteDocument oneNoteDocument;
     private OneNoteDirectFileResource dif;
@@ -58,6 +84,14 @@ class OneNoteTreeWalker {
     private Pair<Long, ExtendedGUID> roleAndContext;
     private final Metadata parentMetadata;
     private final EmbeddedDocumentExtractor embeddedDocumentExtractor;
+    private final Set<String> authors = new HashSet<>();
+    private final Set<String> mostRecentAuthors = new HashSet<>();
+    private final Set<String> originalAuthors = new HashSet<>();
+    private Instant lastModifiedTimestamp = Instant.MIN;
+    private long creationTimestamp = Long.MAX_VALUE;
+    private long lastModified = Long.MIN_VALUE;
+    private boolean mostRecentAuthorProp = false;
+    private boolean originalAuthorProp = false;
 
     /**
      * Create a one tree walker.
@@ -271,7 +305,6 @@ class OneNoteTreeWalker {
             throw new TikaMemoryLimitException("File data store cb " + 
fileDataStoreObjectReference.ref.fileData.cb +
               " exceeds document size: " + dif.size());
         }
-
         handleEmbedded((int)fileDataStoreObjectReference.ref.fileData.cb);
         structure.put("fileDataStoreObjectMetadata", 
fileDataStoreObjectReference);
         return structure;
@@ -346,7 +379,43 @@ class OneNoteTreeWalker {
         propMap.put("oneNoteType", "PropertyValue");
         propMap.put("propertyId", propertyValue.propertyId.toString());
 
-        if (propertyValue.propertyId.type > 0 && propertyValue.propertyId.type 
<= 6) {
+        if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.LastModifiedTimeStamp) {
+            long fullval = propertyValue.scalar;
+            Instant instant = Instant.ofEpochSecond(fullval / 10000000 + 
DATETIME_EPOCH_DIFF_1601);
+            if (instant.isAfter(lastModifiedTimestamp)) {
+                lastModifiedTimestamp = instant;
+            }
+        } else if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.CreationTimeStamp) {
+            // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch 
time is per 1980, not 1970
+            long creationTs = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
+            if (creationTs < creationTimestamp) {
+                creationTimestamp = creationTs;
+            }
+        } else if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.LastModifiedTime) {
+            // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch 
time is per 1980, not 1970
+            long lastMod = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
+            if (lastMod > lastModified) {
+                lastModified = lastMod;
+            }
+        } else if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.Author) {
+            String author = getAuthor(propertyValue);
+            if (mostRecentAuthorProp) {
+                propMap.put("MostRecentAuthor", author);
+                mostRecentAuthors.add(author);
+            } else if (originalAuthorProp) {
+                propMap.put("OriginalAuthor", author);
+                originalAuthors.add(author);
+            } else {
+                propMap.put("Author", author);
+                authors.add(author);
+            }
+            mostRecentAuthorProp = false;
+            originalAuthorProp = false;
+        } else if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.AuthorMostRecent) {
+            mostRecentAuthorProp = true;
+        } else if (propertyValue.propertyId.propertyEnum == 
OneNotePropertyEnum.AuthorOriginal) {
+            originalAuthorProp = true;
+        } else if (propertyValue.propertyId.type > 0 && 
propertyValue.propertyId.type <= 6) {
             propMap.put("scalar", propertyValue.scalar);
         } else {
             OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
@@ -425,6 +494,23 @@ class OneNoteTreeWalker {
         return propMap;
     }
 
+    /**
+     * Reads an author property value and decodes its bytes as UTF-16LE.
+     * @param propertyValue The property value of an author.
+     * @return The decoded author string; a trailing NUL terminator, if present, is kept.
+     */
+    private String getAuthor(PropertyValue propertyValue) throws IOException, 
TikaMemoryLimitException {
+        OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
+        content.reposition(propertyValue.rawData);
+        if (content.size() > dif.size()) {
+            throw new TikaMemoryLimitException("File data store cb " + 
content.size() +
+                " exceeds document size: " + dif.size());
+        }
+        ByteBuffer buf = ByteBuffer.allocate(content.size());
+        dif.read(buf);
+        return new String(buf.array(), StandardCharsets.UTF_16LE);
+    }
+
     private void handleRichEditTextUnicode(int length) throws SAXException, 
IOException, TikaException {
         //this is a null-ended UTF-16LE string
         ByteBuffer buf = ByteBuffer.allocate(length);
@@ -454,4 +540,40 @@ class OneNoteTreeWalker {
             xhtml.endElement(P);
         }
     }
+
+    public Set<String> getAuthors() {
+        return authors;
+    }
+
+    public Set<String> getMostRecentAuthors() {
+        return mostRecentAuthors;
+    }
+
+    public Set<String> getOriginalAuthors() {
+        return originalAuthors;
+    }
+
+    public Instant getLastModifiedTimestamp() {
+        return lastModifiedTimestamp;
+    }
+
+    public void setLastModifiedTimestamp(Instant lastModifiedTimestamp) {
+        this.lastModifiedTimestamp = lastModifiedTimestamp;
+    }
+
+    public long getLastModified() {
+        return lastModified;
+    }
+
+    public void setLastModified(long lastModified) {
+        this.lastModified = lastModified;
+    }
+
+    public long getCreationTimestamp() {
+        return creationTimestamp;
+    }
+
+    public void setCreationTimestamp(long creationTimestamp) {
+        this.creationTimestamp = creationTimestamp;
+    }
 }
\ No newline at end of file
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
index e117fa4..2e4f464 100644
--- 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
+++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
@@ -18,8 +18,13 @@ package org.apache.tika.parser.microsoft.onenote;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
+import org.junit.Assert;
 import org.junit.Test;
 
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+
 public class OneNoteParserTest extends TikaTest {
 
     //TODO: rename test files testOneNote...
@@ -35,6 +40,22 @@ public class OneNoteParserTest extends TikaTest {
         Metadata metadata = new Metadata();
         String txt = getText("Sample1.one", metadata);
         assertNoJunk(txt);
+
+        List<String> authors = Arrays.asList(metadata.getValues("authors"));
+        assertContains("Olya Veselova\u0000", authors);
+        assertContains("Microsoft\u0000", authors);
+        assertContains("Scott\u0000", authors);
+        assertContains("Scott H. W. Snyder\u0000", authors);
+
+        List<String> mostRecentAuthors = 
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+        assertContains("Microsoft\u0000", mostRecentAuthors);
+
+        List<String> originalAuthors = 
Arrays.asList(metadata.getValues("originalAuthors"));
+        assertContains("Microsoft\u0000", originalAuthors);
+
+        Assert.assertEquals(Instant.ofEpochSecond(1336059427), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochMilli(1383613114000L), 
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochSecond(1446572147), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
     }
 
     @Test
@@ -47,6 +68,25 @@ public class OneNoteParserTest extends TikaTest {
         assertContains("Section1HeaderTitle", txt);
         assertContains("Section1TextArea2", txt);
         assertNoJunk(txt);
+
+        List<String> authors = Arrays.asList(metadata.getValues("authors"));
+        assertContains("Olya Veselova\u0000", authors);
+        assertContains("Microsoft\u0000", authors);
+        assertContains("Scott\u0000", authors);
+        assertContains("Scott H. W. Snyder\u0000", authors);
+        assertContains("ndipiazza\u0000", authors);
+
+        List<String> mostRecentAuthors = 
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+        assertContains("Microsoft\u0000", mostRecentAuthors);
+
+        List<String> originalAuthors = 
Arrays.asList(metadata.getValues("originalAuthors"));
+        assertContains("Microsoft\u0000", originalAuthors);
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+        Assert.assertEquals(Instant.ofEpochSecond(1336059427), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochMilli(1574426629000L), 
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochSecond(1574426628), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
     }
 
     @Test
@@ -59,6 +99,25 @@ public class OneNoteParserTest extends TikaTest {
         assertContains("Section2HeaderTitle", txt);
         assertContains("Section2TextArea2", txt);
         assertNoJunk(txt);
+
+        List<String> authors = Arrays.asList(metadata.getValues("authors"));
+        assertNotContained("Olya Veselova\u0000", authors);
+        assertNotContained("Microsoft\u0000", authors);
+        assertNotContained("Scott\u0000", authors);
+        assertNotContained("Scott H. W. Snyder\u0000", authors);
+        assertContains("ndipiazza\u0000", authors);
+
+        List<String> mostRecentAuthors = 
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+        assertNotContained("Microsoft\u0000", mostRecentAuthors);
+
+        List<String> originalAuthors = 
Arrays.asList(metadata.getValues("originalAuthors"));
+        assertNotContained("Microsoft\u0000", originalAuthors);
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+        Assert.assertEquals(Instant.ofEpochSecond(1574426349), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochMilli(1574426623000L), 
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochSecond(1574426624), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
     }
 
     @Test
@@ -71,6 +130,25 @@ public class OneNoteParserTest extends TikaTest {
         assertContains("Section3HeaderTitle", txt);
         assertContains("Section3TextArea2", txt);
         assertNoJunk(txt);
+
+        List<String> authors = Arrays.asList(metadata.getValues("authors"));
+        assertNotContained("Olya Veselova\u0000", authors);
+        assertNotContained("Microsoft\u0000", authors);
+        assertNotContained("Scott\u0000", authors);
+        assertNotContained("Scott H. W. Snyder\u0000", authors);
+        assertContains("ndipiazza\u0000", authors);
+
+        List<String> mostRecentAuthors = 
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+        assertNotContained("Microsoft\u0000", mostRecentAuthors);
+
+        List<String> originalAuthors = 
Arrays.asList(metadata.getValues("originalAuthors"));
+        assertNotContained("Microsoft\u0000", originalAuthors);
+        assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+        Assert.assertEquals(Instant.ofEpochSecond(1574426385), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochMilli(1574426548000L), 
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+        Assert.assertEquals(Instant.ofEpochSecond(1574426547), 
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
     }
 
     private void assertNoJunk(String txt) {

Reply via email to