This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-2224
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-2224 by this push:
new 490af05 add author support and last modified timestamp support (#301)
490af05 is described below
commit 490af05612449faa3370b924e214443bf46ee95c
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed Dec 11 15:46:43 2019 -0600
add author support and last modified timestamp support (#301)
* add author support
* clean up some bad code
* add last modified timestamp support
* creation timestamp and modified timestamps correctly implemented.
* remove incorrect dead code
* add jcid to tostring
* add jcid to tostring
* fix the calculation
* better comment.
---
.../apache/tika/parser/microsoft/onenote/JCID.java | 43 +++----
.../microsoft/onenote/JCIDPropertySetTypeEnum.java | 79 +++++++++++++
.../parser/microsoft/onenote/OneNoteParser.java | 20 ++++
.../microsoft/onenote/OneNoteTreeWalker.java | 126 ++++++++++++++++++++-
.../microsoft/onenote/OneNoteParserTest.java | 78 +++++++++++++
5 files changed, 325 insertions(+), 21 deletions(-)
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
index 175ed96..745ff55 100644
---
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
+++
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
@@ -46,6 +46,7 @@ package org.apache.tika.parser.microsoft.onenote;
* reserved (11 bits): MUST be zero, and MUST be ignored.
*/
class JCID {
+ long jcid;
long index;
boolean isBinary;
boolean isPropertySet;
@@ -64,6 +65,7 @@ class JCID {
}
public void loadFrom32BitIndex(long fullIndex) {
+ jcid = fullIndex;
index = fullIndex & 0xffff;
isBinary = ((fullIndex >> 16) & 1) == 1;
isPropertySet = ((fullIndex >> 17) & 1) == 1;
@@ -78,66 +80,69 @@ class JCID {
@Override
public String toString() {
return "JCID{" +
- "index=" + index +
- ", isBinary=" + isBinary +
- ", isPropertySet=" + isPropertySet +
- ", isGraphNode=" + isGraphNode +
- ", isFileData=" + isFileData +
- ", isReadOnly=" + isReadOnly +
- '}';
+ "jcid=" + JCIDPropertySetTypeEnum.of(jcid) + " (0x" +
Long.toHexString(jcid) + ")" +
+ ", index=" + index +
+ ", isBinary=" + isBinary +
+ ", isPropertySet=" + isPropertySet +
+ ", isGraphNode=" + isGraphNode +
+ ", isFileData=" + isFileData +
+ ", isReadOnly=" + isReadOnly +
+ '}';
+ }
+
+ public long getJcid() {
+ return jcid;
+ }
+
+ public void setJcid(long jcid) {
+ this.jcid = jcid;
}
public long getIndex() {
return index;
}
- public JCID setIndex(long index) {
+ public void setIndex(long index) {
this.index = index;
- return this;
}
public boolean isBinary() {
return isBinary;
}
- public JCID setBinary(boolean binary) {
+ public void setBinary(boolean binary) {
isBinary = binary;
- return this;
}
public boolean isPropertySet() {
return isPropertySet;
}
- public JCID setPropertySet(boolean propertySet) {
+ public void setPropertySet(boolean propertySet) {
isPropertySet = propertySet;
- return this;
}
public boolean isGraphNode() {
return isGraphNode;
}
- public JCID setGraphNode(boolean graphNode) {
+ public void setGraphNode(boolean graphNode) {
isGraphNode = graphNode;
- return this;
}
public boolean isFileData() {
return isFileData;
}
- public JCID setFileData(boolean fileData) {
+ public void setFileData(boolean fileData) {
isFileData = fileData;
- return this;
}
public boolean isReadOnly() {
return isReadOnly;
}
- public JCID setReadOnly(boolean readOnly) {
+ public void setReadOnly(boolean readOnly) {
isReadOnly = readOnly;
- return this;
}
}
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
new file mode 100644
index 0000000..4b30da0
--- /dev/null
+++
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The JCID property set type enum from section 2.1.13 of the MS-ONE specification.
+ * Each constant carries the full 32-bit JCID value it stands for; use {@link #of(Long)} for the reverse lookup.
+ */
+enum JCIDPropertySetTypeEnum {
+    jcidReadOnlyPersistablePropertyContainerForAuthor(0x00120001),
+    jcidPersistablePropertyContainerForTOC(0x00020001),
+    jcidPersistablePropertyContainerForTOCSection(0x00020001),
+    jcidSectionNode(0x00060007),
+    jcidPageSeriesNode(0x00060008),
+    jcidPageNode(0x0006000B),
+    jcidOutlineNode(0x0006000C),
+    jcidOutlineElementNode(0x0006000D),
+    jcidRichTextOENode(0x0006000E),
+    jcidImageNode(0x00060011),
+    jcidNumberListNode(0x00060012),
+    jcidOutlineGroup(0x00060019),
+    jcidTableNode(0x00060022),
+    jcidTableRowNode(0x00060023),
+    jcidTableCellNode(0x00060024),
+    jcidTitleNode(0x0006002C),
+    jcidPageMetaData(0x00020030),
+    jcidSectionMetaData(0x00020031),
+    jcidEmbeddedFileNode(0x00060035),
+    jcidPageManifestNode(0x00060037),
+    jcidConflictPageMetaData(0x00020038),
+    jcidVersionHistoryContent(0x0006003C),
+    jcidVersionProxy(0x0006003D),
+    jcidNoteTagSharedDefinitionContainer(0x00120043),
+    jcidRevisionMetaData(0x00020044),
+    jcidVersionHistoryMetaData(0x00020046),
+    jcidParagraphStyleObject(0x0012004D),
+    jcidParagraphStyleObjectForText(0x0012004D),
+    unknown(0x0);
+
+    private final long jcid;
+
+    JCIDPropertySetTypeEnum(long jcid) {
+        this.jcid = jcid;
+    }
+
+    private static final Map<Long, JCIDPropertySetTypeEnum> BY_ID = new HashMap<>();
+
+    static {
+        for (JCIDPropertySetTypeEnum e : values()) {
+            BY_ID.put(e.jcid, e); // a later constant replaces an earlier one declared with the same value
+        }
+    }
+
+    /**
+     * Looks up the constant for a raw JCID value; returns {@link #unknown} when {@code id} is null or not mapped.
+     * Where two constants share a value (0x00020001, 0x0012004D) the one declared later is returned.
+     */
+    public static JCIDPropertySetTypeEnum of(Long id) {
+        return BY_ID.getOrDefault(id, unknown);
+    }
+}
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
index 32b8b23..22756e3 100644
---
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
+++
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
@@ -21,6 +21,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -30,6 +31,7 @@ import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
+import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -102,6 +104,24 @@ public class OneNoteParser extends AbstractParser {
oneNoteTreeWalker.walkTree();
+        if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
+            metadata.set(Property.externalTextBag("authors"), oneNoteTreeWalker.getAuthors().toArray(new String[] {}));
+        }
+        if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
+            metadata.set(Property.externalTextBag("mostRecentAuthors"), oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[] {}));
+        }
+        if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
+            metadata.set(Property.externalTextBag("originalAuthors"), oneNoteTreeWalker.getOriginalAuthors().toArray(new String[] {}));
+        }
+        if (oneNoteTreeWalker.getCreationTimestamp() < Long.MAX_VALUE) { // getter returns long; Long.MAX_VALUE means no creation time was seen
+            metadata.set("creationTimestamp", String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
+        }
+        if (!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) { // Instant.MIN means no LastModifiedTimeStamp was seen
+            metadata.set("lastModifiedTimestamp", String.valueOf(oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli())); // written as epoch milliseconds
+        }
+        if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) { // Long.MIN_VALUE means no LastModifiedTime was seen
+            metadata.set("lastModified", String.valueOf(oneNoteTreeWalker.getLastModified()));
+        }
xhtml.endDocument();
}
}
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index ff2d192..14b3745 100644
---
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -33,6 +33,10 @@ import org.xml.sax.helpers.AttributesImpl;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.Month;
+import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -51,6 +55,28 @@ class OneNoteTreeWalker {
private static final String P = "p";
private static Pattern HYPERLINK_PATTERN =
Pattern.compile("\uFDDFHYPERLINK\\s+\"([^\"]+)\"([^\"]+)$");
+
+    /**
+     * Offset, in seconds, from {@link Instant#EPOCH} to the TIME32 epoch.
+     * See spec MS-ONE - 2.3.1 - TIME32 - epoch of jan 1 1980 UTC.
+     * Adding this offset to a TIME32 value rebases it onto the 1970 epoch.
+     */
+    private static final long TIME32_EPOCH_DIFF_1980;
+    static {
+        Instant time32Epoch = LocalDateTime.of(1980, Month.JANUARY, 1, 0, 0).toInstant(ZoneOffset.UTC);
+        TIME32_EPOCH_DIFF_1980 = time32Epoch.getEpochSecond() - Instant.EPOCH.getEpochSecond();
+    }
+    /**
+     * Offset, in seconds, from {@link Instant#EPOCH} to the DATETIME epoch (negative, since 1601 precedes 1970).
+     * See spec MS-DTYP - 2.3.3 - DATETIME dates are based on epoch of jan 1 1601 UTC.
+     * Adding this offset to a DATETIME value expressed in seconds rebases it onto the 1970 epoch.
+     */
+    private static final long DATETIME_EPOCH_DIFF_1601;
+    static {
+        Instant datetimeEpoch = LocalDateTime.of(1601, Month.JANUARY, 1, 0, 0).toInstant(ZoneOffset.UTC);
+        DATETIME_EPOCH_DIFF_1601 = datetimeEpoch.getEpochSecond() - Instant.EPOCH.getEpochSecond();
+    }
+
private OneNoteTreeWalkerOptions options;
private OneNoteDocument oneNoteDocument;
private OneNoteDirectFileResource dif;
@@ -58,6 +84,14 @@ class OneNoteTreeWalker {
private Pair<Long, ExtendedGUID> roleAndContext;
private final Metadata parentMetadata;
private final EmbeddedDocumentExtractor embeddedDocumentExtractor;
+ private final Set<String> authors = new HashSet<>();
+ private final Set<String> mostRecentAuthors = new HashSet<>();
+ private final Set<String> originalAuthors = new HashSet<>();
+ private Instant lastModifiedTimestamp = Instant.MIN;
+ private long creationTimestamp = Long.MAX_VALUE;
+ private long lastModified = Long.MIN_VALUE;
+ private boolean mostRecentAuthorProp = false;
+ private boolean originalAuthorProp = false;
/**
* Create a one tree walker.
@@ -271,7 +305,6 @@ class OneNoteTreeWalker {
throw new TikaMemoryLimitException("File data store cb " +
fileDataStoreObjectReference.ref.fileData.cb +
" exceeds document size: " + dif.size());
}
-
handleEmbedded((int)fileDataStoreObjectReference.ref.fileData.cb);
structure.put("fileDataStoreObjectMetadata",
fileDataStoreObjectReference);
return structure;
@@ -346,7 +379,43 @@ class OneNoteTreeWalker {
propMap.put("oneNoteType", "PropertyValue");
propMap.put("propertyId", propertyValue.propertyId.toString());
- if (propertyValue.propertyId.type > 0 && propertyValue.propertyId.type
<= 6) {
+ if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.LastModifiedTimeStamp) {
+ long fullval = propertyValue.scalar;
+ Instant instant = Instant.ofEpochSecond(fullval / 10000000 +
DATETIME_EPOCH_DIFF_1601);
+ if (instant.isAfter(lastModifiedTimestamp)) {
+ lastModifiedTimestamp = instant;
+ }
+ } else if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.CreationTimeStamp) {
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch
time is per 1980, not 1970
+ long creationTs = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
+ if (creationTs < creationTimestamp) {
+ creationTimestamp = creationTs;
+ }
+ } else if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.LastModifiedTime) {
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch
time is per 1980, not 1970
+ long lastMod = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
+ if (lastMod > lastModified) {
+ lastModified = lastMod;
+ }
+ } else if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.Author) {
+ String author = getAuthor(propertyValue);
+ if (mostRecentAuthorProp) {
+ propMap.put("MostRecentAuthor", author);
+ mostRecentAuthors.add(author);
+ } else if (originalAuthorProp) {
+ propMap.put("OriginalAuthor", author);
+ originalAuthors.add(author);
+ } else {
+ propMap.put("Author", author);
+ authors.add(author);
+ }
+ mostRecentAuthorProp = false;
+ originalAuthorProp = false;
+ } else if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.AuthorMostRecent) {
+ mostRecentAuthorProp = true;
+ } else if (propertyValue.propertyId.propertyEnum ==
OneNotePropertyEnum.AuthorOriginal) {
+ originalAuthorProp = true;
+ } else if (propertyValue.propertyId.type > 0 &&
propertyValue.propertyId.type <= 6) {
propMap.put("scalar", propertyValue.scalar);
} else {
OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
@@ -425,6 +494,23 @@ class OneNoteTreeWalker {
return propMap;
}
+ /**
+ * Decodes an author name: allocates content.size() bytes, reads into them from the file resource and interprets them as UTF-16LE.
+ * @param propertyValue The property value of an author; its rawData is used to reposition the read pointer.
+ * @return The decoded author string. Nothing is trimmed, so a terminating NUL character in the data is retained.
+ */
+ private String getAuthor(PropertyValue propertyValue) throws IOException,
TikaMemoryLimitException {
+ OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
+ content.reposition(propertyValue.rawData);
+ if (content.size() > dif.size()) {
+ throw new TikaMemoryLimitException("File data store cb " +
content.size() +
+ " exceeds document size: " + dif.size());
+ }
+ ByteBuffer buf = ByteBuffer.allocate(content.size());
+ dif.read(buf);
+ return new String(buf.array(), StandardCharsets.UTF_16LE);
+ }
+
private void handleRichEditTextUnicode(int length) throws SAXException,
IOException, TikaException {
//this is a null-ended UTF-16LE string
ByteBuffer buf = ByteBuffer.allocate(length);
@@ -454,4 +540,40 @@ class OneNoteTreeWalker {
xhtml.endElement(P);
}
}
+
+    public Set<String> getAuthors() { // read-only view of the authors that carried neither the most-recent nor the original marker
+        return Collections.unmodifiableSet(authors);
+    }
+
+    public Set<String> getMostRecentAuthors() { // read-only view of the authors preceded by an AuthorMostRecent property
+        return Collections.unmodifiableSet(mostRecentAuthors);
+    }
+
+    public Set<String> getOriginalAuthors() { // read-only view of the authors preceded by an AuthorOriginal property
+        return Collections.unmodifiableSet(originalAuthors);
+    }
+
+ public Instant getLastModifiedTimestamp() { // latest LastModifiedTimeStamp seen during the walk; Instant.MIN if none was seen
+ return lastModifiedTimestamp;
+ }
+
+ public void setLastModifiedTimestamp(Instant lastModifiedTimestamp) { // replaces the value accumulated during the walk
+ this.lastModifiedTimestamp = lastModifiedTimestamp;
+ }
+
+ public long getLastModified() { // largest LastModifiedTime seen, rebased from the 1980 TIME32 epoch to 1970; Long.MIN_VALUE if none was seen
+ return lastModified;
+ }
+
+ public void setLastModified(long lastModified) { // replaces the value accumulated during the walk
+ this.lastModified = lastModified;
+ }
+
+ public long getCreationTimestamp() { // smallest CreationTimeStamp seen, rebased from the 1980 TIME32 epoch to 1970; Long.MAX_VALUE if none was seen
+ return creationTimestamp;
+ }
+
+ public void setCreationTimestamp(long creationTimestamp) { // replaces the value accumulated during the walk
+ this.creationTimestamp = creationTimestamp;
+ }
}
\ No newline at end of file
diff --git
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
index e117fa4..2e4f464 100644
---
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
+++
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
@@ -18,8 +18,13 @@ package org.apache.tika.parser.microsoft.onenote;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
+import org.junit.Assert;
import org.junit.Test;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+
public class OneNoteParserTest extends TikaTest {
//TODO: rename test files testOneNote...
@@ -35,6 +40,22 @@ public class OneNoteParserTest extends TikaTest {
Metadata metadata = new Metadata();
String txt = getText("Sample1.one", metadata);
assertNoJunk(txt);
+
+ List<String> authors = Arrays.asList(metadata.getValues("authors"));
+ assertContains("Olya Veselova\u0000", authors);
+ assertContains("Microsoft\u0000", authors);
+ assertContains("Scott\u0000", authors);
+ assertContains("Scott H. W. Snyder\u0000", authors);
+
+ List<String> mostRecentAuthors =
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+ assertContains("Microsoft\u0000", mostRecentAuthors);
+
+ List<String> originalAuthors =
Arrays.asList(metadata.getValues("originalAuthors"));
+ assertContains("Microsoft\u0000", originalAuthors);
+
+ Assert.assertEquals(Instant.ofEpochSecond(1336059427),
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochMilli(1383613114000L),
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochSecond(1446572147),
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
}
@Test
@@ -47,6 +68,25 @@ public class OneNoteParserTest extends TikaTest {
assertContains("Section1HeaderTitle", txt);
assertContains("Section1TextArea2", txt);
assertNoJunk(txt);
+
+ List<String> authors = Arrays.asList(metadata.getValues("authors"));
+ assertContains("Olya Veselova\u0000", authors);
+ assertContains("Microsoft\u0000", authors);
+ assertContains("Scott\u0000", authors);
+ assertContains("Scott H. W. Snyder\u0000", authors);
+ assertContains("ndipiazza\u0000", authors);
+
+ List<String> mostRecentAuthors =
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+ assertContains("Microsoft\u0000", mostRecentAuthors);
+
+ List<String> originalAuthors =
Arrays.asList(metadata.getValues("originalAuthors"));
+ assertContains("Microsoft\u0000", originalAuthors);
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+ Assert.assertEquals(Instant.ofEpochSecond(1336059427),
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochMilli(1574426629000L),
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochSecond(1574426628),
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
}
@Test
@@ -59,6 +99,25 @@ public class OneNoteParserTest extends TikaTest {
assertContains("Section2HeaderTitle", txt);
assertContains("Section2TextArea2", txt);
assertNoJunk(txt);
+
+ List<String> authors = Arrays.asList(metadata.getValues("authors"));
+ assertNotContained("Olya Veselova\u0000", authors);
+ assertNotContained("Microsoft\u0000", authors);
+ assertNotContained("Scott\u0000", authors);
+ assertNotContained("Scott H. W. Snyder\u0000", authors);
+ assertContains("ndipiazza\u0000", authors);
+
+ List<String> mostRecentAuthors =
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+ assertNotContained("Microsoft\u0000", mostRecentAuthors);
+
+ List<String> originalAuthors =
Arrays.asList(metadata.getValues("originalAuthors"));
+ assertNotContained("Microsoft\u0000", originalAuthors);
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+ Assert.assertEquals(Instant.ofEpochSecond(1574426349),
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochMilli(1574426623000L),
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochSecond(1574426624),
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
}
@Test
@@ -71,6 +130,25 @@ public class OneNoteParserTest extends TikaTest {
assertContains("Section3HeaderTitle", txt);
assertContains("Section3TextArea2", txt);
assertNoJunk(txt);
+
+ List<String> authors = Arrays.asList(metadata.getValues("authors"));
+ assertNotContained("Olya Veselova\u0000", authors);
+ assertNotContained("Microsoft\u0000", authors);
+ assertNotContained("Scott\u0000", authors);
+ assertNotContained("Scott H. W. Snyder\u0000", authors);
+ assertContains("ndipiazza\u0000", authors);
+
+ List<String> mostRecentAuthors =
Arrays.asList(metadata.getValues("mostRecentAuthors"));
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+ assertNotContained("Microsoft\u0000", mostRecentAuthors);
+
+ List<String> originalAuthors =
Arrays.asList(metadata.getValues("originalAuthors"));
+ assertNotContained("Microsoft\u0000", originalAuthors);
+ assertContains("ndipiazza\u0000", mostRecentAuthors);
+
+ Assert.assertEquals(Instant.ofEpochSecond(1574426385),
Instant.ofEpochSecond(Long.parseLong(metadata.get("creationTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochMilli(1574426548000L),
Instant.ofEpochMilli(Long.parseLong(metadata.get("lastModifiedTimestamp"))));
+ Assert.assertEquals(Instant.ofEpochSecond(1574426547),
Instant.ofEpochSecond(Long.parseLong(metadata.get("lastModified"))));
}
private void assertNoJunk(String txt) {