This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new f2e13ed  Addendum to Tika 2224 - Add support for OneNote 2007 and 
earlier (#307)
f2e13ed is described below

commit f2e13edccb82aede088fef336b315a4b09a5cd1d
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed Jan 29 07:08:52 2020 -0800

    Addendum to Tika 2224 - Add support for OneNote 2007 and earlier (#307)
    
    * one note 2007 and previous support
    
    * add some test files
    
    * fix tests.
    
    * javadoc
    
    * remove useless javadoc
    
    * improve javadoc
    
    * Update OneNoteHeader.java
---
 .../parser/microsoft/onenote/OneNoteHeader.java    |  62 ++---
 .../onenote/OneNoteLegacyDumpStrings.java          | 128 ++++++++++
 .../parser/microsoft/onenote/OneNoteParser.java    |  94 ++++----
 .../tika/parser/microsoft/onenote/OneNotePtr.java  | 260 +++++++++++----------
 .../microsoft/onenote/OneNoteParserTest.java       |  25 ++
 .../test-documents/testOneNote2007OrEarlier1.one   | Bin 0 -> 1246998 bytes
 .../test-documents/testOneNote2007OrEarlier2.one   | Bin 0 -> 36786 bytes
 7 files changed, 377 insertions(+), 192 deletions(-)

diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
index 2ff811b..a6dc733 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.microsoft.onenote;
 
+import org.apache.tika.exception.TikaException;
+
 import java.io.Serializable;
 
 class OneNoteHeader implements Serializable {
@@ -24,10 +26,11 @@ class OneNoteHeader implements Serializable {
     GUID guidFile;
     GUID guidLegacyFileVersion;
     GUID guidFileFormat;
-    long ffvLastCode;
-    long ffvNewestCode;
-    long ffvOldestCode;
-    long ffvOldestReader;
+    long ffvLastCodeThatWroteToThisFile;
+    long ffvOldestCodeThatHasWrittenToThisFile;
+    long ffvNewestCodeThatHasWrittenToThisFile;
+    long ffvOldestCodeThatMayReadThisFile;
+
     FileChunkReference fcrLegacyFreeChunkList;
     FileChunkReference fcrLegacyTransactionLog;
     long cTransactionsInLog;
@@ -57,7 +60,15 @@ class OneNoteHeader implements Serializable {
     long buildNumberLastWroteToFile;
     long buildNumberOldestWritten;
     long buildNumberNewestWritten;
-    byte[] reserved;
+
+    /**
+     * Determine if this OneNote file pre-dates the open specs (MS-ONE and
+     * MS-ONESTORE) published by Microsoft.
+     * @return True if guidLegacyFileVersion is not the nil GUID, i.e. the file is in the legacy (pre-spec) format.
+     * False if the file is based on the MS-ONE and MS-ONESTORE specs.
+     */
+    public boolean isLegacy() {
+        return !GUID.nil().equals(guidLegacyFileVersion);
+    }
 
     public GUID getGuidFileType() {
         return guidFileType;
@@ -95,39 +106,39 @@ class OneNoteHeader implements Serializable {
         return this;
     }
 
-    public long getFfvLastCode() {
-        return ffvLastCode;
+    public long getFfvLastCodeThatWroteToThisFile() {
+        return ffvLastCodeThatWroteToThisFile;
     }
 
-    public OneNoteHeader setFfvLastCode(long ffvLastCode) {
-        this.ffvLastCode = ffvLastCode;
+    public OneNoteHeader setFfvLastCodeThatWroteToThisFile(long 
ffvLastCodeThatWroteToThisFile) {
+        this.ffvLastCodeThatWroteToThisFile = ffvLastCodeThatWroteToThisFile;
         return this;
     }
 
-    public long getFfvNewestCode() {
-        return ffvNewestCode;
+    public long getFfvOldestCodeThatHasWrittenToThisFile() {
+        return ffvOldestCodeThatHasWrittenToThisFile;
     }
 
-    public OneNoteHeader setFfvNewestCode(long ffvNewestCode) {
-        this.ffvNewestCode = ffvNewestCode;
+    public OneNoteHeader setFfvOldestCodeThatHasWrittenToThisFile(long 
ffvOldestCodeThatHasWrittenToThisFile) {
+        this.ffvOldestCodeThatHasWrittenToThisFile = 
ffvOldestCodeThatHasWrittenToThisFile;
         return this;
     }
 
-    public long getFfvOldestCode() {
-        return ffvOldestCode;
+    public long getFfvNewestCodeThatHasWrittenToThisFile() {
+        return ffvNewestCodeThatHasWrittenToThisFile;
     }
 
-    public OneNoteHeader setFfvOldestCode(long ffvOldestCode) {
-        this.ffvOldestCode = ffvOldestCode;
+    public OneNoteHeader setFfvNewestCodeThatHasWrittenToThisFile(long 
ffvNewestCodeThatHasWrittenToThisFile) {
+        this.ffvNewestCodeThatHasWrittenToThisFile = 
ffvNewestCodeThatHasWrittenToThisFile;
         return this;
     }
 
-    public long getFfvOldestReader() {
-        return ffvOldestReader;
+    public long getFfvOldestCodeThatMayReadThisFile() {
+        return ffvOldestCodeThatMayReadThisFile;
     }
 
-    public OneNoteHeader setFfvOldestReader(long ffvOldestReader) {
-        this.ffvOldestReader = ffvOldestReader;
+    public OneNoteHeader setFfvOldestCodeThatMayReadThisFile(long 
ffvOldestCodeThatMayReadThisFile) {
+        this.ffvOldestCodeThatMayReadThisFile = 
ffvOldestCodeThatMayReadThisFile;
         return this;
     }
 
@@ -391,13 +402,4 @@ class OneNoteHeader implements Serializable {
         this.buildNumberNewestWritten = buildNumberNewestWritten;
         return this;
     }
-
-    public byte[] getReserved() {
-        return reserved;
-    }
-
-    public OneNoteHeader setReserved(byte[] reserved) {
-        this.reserved = reserved;
-        return this;
-    }
 }
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
new file mode 100644
index 0000000..3f63576
--- /dev/null
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
@@ -0,0 +1,128 @@
+package org.apache.tika.parser.microsoft.onenote;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * OneNote versions before OneNote 2010 do not have a published OpenSpec 
document, and the older formats are drastically
+ * incompatible with the later OpenSpecs.
+ * Therefore, we resort to scraping out useful ASCII and UTF16LE strings using an algorithm similar to the one used by
+ * the GNU "strings" program.
+ *
+ * This is only needed for OneNote versions prior to 2010.
+ */
+class OneNoteLegacyDumpStrings {
+
+    // TODO - parameterize this
+    public static int MIN_STRING_LENGTH = 8;
+    // TODO - parameterize this
+    public static float ACCEPTABLE_ALPHA_TO_OTHER_CHAR_RATIO = 0.6f;
+
+    OneNoteDirectFileResource oneNoteDirectFileResource;
+    XHTMLContentHandler xhtml;
+
+    public OneNoteLegacyDumpStrings(OneNoteDirectFileResource 
oneNoteDirectFileResource, XHTMLContentHandler xhtml) {
+        this.oneNoteDirectFileResource = oneNoteDirectFileResource;
+        this.xhtml = xhtml;
+    }
+
+    /**
+     * Dump all "useful" ASCII and UTF16LE strings found in the file to the XHTMLContentHandler.
+     * The ASCII pass over the file runs first, followed by the UTF16LE pass.
+     * @throws TikaException if either pass fails to read the file.
+     * @throws SAXException if the XHTMLContentHandler fails while receiving extracted text.
+     */
+    public void dump() throws TikaException, SAXException {
+        dumpAscii();
+        dumpUtf16LE();
+    }
+
+    /**
+     * Based on GNU "strings" implementation. Pulls out ASCII text segments and writes them to the XHTMLContentHandler.
+     * Reads the file byte by byte from position 0, collecting runs of printable bytes (0x20 through 0x7E). Any other
+     * byte ends the run; a run of at least MIN_STRING_LENGTH bytes is handed to writeIfUseful, which decides whether
+     * it is actually emitted. The run still pending at end of file is handled the same way.
+     * @throws SAXException propagated from writeIfUseful.
+     * @throws TikaException if reading the file fails with an IOException.
+     */
+    private void dumpAscii() throws SAXException, TikaException {
+        try {
+            oneNoteDirectFileResource.position(0);
+
+            ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+            for (int b = oneNoteDirectFileResource.read(); b != -1; b = 
oneNoteDirectFileResource.read()) {
+                if (b >= 0x20 && b < 0x7F) {
+                    os.write(b);
+                } else {
+                    if (os.size() >= MIN_STRING_LENGTH) {
+                        writeIfUseful(os);
+                    }
+                    os.reset();
+                }
+            }
+            if (os.size() >= MIN_STRING_LENGTH) {
+                writeIfUseful(os);
+            }
+        } catch (IOException e) {
+            throw new TikaException("Could not extract text from legacy 
OneNote document", e);
+        }
+    }
+
+    /**
+     * Based on GNU "strings" implementation. Pulls out UTF16 LE text segments and writes them to the XHTMLContentHandler.
+     * Only code units in the printable ASCII range (0x20 through 0x7E) are recognised. In UTF16 LE such a code unit is
+     * stored low byte first: the printable byte, followed by 0x00. The scan advances one byte at a time until it finds
+     * such a pair, so strings starting at either an odd or an even file offset are found. Runs of at least
+     * MIN_STRING_LENGTH characters are handed to writeIfUseful, which decides whether they are actually emitted.
+     * @throws SAXException propagated from writeIfUseful.
+     * @throws TikaException if reading the file fails with an IOException.
+     */
+    private void dumpUtf16LE() throws SAXException, TikaException {
+        try {
+            oneNoteDirectFileResource.position(0);
+
+            ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+            long sz = oneNoteDirectFileResource.size();
+
+            for (long i = 0; i < sz - 1; ++i) {
+                oneNoteDirectFileResource.position(i);
+
+                int lowByte = oneNoteDirectFileResource.read();
+                int highByte = oneNoteDirectFileResource.read();
+
+                // Little endian: the character byte comes first and the zero high byte second. Testing for the zero
+                // byte first locks on one byte late and loses the first character of every string.
+                if (highByte == 0x00 && lowByte >= 0x20 && lowByte < 0x7F) {
+                    // Both bytes of the code unit are consumed; the loop increment skips the second one.
+                    ++i;
+                    os.write(lowByte);
+                } else {
+                    if (os.size() >= MIN_STRING_LENGTH) {
+                        writeIfUseful(os);
+                    }
+                    os.reset();
+                }
+            }
+            // Flush the run that was still open when the end of the file was reached.
+            if (os.size() >= MIN_STRING_LENGTH) {
+                writeIfUseful(os);
+            }
+        } catch (IOException e) {
+            throw new TikaException("Could not extract text from legacy OneNote document", e);
+        }
+    }
+
+    /**
+     * Writes a buffer of output characters if it splits into more than one piece on the space character and
+     * (num alphabetic or whitespace chars in the buffer) / (number of chars in the buffer) >
+     * ACCEPTABLE_ALPHA_TO_OTHER_CHAR_RATIO. Each string that is written is followed by a newline.
+     * @param os Byte array output stream containing the buffer.
+     * @throws SAXException if the XHTMLContentHandler fails while receiving the characters.
+     */
+    private void writeIfUseful(ByteArrayOutputStream os) throws SAXException {
+        String str = new String(os.toByteArray(), StandardCharsets.US_ASCII);
+        String [] spl = str.split(" ");
+        if (spl.length > 1) {
+            int numAlpha = 0;
+            for (int i = 0; i < str.length(); ++i) {
+                // Classify the character itself. Passing the loop index to isWhitespace() treated the index as a
+                // code point, which counted positions 9-13 and 28-32 regardless of their content and ignored real
+                // spaces everywhere else.
+                char c = str.charAt(i);
+                if (Character.isAlphabetic(c) || Character.isWhitespace(c)) {
+                    ++numAlpha;
+                }
+            }
+            float ratioAlphaToOtherChars = (float) numAlpha / (float) str.length();
+            if (ratioAlphaToOtherChars > ACCEPTABLE_ALPHA_TO_OTHER_CHAR_RATIO) {
+                xhtml.characters(str);
+                xhtml.characters("\n");
+            }
+        }
+    }
+}
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
index 22756e3..6c27505 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
@@ -79,48 +79,54 @@ public class OneNoteParser extends AbstractParser {
             xhtml.startDocument();
             OneNoteDocument oneNoteDocument = 
createOneNoteDocumentFromDirectFileResource(oneNoteDirectFileResource);
 
-            metadata.set("buildNumberCreated", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberCreated));
-            metadata.set("buildNumberLastWroteToFile", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberLastWroteToFile));
-            metadata.set("buildNumberNewestWritten", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberNewestWritten));
-            metadata.set("buildNumberOldestWritten", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberOldestWritten));
-            metadata.set("cbExpectedFileLength", "0x" + 
Long.toHexString(oneNoteDocument.header.cbExpectedFileLength));
-            metadata.set("cbFreeSpaceInFreeChunkList", "0x" + 
Long.toHexString(oneNoteDocument.header.cbFreeSpaceInFreeChunkList));
-            metadata.set("cbLegacyExpectedFileLength", "0x" + 
Long.toHexString(oneNoteDocument.header.cbLegacyExpectedFileLength));
-            metadata.set("cbLegacyFreeSpaceInFreeChunkList",
+            if (!oneNoteDocument.header.isLegacy()) {
+                metadata.set("buildNumberCreated", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberCreated));
+                metadata.set("buildNumberLastWroteToFile", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberLastWroteToFile));
+                metadata.set("buildNumberNewestWritten", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberNewestWritten));
+                metadata.set("buildNumberOldestWritten", "0x" + 
Long.toHexString(oneNoteDocument.header.buildNumberOldestWritten));
+                metadata.set("cbExpectedFileLength", "0x" + 
Long.toHexString(oneNoteDocument.header.cbExpectedFileLength));
+                metadata.set("cbFreeSpaceInFreeChunkList", "0x" + 
Long.toHexString(oneNoteDocument.header.cbFreeSpaceInFreeChunkList));
+                metadata.set("cbLegacyExpectedFileLength", "0x" + 
Long.toHexString(oneNoteDocument.header.cbLegacyExpectedFileLength));
+                metadata.set("cbLegacyFreeSpaceInFreeChunkList",
                     "0x" + 
Long.toHexString(oneNoteDocument.header.cbLegacyFreeSpaceInFreeChunkList));
-            metadata.set("crcName", "0x" + 
Long.toHexString(oneNoteDocument.header.crcName));
-            metadata.set("cTransactionsInLog", "0x" + 
Long.toHexString(oneNoteDocument.header.cTransactionsInLog));
-            metadata.set("ffvLastCode", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvLastCode));
-            metadata.set("ffvNewestCode", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvNewestCode));
-            metadata.set("ffvOldestReader", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvOldestReader));
-            metadata.set("grfDebugLogFlags", "0x" + 
Long.toHexString(oneNoteDocument.header.grfDebugLogFlags));
-            metadata.set("nFileVersionGeneration", "0x" + 
Long.toHexString(oneNoteDocument.header.nFileVersionGeneration));
-            metadata.set("rgbPlaceholder", "0x" + 
Long.toHexString(oneNoteDocument.header.rgbPlaceholder));
-
-            Pair<Long, ExtendedGUID> roleAndContext = Pair.of(1L, 
ExtendedGUID.nil());
-            OneNoteTreeWalker oneNoteTreeWalker = new OneNoteTreeWalker(
+                metadata.set("crcName", "0x" + 
Long.toHexString(oneNoteDocument.header.crcName));
+                metadata.set("cTransactionsInLog", "0x" + 
Long.toHexString(oneNoteDocument.header.cTransactionsInLog));
+                metadata.set("ffvLastCodeThatWroteToThisFile", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvLastCodeThatWroteToThisFile));
+                metadata.set("ffvNewestCodeThatHasWrittenToThisFile", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvNewestCodeThatHasWrittenToThisFile));
+                metadata.set("ffvOldestCodeThatMayReadThisFile", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvOldestCodeThatMayReadThisFile));
+                metadata.set("ffvOldestCodeThatHasWrittenToThisFile", "0x" + 
Long.toHexString(oneNoteDocument.header.ffvOldestCodeThatHasWrittenToThisFile));
+                metadata.set("grfDebugLogFlags", "0x" + 
Long.toHexString(oneNoteDocument.header.grfDebugLogFlags));
+                metadata.set("nFileVersionGeneration", "0x" + 
Long.toHexString(oneNoteDocument.header.nFileVersionGeneration));
+                metadata.set("rgbPlaceholder", "0x" + 
Long.toHexString(oneNoteDocument.header.rgbPlaceholder));
+
+                Pair<Long, ExtendedGUID> roleAndContext = Pair.of(1L, 
ExtendedGUID.nil());
+                OneNoteTreeWalker oneNoteTreeWalker = new OneNoteTreeWalker(
                     new OneNoteTreeWalkerOptions(), oneNoteDocument,
                     oneNoteDirectFileResource, xhtml, metadata, context, 
roleAndContext);
 
-            oneNoteTreeWalker.walkTree();
-
-            if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
-                metadata.set(Property.externalTextBag("authors"), 
oneNoteTreeWalker.getAuthors().toArray(new String[] {}));
-            }
-            if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
-                metadata.set(Property.externalTextBag("mostRecentAuthors"), 
oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[] {}));
-            }
-            if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
-                metadata.set(Property.externalTextBag("originalAuthors"), 
oneNoteTreeWalker.getOriginalAuthors().toArray(new String[] {}));
-            }
-            if (!Instant.MAX.equals(oneNoteTreeWalker.getCreationTimestamp())) 
{
-                metadata.set("creationTimestamp", 
String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
-            }
-            if 
(!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) {
-                metadata.set("lastModifiedTimestamp", 
String.valueOf(oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
-            }
-            if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) {
-                metadata.set("lastModified", 
String.valueOf(oneNoteTreeWalker.getLastModified()));
+                oneNoteTreeWalker.walkTree();
+
+                if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
+                    metadata.set(Property.externalTextBag("authors"), 
oneNoteTreeWalker.getAuthors().toArray(new String[] {}));
+                }
+                if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
+                    
metadata.set(Property.externalTextBag("mostRecentAuthors"), 
oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[] {}));
+                }
+                if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
+                    metadata.set(Property.externalTextBag("originalAuthors"), 
oneNoteTreeWalker.getOriginalAuthors().toArray(new String[] {}));
+                }
+                if 
(!Instant.MAX.equals(oneNoteTreeWalker.getCreationTimestamp())) {
+                    metadata.set("creationTimestamp", 
String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
+                }
+                if 
(!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) {
+                    metadata.set("lastModifiedTimestamp", 
String.valueOf(oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
+                }
+                if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) {
+                    metadata.set("lastModified", 
String.valueOf(oneNoteTreeWalker.getLastModified()));
+                }
+            } else {
+                OneNoteLegacyDumpStrings dumpStrings = new 
OneNoteLegacyDumpStrings(oneNoteDirectFileResource, xhtml);
+                dumpStrings.dump();
             }
             xhtml.endDocument();
         }
@@ -159,12 +165,12 @@ public class OneNoteParser extends AbstractParser {
         // First parse out the header.
         oneNoteDocument.header = oneNotePtr.deserializeHeader();
 
-        // Now that we parsed the header, the "root file node list"
-
-        oneNotePtr.reposition(oneNoteDocument.header.fcrFileNodeListRoot);
-        FileNodePtr curPath = new FileNodePtr();
-        oneNotePtr.deserializeFileNodeList(oneNoteDocument.root, curPath);
-
+        if (!oneNoteDocument.header.isLegacy()) {
+            // Now that we parsed the header, the "root file node list"
+            oneNotePtr.reposition(oneNoteDocument.header.fcrFileNodeListRoot);
+            FileNodePtr curPath = new FileNodePtr();
+            oneNotePtr.deserializeFileNodeList(oneNoteDocument.root, curPath);
+        }
         return oneNoteDocument;
     }
 }
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
index 408cc27..c3fb150 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
@@ -14,6 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.tika.parser.microsoft.onenote;
 
 import org.apache.commons.codec.binary.Hex;
@@ -47,47 +48,48 @@ class OneNotePtr {
     public static final long FOOTER_CONST = 0x8BC215C38233BA4BL;
     public static final String UNKNOWN = "unknown";
     private static final byte[] IFNDF = new byte[] {
-      60, 0, 105, 0, 102, 0, 110, 0, 100, 0, 102, 0, 62, 0
+        60, 0, 105, 0, 102, 0, 110, 0, 100, 0, 102, 0, 62, 0
     };
 
     private static final GUID FILE_DATA_STORE_OBJ_HEADER = new GUID(new int[] {
-      0xBD,
-      0xE3,
-      0x16,
-      0xE7,
-      0x26,
-      0x65,
-      0x45,
-      0x11,
-      0xA4,
-      0xC4,
-      0x8D,
-      0x4D,
-      0x0B,
-      0x7A,
-      0x9E,
-      0xAC
+        0xBD,
+        0xE3,
+        0x16,
+        0xE7,
+        0x26,
+        0x65,
+        0x45,
+        0x11,
+        0xA4,
+        0xC4,
+        0x8D,
+        0x4D,
+        0x0B,
+        0x7A,
+        0x9E,
+        0xAC
     });
 
     private static final GUID FILE_DATA_STORE_OBJ_FOOTER = new GUID(new int[] {
-      0x71,
-      0xFB,
-      0xA7,
-      0x22,
-      0x0F,
-      0x79,
-      0x4A,
-      0x0B,
-      0xBB,
-      0x13,
-      0x89,
-      0x92,
-      0x56,
-      0x42,
-      0x6B,
-      0x24});
+        0x71,
+        0xFB,
+        0xA7,
+        0x22,
+        0x0F,
+        0x79,
+        0x4A,
+        0x0B,
+        0xBB,
+        0x13,
+        0x89,
+        0x92,
+        0x56,
+        0x42,
+        0x6B,
+        0x24});
 
     public static final int IFNDF_GUID_LENGTH = 38; // 36 char guid with a { 
and a } char.
+    public static final int NUM_RESERVED_BYTES_AT_END_OF_HEADER = 728;
     int indentLevel = 0;
 
     long offset;
@@ -111,46 +113,47 @@ class OneNotePtr {
         this.indentLevel = oneNotePtr.indentLevel;
     }
 
-    public OneNoteHeader deserializeHeader() throws IOException {
+    public OneNoteHeader deserializeHeader() throws IOException, TikaException 
{
         OneNoteHeader data = new OneNoteHeader();
         data.setGuidFileType(deserializeGUID())
-          .setGuidFile(deserializeGUID())
-          .setGuidLegacyFileVersion(deserializeGUID())
-          .setGuidFileFormat(deserializeGUID())
-          .setFfvLastCode(deserializeLittleEndianInt())
-          .setFfvNewestCode(deserializeLittleEndianInt())
-          .setFfvOldestCode(deserializeLittleEndianInt())
-          .setFfvOldestReader(deserializeLittleEndianInt())
-          .setFcrLegacyFreeChunkList(deserializeFileChunkReference64())
-          .setFcrLegacyTransactionLog(deserializeFileChunkReference64())
-          .setcTransactionsInLog(deserializeLittleEndianInt())
-          .setCbExpectedFileLength(deserializeLittleEndianInt())
-          .setRgbPlaceholder(deserializeLittleEndianLong())
-          .setFcrLegacyFileNodeListRoot(deserializeFileChunkReference64())
-          .setCbLegacyFreeSpaceInFreeChunkList(deserializeLittleEndianInt())
-          .setIgnoredZeroA(deserializeLittleEndianChar())
-          .setIgnoredZeroB(deserializeLittleEndianChar())
-          .setIgnoredZeroC(deserializeLittleEndianChar())
-          .setIgnoredZeroD(deserializeLittleEndianChar())
-          .setGuidAncestor(deserializeGUID())
-          .setCrcName(deserializeLittleEndianInt())
-          .setFcrHashedChunkList(deserializeFileChunkReference64x32())
-          .setFcrTransactionLog(deserializeFileChunkReference64x32())
-          .setFcrFileNodeListRoot(deserializeFileChunkReference64x32())
-          .setFcrFreeChunkList(deserializeFileChunkReference64x32())
-          .setCbExpectedFileLength(deserializeLittleEndianLong())
-          .setCbFreeSpaceInFreeChunkList(deserializeLittleEndianLong())
-          .setGuidFileVersion(deserializeGUID())
-          .setnFileVersionGeneration(deserializeLittleEndianLong())
-          .setGuidDenyReadFileVersion(deserializeGUID())
-          .setGrfDebugLogFlags(deserializeLittleEndianInt())
-          .setFcrDebugLogA(deserializeFileChunkReference64x32())
-          .setFcrDebugLogB(deserializeFileChunkReference64x32())
-          .setBuildNumberCreated(deserializeLittleEndianInt())
-          .setBuildNumberLastWroteToFile(deserializeLittleEndianInt())
-          .setBuildNumberOldestWritten(deserializeLittleEndianInt())
-          .setBuildNumberNewestWritten(deserializeLittleEndianInt())
-          .setReserved(deserializedReservedHeader());
+            .setGuidFile(deserializeGUID())
+            .setGuidLegacyFileVersion(deserializeGUID())
+            .setGuidFileFormat(deserializeGUID())
+            .setFfvLastCodeThatWroteToThisFile(deserializeLittleEndianInt())
+            
.setFfvOldestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
+            
.setFfvNewestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
+            .setFfvOldestCodeThatMayReadThisFile(deserializeLittleEndianInt())
+            .setFcrLegacyFreeChunkList(deserializeFileChunkReference64())
+            .setFcrLegacyTransactionLog(deserializeFileChunkReference64())
+            .setcTransactionsInLog(deserializeLittleEndianInt())
+            .setCbExpectedFileLength(deserializeLittleEndianInt())
+            .setRgbPlaceholder(deserializeLittleEndianLong())
+            .setFcrLegacyFileNodeListRoot(deserializeFileChunkReference64())
+            .setCbLegacyFreeSpaceInFreeChunkList(deserializeLittleEndianInt())
+            .setIgnoredZeroA(deserializeLittleEndianChar())
+            .setIgnoredZeroB(deserializeLittleEndianChar())
+            .setIgnoredZeroC(deserializeLittleEndianChar())
+            .setIgnoredZeroD(deserializeLittleEndianChar())
+            .setGuidAncestor(deserializeGUID())
+            .setCrcName(deserializeLittleEndianInt())
+            .setFcrHashedChunkList(deserializeFileChunkReference64x32())
+            .setFcrTransactionLog(deserializeFileChunkReference64x32())
+            .setFcrFileNodeListRoot(deserializeFileChunkReference64x32())
+            .setFcrFreeChunkList(deserializeFileChunkReference64x32())
+            .setCbExpectedFileLength(deserializeLittleEndianLong())
+            .setCbFreeSpaceInFreeChunkList(deserializeLittleEndianLong())
+            .setGuidFileVersion(deserializeGUID())
+            .setnFileVersionGeneration(deserializeLittleEndianLong())
+            .setGuidDenyReadFileVersion(deserializeGUID())
+            .setGrfDebugLogFlags(deserializeLittleEndianInt())
+            .setFcrDebugLogA(deserializeFileChunkReference64x32())
+            .setFcrDebugLogB(deserializeFileChunkReference64x32())
+            .setBuildNumberCreated(deserializeLittleEndianInt())
+            .setBuildNumberLastWroteToFile(deserializeLittleEndianInt())
+            .setBuildNumberOldestWritten(deserializeLittleEndianInt())
+            .setBuildNumberNewestWritten(deserializeLittleEndianInt());
+        ByteBuffer reservedBytesAtEndOfHeader = 
ByteBuffer.allocate(NUM_RESERVED_BYTES_AT_END_OF_HEADER);
+        deserializeBytes(reservedBytesAtEndOfHeader);
         return data;
     }
 
@@ -159,8 +162,29 @@ class OneNotePtr {
         for (int i = 0; i < 16; ++i) {
             guid[i] = dif.read();
         }
+        int[] guid2 = new int[16];
+        // re-order [0,1,2,3] to little endian
+        guid2[0] = guid[3];
+        guid2[1] = guid[2];
+        guid2[2] = guid[1];
+        guid2[3] = guid[0];
+        // re-order [4,5,6,7] to little endian
+        guid2[4] = guid[5];
+        guid2[5] = guid[4];
+        guid2[6] = guid[7];
+        guid2[7] = guid[6];
+        // the rest is already in right order.
+        guid2[8] = guid[8];
+        guid2[9] = guid[9];
+        guid2[10] = guid[10];
+        guid2[11] = guid[11];
+        guid2[12] = guid[12];
+        guid2[13] = guid[13];
+        guid2[14] = guid[14];
+        guid2[15] = guid[15];
+
         offset = dif.position();
-        return new GUID(guid);
+        return new GUID(guid2);
     }
 
     private byte[] deserializedReservedHeader() throws IOException {
@@ -227,7 +251,7 @@ class OneNotePtr {
         int c1 = dif.read();
         int c2 = dif.read();
         long res = (((c1 & 0xff) << 0) +
-          ((c2 & 0xff) << 8));
+            ((c2 & 0xff) << 8));
         offset = dif.position();
         return res;
     }
@@ -268,7 +292,7 @@ class OneNotePtr {
      * @return The resulting one note pointer after node lists are all parsed.
      */
     public OneNotePtr internalDeserializeFileNodeList(OneNotePtr ptr, 
FileNodeList fileNodeList, FileNodePtr curPath) throws IOException,
-      TikaException {
+        TikaException {
         OneNotePtr localPtr = new OneNotePtr(document, dif);
         FileNodePtrBackPush bp = new FileNodePtrBackPush(curPath);
         try {
@@ -305,7 +329,7 @@ class OneNotePtr {
      * @param curPath The current FileNodePtr.
      */
     void deserializeFileNodeListFragment(FileNodeList data, FileChunkReference 
next, FileNodePtr curPath) throws IOException,
-      TikaException {
+        TikaException {
         data.fileNodeListHeader = deserializeFileNodeListHeader();
         boolean terminated = false;
         while (offset + 24 <= end) { // while there are at least 24 bytes free
@@ -334,7 +358,7 @@ class OneNotePtr {
         next.stp = nextChunkRef.stp;
         if (terminated) {
             LOG.debug("{}Chunk terminator found NextChunkRef.cb={}, 
NextChunkRef.stp={}, Offset={}, End={}", getIndent(), nextChunkRef.cb
-              , nextChunkRef.stp, offset, end);
+                , nextChunkRef.stp, offset, end);
             // TODO check that next is OK
         }
         long footer = deserializeLittleEndianLong();
@@ -381,7 +405,7 @@ class OneNotePtr {
         } else if (data.id == FndStructureConstants.ObjectGroupEndFND) {
             // no data
         } else if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND
-          || data.id == FndStructureConstants.ObjectSpaceManifestListStartFND) 
{
+            || data.id == 
FndStructureConstants.ObjectSpaceManifestListStartFND) {
             if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND) {
                 data.idDesc = "gosidRoot";
             } else {
@@ -419,7 +443,7 @@ class OneNotePtr {
             data.gctxid = ExtendedGUID.nil();
             document.registerRevisionManifest(data);
         } else if (data.id == FndStructureConstants.RevisionManifestStart6FND
-          || data.id == FndStructureConstants.RevisionManifestStart7FND) {
+            || data.id == FndStructureConstants.RevisionManifestStart7FND) {
             data.gosid = deserializeExtendedGUID(); // the rid
             data.idDesc = "rid";
             //LOG.debug("{}gosid {}", getIndent(), 
data.gosid.toString().c_str());
@@ -443,13 +467,13 @@ class OneNotePtr {
             data.subType.globalIdTableEntryFNDX.guid = deserializeGUID();
 
             
document.revisionMap.get(document.currentRevision).globalId.put(data.subType.globalIdTableEntryFNDX.index,
-              data.subType.globalIdTableEntryFNDX.guid);
+                data.subType.globalIdTableEntryFNDX.guid);
         } else if (data.id == FndStructureConstants.GlobalIdTableEntry2FNDX) {
             data.subType.globalIdTableEntry2FNDX.indexMapFrom = 
deserializeLittleEndianInt();
             data.subType.globalIdTableEntry2FNDX.indexMapTo = 
deserializeLittleEndianInt();
 
             ExtendedGUID dependentRevision =
-              document.revisionMap.get(document.currentRevision).dependent;
+                document.revisionMap.get(document.currentRevision).dependent;
             // Get the compactId from the revisionMap's globalId map.
             GUID compactId = 
document.revisionMap.get(dependentRevision).globalId.get(data.subType.globalIdTableEntry2FNDX.indexMapFrom);
             if (compactId == null) {
@@ -471,10 +495,10 @@ class OneNotePtr {
                     throw new TikaException("COMPACT_ID_MISSING");
                 }
                 
document.revisionMap.get(document.currentRevision).globalId.put(data.subType.globalIdTableEntry3FNDX.indexCopyToStart
 + i
-                  , compactId);
+                    , compactId);
             }
         } else if (data.id == 
FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX
-          || data.id == 
FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
+            || data.id == 
FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
             data.subType.objectRevisionWithRefCountFNDX.oid = 
deserializeCompactID(); // the oid
 
             if (data.id == 
FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX) {
@@ -501,7 +525,7 @@ class OneNotePtr {
             data.subType.rootObjectReference.rootObjectReferenceBase.rootRole 
= deserializeLittleEndianInt();
 
             LOG.debug("{}Root role {}", getIndent(),
-              
data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
+                
data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
         } else if (data.id == FndStructureConstants.RootObjectReference3FND) {
             data.idDesc = "oidRoot";
             data.gosid = deserializeExtendedGUID();
@@ -509,9 +533,9 @@ class OneNotePtr {
             data.subType.rootObjectReference.rootObjectReferenceBase.rootRole 
= deserializeLittleEndianInt();
 
             LOG.debug("{}Root role {}", getIndent(),
-              
data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
+                
data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
         } else if (data.id == FndStructureConstants.RevisionRoleDeclarationFND
-          || data.id == 
FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
+            || data.id == 
FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
             data.gosid = deserializeExtendedGUID();
 
             data.subType.revisionRoleDeclaration.revisionRole = 
deserializeLittleEndianInt();
@@ -521,8 +545,8 @@ class OneNotePtr {
 
             }
             document.registerAdditionalRevisionRole(data.gosid,
-              data.subType.revisionRoleDeclaration.revisionRole,
-              data.gctxid);
+                data.subType.revisionRoleDeclaration.revisionRole,
+                data.gctxid);
             // FIXME: deal with ObjectDataEncryptionKey
         } else if (data.id == 
FndStructureConstants.ObjectInfoDependencyOverridesFND) {
             OneNotePtr content = new OneNotePtr(this);
@@ -544,22 +568,22 @@ class OneNotePtr {
             data.subType.fileDataStoreObjectReference.ref = 
fileDataStorePtr.deserializeFileDataStoreObject();
 
         } else if (data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
-          || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX
-          || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
-          || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND
-          || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND
-          || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
+            || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX
+            || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
+            || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND
+            || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND
+            || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
             
data.subType.objectDeclarationWithRefCount.body.file_data_store_reference =
-              false;
+                false;
             if (data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
-              || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
+                || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
                 data.subType.objectDeclarationWithRefCount.body = 
deserializeObjectDeclarationWithRefCountBody();
             } else { // one of the other 4 that use the ObjectDeclaration2Body
                 data.subType.objectDeclarationWithRefCount.body = 
deserializeObjectDeclaration2Body();
             }
             if (data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
-              || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
-              || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
+                || data.id == 
FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
+                || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
                 long refCnt = deserializeLittleEndianChar();
                 data.subType.objectDeclarationWithRefCount.cRef = refCnt;
             } else {
@@ -567,7 +591,7 @@ class OneNotePtr {
             }
 
             if (data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND
-              || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
+                || data.id == 
FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
                 ByteBuffer md5Buffer = ByteBuffer.allocate(16);
                 deserializeBytes(md5Buffer);
                 data.subType.objectDeclarationWithRefCount.readOnly.md5 = 
md5Buffer.array();
@@ -576,9 +600,9 @@ class OneNotePtr {
             postprocessObjectDeclarationContents(data, curPath);
 
             LOG.debug("{}Ref Count JCID {}", getIndent(),
-              data.subType.objectDeclarationWithRefCount.body.jcid);
+                data.subType.objectDeclarationWithRefCount.body.jcid);
         } else if (data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND
-          || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationFileData3LargeRefCountFND) {
+            || data.id == 
FndStructureConstants.CanRevise.ObjectDeclarationFileData3LargeRefCountFND) {
             data.subType.objectDeclarationWithRefCount.body.oid = 
deserializeCompactID();
 
             long jcid = deserializeLittleEndianInt();
@@ -606,10 +630,10 @@ class OneNotePtr {
             byte[] dataSpaceBufferBytes = dataSpaceBuffer.array();
             offset += dataSpaceBufferBytes.length;
             if (dataSpaceBufferBytes.length == (IFNDF_GUID_LENGTH * 2 + 
IFNDF.length) &&
-              Arrays.equals(IFNDF, Arrays.copyOfRange(dataSpaceBufferBytes, 0, 
IFNDF.length))) {
+                Arrays.equals(IFNDF, Arrays.copyOfRange(dataSpaceBufferBytes, 
0, IFNDF.length))) {
                 
data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = 
true;
                 GUID guid = 
GUID.fromCurlyBraceUTF16Bytes(Arrays.copyOfRange(dataSpaceBufferBytes, 
IFNDF.length,
-                  dataSpaceBufferBytes.length));
+                    dataSpaceBufferBytes.length));
                 ExtendedGUID extendedGUID = new ExtendedGUID(guid, 0);
                 FileChunkReference fileChunk = 
document.getAssocGuidToRef(extendedGUID);
                 if (fileChunk == null) {
@@ -671,11 +695,11 @@ class OneNotePtr {
         --indentLevel;
         if (data.gosid.equals(ExtendedGUID.nil())) {
             LOG.debug("{}End Node {} ({}) - Offset={}, End={}", getIndent(), 
FndStructureConstants.nameOf(data.id), (int) data.id, offset
-              , end);
+                , end);
         } else {
             LOG.debug("{}End Node {} ({}) {}:[{}] - Offset={}, End={}", 
getIndent(), FndStructureConstants.nameOf(data.id), (int) data.id
-              , data.idDesc,
-              data.gosid, offset, end);
+                , data.idDesc,
+                data.gosid, offset, end);
         }
         return data;
     }
@@ -911,7 +935,7 @@ class OneNotePtr {
             data.subType.objectDeclarationWithRefCount.objectRef = 
objectSpacePropSetPtr.deserializeObjectSpaceObjectPropSet();
             ObjectStreamCounters streamCounters = new ObjectStreamCounters();
             data.propertySet = 
objectSpacePropSetPtr.deserializePropertySet(streamCounters,
-              data.subType.objectDeclarationWithRefCount.objectRef);
+                data.subType.objectDeclarationWithRefCount.objectRef);
         } else {
             if 
(!data.subType.objectDeclarationWithRefCount.body.jcid.isFileData) {
                 throw new TikaException("JCID must be file data when 
!isObjectSpaceObjectPropSet.");
@@ -929,12 +953,12 @@ class OneNotePtr {
     }
 
     private PropertySet deserializePropertySet(ObjectStreamCounters counters, 
ObjectSpaceObjectPropSet streams) throws IOException,
-      TikaException {
+        TikaException {
         PropertySet data = new PropertySet();
         long count = deserializeLittleEndianShort();
         data.rgPridsData = Stream.generate(PropertyValue::new)
-          .limit((int) count)
-          .collect(Collectors.toList());
+            .limit((int) count)
+            .collect(Collectors.toList());
         for (int i = 0; i < count; ++i) {
             data.rgPridsData.get(i).propertyId = deserializePropertyID();
             LOG.debug("{}Property {}", getIndent(), 
data.rgPridsData.get(i).propertyId);
@@ -942,7 +966,7 @@ class OneNotePtr {
         LOG.debug("{}{} elements in property set:", getIndent(), count);
         for (int i = 0; i < count; ++i) {
             data.rgPridsData.set(i, deserializePropertyValueFromPropertyID(
-              data.rgPridsData.get(i).propertyId, streams, counters));
+                data.rgPridsData.get(i).propertyId, streams, counters));
         }
         LOG.debug("");
         return data;
@@ -1033,7 +1057,7 @@ class OneNotePtr {
                 case 0xa:
                 case 0xc:
                     if (type == 0x8 || type == 0xa
-                      || type == 0xc) {
+                        || type == 0xc) {
                         val32 = 1;
                     }
                 {
@@ -1055,7 +1079,7 @@ class OneNotePtr {
                         if (index < stream.size()) {
                             data.compactIDs.add(stream.get(index));
                             LOG.debug(" {}[{}]", xtype,
-                              data.compactIDs.get(data.compactIDs.size() - 1));
+                                data.compactIDs.get(data.compactIDs.size() - 
1));
                         } else {
                             throw new TikaException("SEGV");
                         }
@@ -1068,8 +1092,8 @@ class OneNotePtr {
                     OneNotePropertyId propId = deserializePropertyID();
                     LOG.debug(" UnifiedSubPropertySet {} {}", val32, propId);
                     data.propertySet.rgPridsData = 
Stream.generate(PropertyValue::new)
-                      .limit((int) val32)
-                      .collect(Collectors.toList());
+                        .limit((int) val32)
+                        .collect(Collectors.toList());
                     for (int i = 0; i < val32; ++i) {
                         try {
                             data.propertySet.rgPridsData.set(i, 
deserializePropertyValueFromPropertyID(propId, streams, counters));
@@ -1119,7 +1143,7 @@ class OneNotePtr {
     }
 
     private ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs 
deserializeObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs() throws IOException
-      , TikaException {
+        , TikaException {
         ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs data = new 
ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
         long header = deserializeLittleEndianInt();
         data.count = header & 0xffffff;
@@ -1127,10 +1151,10 @@ class OneNotePtr {
         data.extendedStreamsPresent = ((header >> 30) & 0x1);
         if (LOG.isDebugEnabled()) {
             LOG.debug(
-              "{}Deserialized Stream Header count: {} OsidsNotPresent {} 
Extended {}",
-              getIndent(), data.count,
-              data.osidsStreamNotPresent,
-              data.extendedStreamsPresent);
+                "{}Deserialized Stream Header count: {} OsidsNotPresent {} 
Extended {}",
+                getIndent(), data.count,
+                data.osidsStreamNotPresent,
+                data.extendedStreamsPresent);
         }
         for (int i = 0; i < data.count; ++i) {
             CompactID cid;
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
index c72ebec..d5d1639 100644
--- 
a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
+++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
@@ -174,6 +174,31 @@ public class OneNoteParserTest extends TikaTest {
     }
 
     @Test
+    public void testOneNote2007OrEarlier1() throws Exception {
+        Metadata metadata = new Metadata();
+        String txt = getText("testOneNote2007OrEarlier1.one", metadata);
+
+        // utf-16 LE text
+        assertContains("One note is the application.  The notebooks are the 
files within the application.  " +
+            "Each notebook can have an unlimited amount of sections and pages. 
 To create a new notebook, go to file, new, computer, " +
+            "and name it.  It will go to my documents, oneNote Notebooks 
folder.  The notebook doesn't close and you don't have to save.  " +
+            "If it closes, you can go back to it and it will open at the same 
place you left off.  If you are offline and the notebook is " +
+            "being stored on a sharepoint site, you can work on it and it will 
sync when you go back online.", txt);
+        // ascii text
+        assertContains("Correlation between Outlook and OneNote", txt);
+    }
+
+    @Test
+    public void testOneNote2007OrEarlier2() throws Exception {
+        Metadata metadata = new Metadata();
+        String txt = getText("testOneNote2007OrEarlier2.one", metadata);
+
+        // ascii text
+        assertContains("In Outlook meeting notice, select One Note Meeting 
Notes and then select the \"page\" you want to link/share", txt);
+
+    }
+
+    @Test
     public void testOneNoteEmbeddedWordDoc() throws Exception {
         List<Metadata> metadataList = 
getRecursiveMetadata("testOneNoteEmbeddedWordDoc.one");
 
diff --git 
a/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier1.one 
b/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier1.one
new file mode 100755
index 0000000..5ff7128
Binary files /dev/null and 
b/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier1.one 
differ
diff --git 
a/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier2.one 
b/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier2.one
new file mode 100755
index 0000000..ae8f3d5
Binary files /dev/null and 
b/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier2.one 
differ

Reply via email to