This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-2224
in repository https://gitbox.apache.org/repos/asf/tika.git.
from 4bfda93 TIKA-2224 - create initial working branch for development of
the OneNote parser
add f3e5937 OneNote Parser
add 0543d1f add support for .one files to the tests.
new c33e643 Merge branch 'TIKA-2224-ndipiazza' of
https://github.com/nddipiazza/tika into nddipiazza-TIKA-2224-ndipiazza; merge
branch for integration of TIKA-2224
new 079fab1 TIKA-2224 - add license headers, fix whitespace, integrate
embedded file handling, handle href
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../java/org/apache/tika/TikaDetectionTest.java | 1 +
.../microsoft/onenote/CheckedFileNodePushBack.java | 45 +-
.../CompactID.java} | 52 +-
.../tika/parser/microsoft/onenote/Error.java | 16 +-
.../parser/microsoft/onenote/ExtendedGUID.java | 86 ++
.../microsoft/onenote/FileChunkReference.java | 99 ++
.../microsoft/onenote/FileDataStoreObject.java | 19 +-
.../onenote/FileDataStoreObjectReference.java | 18 +-
.../tika/parser/microsoft/onenote/FileNode.java | 278 +++++
.../parser/microsoft/onenote/FileNodeList.java | 37 +-
.../microsoft/onenote/FileNodeListHeader.java | 92 ++
.../tika/parser/microsoft/onenote/FileNodePtr.java | 65 ++
.../microsoft/onenote/FileNodePtrBackPush.java | 18 +-
.../parser/microsoft/onenote/FileNodeUnion.java | 140 +++
.../microsoft/onenote/FndStructureConstants.java | 166 +++
.../apache/tika/parser/microsoft/onenote/GUID.java | 129 +++
.../microsoft/onenote/GlobalIdTableEntry2FNDX.java | 27 +-
.../microsoft/onenote/GlobalIdTableEntry3FNDX.java | 34 +
.../microsoft/onenote/GlobalIdTableEntryFNDX.java | 24 +
.../microsoft/onenote/GlobalIdTableStartFNDX.java | 18 +-
.../onenote/IndentUtil.java} | 17 +-
.../tika/parser/microsoft/onenote/Int24.java | 24 +-
.../apache/tika/parser/microsoft/onenote/JCID.java | 143 +++
.../onenote/ObjectDeclarationWithRefCount.java | 75 ++
.../onenote/ObjectDeclarationWithRefCountBody.java | 73 ++
.../onenote/ObjectInfoDependencyOverrideData.java | 74 ++
.../onenote/ObjectInfoDependencyOverrides.java | 18 +-
.../onenote/ObjectRevisionWithRefCountFNDX.java | 70 ++
.../onenote/ObjectSpaceObjectPropSet.java | 60 +
...ctSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java | 63 ++
.../microsoft/onenote/ObjectStreamCounters.java | 15 +-
.../onenote/OneNoteDirectFileResource.java | 87 ++
.../parser/microsoft/onenote/OneNoteDocument.java | 138 +++
.../parser/microsoft/onenote/OneNoteHeader.java | 403 +++++++
.../parser/microsoft/onenote/OneNoteParser.java | 113 +-
.../microsoft/onenote/OneNotePropertyEnum.java | 210 ++++
.../microsoft/onenote/OneNotePropertyId.java | 86 ++
.../tika/parser/microsoft/onenote/OneNotePtr.java | 1158 ++++++++++++++++++++
.../microsoft/onenote/OneNoteTreeWalker.java | 459 ++++++++
.../onenote/OneNoteTreeWalkerOptions.java | 88 ++
.../parser/microsoft/onenote/PropertyIDType.java | 13 +-
.../tika/parser/microsoft/onenote/PropertySet.java | 95 ++
.../parser/microsoft/onenote/PropertyValue.java | 137 +++
.../tika/parser/microsoft/onenote/Revision.java | 72 ++
.../parser/microsoft/onenote/RevisionManifest.java | 60 +
.../onenote/RevisionManifestListStart.java | 18 +-
.../microsoft/onenote/RevisionRoleDeclaration.java | 18 +-
.../microsoft/onenote/RootObjectReference.java | 31 +-
.../microsoft/onenote/RootObjectReferenceBase.java | 18 +-
.../services/org.apache.tika.parser.Parser | 1 +
.../java/org/apache/tika/mime/TestMimeTypes.java | 7 +-
.../microsoft/onenote/OneNoteParserTest.java | 84 +-
.../tika/parser/onenote/OneNoteParserTest.java | 68 ++
.../src/test/resources/test-documents/Sample1.one | Bin 0 -> 360280 bytes
.../test-documents/Section1SheetTitle.one | Bin 0 -> 435128 bytes
.../test-documents/Section2SheetTitle.one | Bin 0 -> 35344 bytes
.../test-documents/Section3SheetTitle.one | Bin 0 -> 43176 bytes
57 files changed, 5113 insertions(+), 247 deletions(-)
copy tika-core/src/main/java/org/apache/tika/mime/AndClause.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/CheckedFileNodePushBack.java
(55%)
copy
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/{ooxml/ParagraphProperties.java
=> onenote/CompactID.java} (50%)
copy
tika-batch/src/main/java/org/apache/tika/batch/StatusReporterFutureResult.java
=>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
(77%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileChunkReference.java
copy tika-core/src/main/java/org/apache/tika/exception/TikaException.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObject.java
(65%)
copy tika-core/src/main/java/org/apache/tika/exception/TikaException.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObjectReference.java
(73%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNode.java
copy tika-core/src/main/java/org/apache/tika/mime/OrClause.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
(57%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeListHeader.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtr.java
copy tika-example/src/main/java/org/apache/tika/example/Pharmacy.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
(71%)
mode change 100755 => 100644
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeUnion.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FndStructureConstants.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java
copy
tika-core/src/main/java/org/apache/tika/exception/EncryptedDocumentException.java
=>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry2FNDX.java
(61%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry3FNDX.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntryFNDX.java
copy tika-core/src/main/java/org/apache/tika/exception/TikaException.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableStartFNDX.java
(74%)
copy
tika-parsers/src/main/java/org/apache/tika/parser/{utils/DataURISchemeParseException.java
=> microsoft/onenote/IndentUtil.java} (76%)
copy tika-example/src/main/java/org/apache/tika/example/Pharmacy.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Int24.java
(70%)
mode change 100755 => 100644
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCount.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCountBody.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectInfoDependencyOverrideData.java
copy tika-example/src/main/java/org/apache/tika/example/Pharmacy.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectInfoDependencyOverrides.java
(70%)
mode change 100755 => 100644
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectRevisionWithRefCountFNDX.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectPropSet.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java
copy
tika-batch/src/test/java/org/apache/tika/batch/fs/strawman/StrawmanTest.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectStreamCounters.java
(75%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDirectFileResource.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyEnum.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyId.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalkerOptions.java
copy
tika-batch/src/main/java/org/apache/tika/batch/StatusReporterFutureResult.java
=>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyIDType.java
(80%)
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertySet.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyValue.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Revision.java
create mode 100644
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifest.java
copy tika-example/src/main/java/org/apache/tika/example/Pharmacy.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifestListStart.java
(70%)
mode change 100755 => 100644
copy tika-example/src/main/java/org/apache/tika/example/Pharmacy.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionRoleDeclaration.java
(72%)
mode change 100755 => 100644
copy tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReference.java
(55%)
copy tika-core/src/main/java/org/apache/tika/exception/TikaException.java =>
tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReferenceBase.java
(74%)
create mode 100644
tika-parsers/src/test/java/org/apache/tika/parser/onenote/OneNoteParserTest.java
create mode 100644 tika-parsers/src/test/resources/test-documents/Sample1.one
create mode 100755
tika-parsers/src/test/resources/test-documents/Section1SheetTitle.one
create mode 100755
tika-parsers/src/test/resources/test-documents/Section2SheetTitle.one
create mode 100755
tika-parsers/src/test/resources/test-documents/Section3SheetTitle.one