This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4646-instrText-hyperlinks
in repository https://gitbox.apache.org/repos/asf/tika.git
from 1926cbb449 TIKA-4646 -- improve hyperlink extraction from ooxml
add b93f1bb3ac TIKA-4646 -- improve hyperlink extraction from ooxml
No new revisions were added by this update.
Summary of changes:
.../main/java/org/apache/tika/metadata/Office.java | 27 ++++
.../microsoft/ooxml/OOXMLTikaBodyPartHandler.java | 13 ++
.../ooxml/OOXMLWordAndPowerPointTextHandler.java | 12 ++
.../ooxml/SXWPFWordExtractorDecorator.java | 177 +++++++++++++++++++++
.../ooxml/XSSFExcelExtractorDecorator.java | 126 ++++++++++++++-
.../xslf/XSLFEventBasedPowerPointExtractor.java | 5 +
.../ooxml/xwpf/XWPFEventBasedWordExtractor.java | 5 +
.../tika/parser/microsoft/ExcelParserTest.java | 19 +++
.../parser/microsoft/ooxml/SXWPFExtractorTest.java | 50 ++++++
.../test-documents/testAttachedTemplate.docx | Bin 0 -> 2284 bytes
.../test/resources/test-documents/testDdeLink.xlsx | Bin 0 -> 3030 bytes
.../resources/test-documents/testFrameset.docx | Bin 0 -> 2328 bytes
.../resources/test-documents/testMailMerge.docx | Bin 0 -> 2306 bytes
.../resources/test-documents/testSubdocument.docx | Bin 0 -> 1980 bytes
14 files changed, 433 insertions(+), 1 deletion(-)
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAttachedTemplate.docx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDdeLink.xlsx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFrameset.docx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testMailMerge.docx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testSubdocument.docx