Repository: tika
Updated Branches:
  refs/heads/2.x dc4ca999c -> 251f68002
Upgrade to POI 3.14-final (TIKA-1853).

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/251f6800
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/251f6800
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/251f6800

Branch: refs/heads/2.x
Commit: 251f6800230efb5a7dd18353fd559386c4851b46
Parents: dc4ca99
Author: tballison <[email protected]>
Authored: Mon Mar 7 20:22:53 2016 -0500
Committer: tballison <[email protected]>
Committed: Mon Mar 7 20:22:53 2016 -0500

----------------------------------------------------------------------
 CHANGES.txt                                                      | 2 +-
 tika-parser-modules/pom.xml                                      | 4 ++--
 .../parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java | 4 ++--
 .../tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java | 1 +
 4 files changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/251f6800/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d4611f0..03b328f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -21,7 +21,7 @@ Release 1.13 - ???
     is still <scope>provided</scope>. You need to include this dependency
     in order to parse sqlite files.

-  * Upgrade to POI 3.14-beta1 (TIKA-1799).
+  * Upgrade to POI 3.14-final (TIKA-1853).

   * Upgrade to PDFBox 1.8.11 (TIKA-1830).

http://git-wip-us.apache.org/repos/asf/tika/blob/251f6800/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 8a3435a..6342ebb 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -35,9 +35,9 @@
   <url>http://tika.apache.org/</url>

   <properties>
-    <poi.version>3.14-beta1</poi.version>
+    <poi.version>3.14</poi.version>
     <!-- NOTE: sync codec version with POI -->
-    <codec.version>1.9</codec.version>
+    <codec.version>1.10</codec.version>
     <pdfbox.version>1.8.11</pdfbox.version>
   </properties>


http://git-wip-us.apache.org/repos/asf/tika/blob/251f6800/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 71469ce..102a05e 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -27,7 +27,7 @@ import org.apache.poi.openxml4j.opc.PackagePartName;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.sl.usermodel.SimpleShape;
+import org.apache.poi.sl.usermodel.Placeholder;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
@@ -151,7 +151,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
         for (XSLFShape sh : shapes) {
             if (sh instanceof XSLFTextShape) {
                 XSLFTextShape txt = (XSLFTextShape) sh;
-                SimpleShape.Placeholder ph = txt.getTextType();
+                Placeholder ph = txt.getTextType();
                 if (skipPlaceholders && ph != null) {
                     continue;
                 }

http://git-wip-us.apache.org/repos/asf/tika/blob/251f6800/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index c5ca3d9..6117690 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -187,6 +187,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
             throws IOException, SAXException {
         InputSource sheetSource = new InputSource(sheetInputStream);
         SAXParserFactory saxFactory = SAXParserFactory.newInstance();
+        saxFactory.setNamespaceAware(true);
         try {
             SAXParser saxParser = saxFactory.newSAXParser();
             XMLReader sheetParser = saxParser.getXMLReader();
