Repository: tika
Updated Branches:
  refs/heads/2.x 9fb7fa9ab -> de9e28cfa
TIKA-1799: upgrade to POI 3.14-beta1 for 2.x branch Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/de9e28cf Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/de9e28cf Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/de9e28cf Branch: refs/heads/2.x Commit: de9e28cfadef2b8707807175a8f0cae1cc8dedff Parents: 9fb7fa9 Author: tballison <[email protected]> Authored: Tue Feb 2 15:19:54 2016 -0500 Committer: tballison <[email protected]> Committed: Tue Feb 2 15:19:54 2016 -0500 ---------------------------------------------------------------------- CHANGES.txt | 2 ++ tika-bundle/pom.xml | 5 +++-- tika-parser-modules/pom.xml | 2 +- .../microsoft/ooxml/XSLFPowerPointExtractorDecorator.java | 9 ++++----- .../apache/tika/parser/microsoft/PowerPointParserTest.java | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index e452ce0..b2e88e2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -10,6 +10,8 @@ Release 2.0 - Future Development Release 1.13 - ??? + * Upgrade to POI 3.14-beta1 (TIKA-1799). + * Upgrade to PDFBox 1.8.11 (TIKA-1830). 
Release 1.12 - 01/24/2016 http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-bundle/pom.xml ---------------------------------------------------------------------- diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index 05cc1d6..f9aa71b 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -129,6 +129,7 @@ commons-io, commons-exec, junrar, pdfbox,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on, poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas, + curvesapi, xmlbeans, jackcess, commons-lang, @@ -264,8 +265,8 @@ org.xml.sax.ext;resolution:=optional, org.xml.sax.helpers;resolution:=optional, org.xmlpull.v1;resolution:=optional, - schemasMicrosoftComOfficePowerpoint;resolution:=optional, - schemasMicrosoftComOfficeWord;resolution:=optional, + com.microsoft.schemas.office.powerpoint;resolution:=optional, + com.microsoft.schemas.office.word;resolution:=optional, sun.misc;resolution:=optional, sun.misc;resolution:=optional, ucar.units;resolution:=optional, ucar.httpservices;resolution:=optional, http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml index 196ce58..932a058 100644 --- a/tika-parser-modules/pom.xml +++ b/tika-parser-modules/pom.xml @@ -35,7 +35,7 @@ <url>http://tika.apache.org/</url> <properties> - <poi.version>3.13</poi.version> + <poi.version>3.14-beta1</poi.version> <!-- NOTE: sync codec version with POI --> <codec.version>1.9</codec.version> <pdfbox.version>1.8.11</pdfbox.version> http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java ---------------------------------------------------------------------- diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java index 53cb6c1..71469ce 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java @@ -27,9 +27,8 @@ import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.sl.usermodel.SimpleShape; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; -import org.apache.poi.xslf.usermodel.Placeholder; import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XSLFCommentAuthors; import org.apache.poi.xslf.usermodel.XSLFComments; @@ -43,6 +42,7 @@ import org.apache.poi.xslf.usermodel.XSLFShape; import org.apache.poi.xslf.usermodel.XSLFSheet; import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFSlideLayout; +import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XSLFTable; import org.apache.poi.xslf.usermodel.XSLFTableCell; import org.apache.poi.xslf.usermodel.XSLFTableRow; @@ -151,7 +151,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor { for (XSLFShape sh : shapes) { if (sh instanceof XSLFTextShape) { XSLFTextShape txt = (XSLFTextShape) sh; - Placeholder ph = txt.getTextType(); + SimpleShape.Placeholder ph = txt.getTextType(); if (skipPlaceholders && ph != null) { continue; } @@ -229,10 +229,9 @@ public class 
XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor { @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<>(); - XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlideShow document = null; try { - document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future + document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java index 35b341d..b1e1241 100644 --- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java +++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java @@ -79,14 +79,14 @@ public class PowerPointParserTest extends TikaTest { for(int row=1;row<=3;row++) { //assertContains("·\tBullet " + row, content); //assertContains("\u00b7\tBullet " + row, content); - assertContains("<p>Bullet " + row, xml); + assertContains("<li>Bullet " + row, xml); } assertContains("Here is a numbered list:", xml); for(int row=1;row<=3;row++) { //assertContains(row + ")\tNumber bullet " + row, content); //assertContains(row + ") Number bullet " + row, content); // TODO: OOXMLExtractor fails to number the bullets: - assertContains("<p>Number bullet " + row, xml); + assertContains("<li>Number bullet " + row, xml); } for(int row=1;row<=2;row++) {
