Repository: tika Updated Branches: refs/heads/master 256209a84 -> 25cee5449
TIKA-1799: upgrade to POI 3.14-beta1 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/25cee544 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/25cee544 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/25cee544 Branch: refs/heads/master Commit: 25cee54499126de2b90f6bd5bde8de470b422349 Parents: 256209a Author: tballison <[email protected]> Authored: Tue Feb 2 15:11:17 2016 -0500 Committer: tballison <[email protected]> Committed: Tue Feb 2 15:11:17 2016 -0500 ---------------------------------------------------------------------- CHANGES.txt | 2 ++ tika-bundle/pom.xml | 6 +++--- tika-parsers/pom.xml | 2 +- .../microsoft/ooxml/XSLFPowerPointExtractorDecorator.java | 8 ++++---- .../apache/tika/parser/microsoft/PowerPointParserTest.java | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/25cee544/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 6b6319a..19cd5c9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Release 1.13 - ??? + * Upgrade to POI 3.14-beta1 (TIKA-1799). + * Upgrade to PDFBox 1.8.11 (TIKA-1830). Release 1.12 - 01/24/2016 http://git-wip-us.apache.org/repos/asf/tika/blob/25cee544/tika-bundle/pom.xml ---------------------------------------------------------------------- diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index 6303a5b..6a3a742 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -129,6 +129,7 @@ commons-io, commons-exec, junrar, pdfbox,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on, poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas, + curvesapi, xmlbeans, jackcess, commons-lang, @@ -264,9 +265,8 @@ org.xml.sax.ext;resolution:=optional, org.xml.sax.helpers;resolution:=optional, org.xmlpull.v1;resolution:=optional, - schemasMicrosoftComOfficePowerpoint;resolution:=optional, - schemasMicrosoftComOfficeWord;resolution:=optional, - sun.misc;resolution:=optional, + com.microsoft.schemas.office.powerpoint;resolution:=optional, + com.microsoft.schemas.office.word;resolution:=optional, sun.misc;resolution:=optional, ucar.units;resolution:=optional, ucar.httpservices;resolution:=optional, ucar.nc2.util;resolution:=optional, http://git-wip-us.apache.org/repos/asf/tika/blob/25cee544/tika-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml index 0442033..156e9ed 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -35,7 +35,7 @@ <url>http://tika.apache.org/</url> <properties> - <poi.version>3.13</poi.version> + <poi.version>3.14-beta1</poi.version> <!-- NOTE: sync codec version with POI --> <codec.version>1.9</codec.version> <!-- NOTE: sync tukaani version with commons-compress --> http://git-wip-us.apache.org/repos/asf/tika/blob/25cee544/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java index d55a417..3bc92f5 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java @@ -27,9 +27,8 @@ import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.sl.usermodel.SimpleShape; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; -import org.apache.poi.xslf.usermodel.Placeholder; import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XSLFCommentAuthors; import org.apache.poi.xslf.usermodel.XSLFComments; @@ -43,6 +42,7 @@ import org.apache.poi.xslf.usermodel.XSLFShape; import org.apache.poi.xslf.usermodel.XSLFSheet; import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFSlideLayout; +import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XSLFTable; import org.apache.poi.xslf.usermodel.XSLFTableCell; import org.apache.poi.xslf.usermodel.XSLFTableRow; @@ -151,7 +151,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor { for (XSLFShape sh : shapes) { if (sh instanceof XSLFTextShape) { XSLFTextShape txt = (XSLFTextShape) sh; - Placeholder ph = txt.getTextType(); + SimpleShape.Placeholder ph = txt.getTextType(); if (skipPlaceholders && ph != null) { continue; } @@ -232,7 +232,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor { XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlideShow document = null; try { - document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future + document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } http://git-wip-us.apache.org/repos/asf/tika/blob/25cee544/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java index e1f159a..b1618eb 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java @@ -79,14 +79,14 @@ public class PowerPointParserTest extends TikaTest { for(int row=1;row<=3;row++) { //assertContains("·\tBullet " + row, content); //assertContains("\u00b7\tBullet " + row, content); - assertContains("<p>Bullet " + row, xml); + assertContains("<li>Bullet " + row, xml); } assertContains("Here is a numbered list:", xml); for(int row=1;row<=3;row++) { //assertContains(row + ")\tNumber bullet " + row, content); //assertContains(row + ") Number bullet " + row, content); // TODO: OOXMLExtractor fails to number the bullets: - assertContains("<p>Number bullet " + row, xml); + assertContains("<li>Number bullet " + row, xml); } for(int row=1;row<=2;row++) {
