This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit e9623650a37039286604e8ed3a17fdcc0ab04fc1 Author: Bob Paulin <[email protected]> AuthorDate: Wed Apr 29 17:13:10 2020 -0500 TIKA-3094: Add SparseBitSet and xmpcore-shaded to tika-bundle. --- tika-bundle/pom.xml | 7 ++++--- .../test/java/org/apache/tika/bundle/BundleIT.java | 19 +++++++++++++++++++ tika-bundle/src/test/resources/testPPT.pptx | Bin 0 -> 36518 bytes 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index 9fae42d..07ef3bd 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -187,7 +187,7 @@ vorbis-java-tika| isoparser| metadata-extractor| - xmpcore| + xmpcore-shaded| json-simple| boilerpipe| rome| @@ -212,7 +212,9 @@ jcip-annotations| jmatio| guava| - age-predictor-api</Embed-Dependency> + age-predictor-api| + SparseBitSet + </Embed-Dependency> <Embed-Transitive>true</Embed-Transitive> <Bundle-DocURL>${project.url}</Bundle-DocURL> <Export-Package> @@ -258,7 +260,6 @@ com.sun.msv.datatype;resolution:=optional, com.sun.msv.datatype.xsd;resolution:=optional, com.sun.tools.javadoc;resolution:=optional, - com.zaxxer.sparsebits;resolution:=optional, edu.mit.ll.mitie;resolution:=optional, edu.stanford.nlp.*;resolution:=optional, edu.wisc.ssec.mcidas;resolution:=optional, diff --git a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java index 75deb34..54f10ae 100644 --- a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java +++ b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java @@ -283,4 +283,23 @@ public class BundleIT { assertTrue(content.contains("testXML.xml")); assertTrue(content.contains("Rida Benjelloun")); } + + @Test + public void testPoiTikaBundle() throws Exception { + Tika tika = new Tika(); + + // Package extraction + ContentHandler handler = new BodyContentHandler(); + + Parser parser = tika.getParser(); + ParseContext context = new ParseContext(); + context.set(Parser.class, parser); + + try (InputStream stream = TikaInputStream.get(Paths.get("src/test/resources/testPPT.pptx"))) { + parser.parse(stream, handler, new Metadata(), context); + } + + String content = handler.toString(); + assertTrue(content.contains("Attachment Test")); + } } diff --git a/tika-bundle/src/test/resources/testPPT.pptx b/tika-bundle/src/test/resources/testPPT.pptx new file mode 100644 index 0000000..38bc6a2 Binary files /dev/null and b/tika-bundle/src/test/resources/testPPT.pptx differ
