Author: tallison
Date: Thu Oct 8 02:23:46 2015
New Revision: 1707432
URL: http://svn.apache.org/viewvc?rev=1707432&view=rev
Log:
TIKA-1755 make div and other formatting more consistent btwn PPT and PPTX
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.ppt
(with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.pptx
(with props)
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1707432&r1=1707431&r2=1707432&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Thu Oct 8 02:23:46 2015
@@ -1,10 +1,13 @@
Release 1.11 - Current Development
+
+ * Make div and other markup more consistent between PPT and
+ PPTX (TIKA-1755).
* Parse multiple authors from MSOffice's semi-colon delimited
author field (TIKA-1765).
* Include CTAKESConfig.properties within tika-parsers resources
- by default (TIKA-1741)
+ by default (TIKA-1741).
* Prevent infinite recursion when processing inline images
in PDF files by limiting extraction of duplicate images
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java?rev=1707432&r1=1707431&r2=1707432&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
Thu Oct 8 02:23:46 2015
@@ -106,16 +106,27 @@ public class HSLFExtractor extends Abstr
}
// Comments, if present
+ StringBuilder authorStringBuilder = new StringBuilder();
for (Comment comment : slide.getComments()) {
+ authorStringBuilder.setLength(0);
xhtml.startElement("p", "class", "slide-comment");
- if (comment.getAuthor() != null) {
- xhtml.startElement("b");
- xhtml.characters(comment.getAuthor());
- xhtml.endElement("b");
+ if (comment.getAuthor() != null) {
+ authorStringBuilder.append(comment.getAuthor());
+ }
+ if (comment.getAuthorInitials() != null) {
+ if (authorStringBuilder.length() > 0) {
+ authorStringBuilder.append(" ");
+ }
+ authorStringBuilder.append("("+comment.getAuthorInitials()+")");
+ }
+ if (authorStringBuilder.length() > 0) {
if (comment.getText() != null) {
- xhtml.characters(" - ");
+ authorStringBuilder.append(" - ");
}
+ xhtml.startElement("b");
+ xhtml.characters(authorStringBuilder.toString());
+ xhtml.endElement("b");
}
if (comment.getText() != null) {
xhtml.characters(comment.getText());
@@ -136,7 +147,7 @@ public class HSLFExtractor extends Abstr
xhtml.endElement("div");
/* notes */
- xhtml.startElement("div", "class", "slideNotes");
+ xhtml.startElement("div", "class", "slide-notes");
HashSet<Integer> seenNotes = new HashSet<>();
HeadersFooters hf = ss.getNotesHeadersFooters();
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1707432&r1=1707431&r2=1707432&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
Thu Oct 8 02:23:46 2015
@@ -31,6 +31,7 @@ import org.apache.poi.xslf.XSLFSlideShow
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.Placeholder;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
import org.apache.poi.xslf.usermodel.XSLFComments;
import org.apache.poi.xslf.usermodel.XSLFGraphicFrame;
import org.apache.poi.xslf.usermodel.XSLFGroupShape;
@@ -45,6 +46,7 @@ import org.apache.poi.xslf.usermodel.XSL
import org.apache.poi.xslf.usermodel.XSLFTable;
import org.apache.poi.xslf.usermodel.XSLFTableCell;
import org.apache.poi.xslf.usermodel.XSLFTableRow;
+import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.ParseContext;
@@ -52,6 +54,7 @@ import org.apache.tika.sax.XHTMLContentH
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
import org.openxmlformats.schemas.presentationml.x2006.main.CTPicture;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
@@ -68,6 +71,7 @@ public class XSLFPowerPointExtractorDeco
*/
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
IOException {
XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
+ XSLFCommentAuthors commentAuthors = slideShow.getCommentAuthors();
List<XSLFSlide> slides = slideShow.getSlides();
for (XSLFSlide slide : slides) {
@@ -79,12 +83,16 @@ public class XSLFPowerPointExtractorDeco
slideDesc = null;
}
- // slide
+ // slide content
+ xhtml.startElement("div", "class", "slide-content");
extractContent(slide.getShapes(), false, xhtml, slideDesc);
+ xhtml.endElement("div");
// slide layout which is the master sheet for this slide
+ xhtml.startElement("div", "class", "slide-master-content");
XSLFSlideLayout slideLayout = slide.getMasterSheet();
extractContent(slideLayout.getShapes(), true, xhtml, null);
+ xhtml.endElement("div");
// slide master which is the master sheet for all text layouts
XSLFSheet slideMaster = slideLayout.getMasterSheet();
@@ -93,19 +101,46 @@ public class XSLFPowerPointExtractorDeco
// notes (if present)
XSLFNotes slideNotes = slide.getNotes();
if (slideNotes != null) {
+ xhtml.startElement("div", "class", "slide-notes");
+
extractContent(slideNotes.getShapes(), false, xhtml,
slideDesc);
// master sheet for this notes
XSLFNotesMaster notesMaster = slideNotes.getMasterSheet();
extractContent(notesMaster.getShapes(), true, xhtml, null);
+ xhtml.endElement("div");
}
// comments (if present)
XSLFComments comments = slide.getComments();
if (comments != null) {
+ StringBuilder authorStringBuilder = new StringBuilder();
for (int i = 0; i < comments.getNumberOfComments(); i++) {
+ authorStringBuilder.setLength(0);
CTComment comment = comments.getCommentAt(i);
- xhtml.element("p", comment.getText());
+ xhtml.startElement("p", "class", "slide-comment");
+ CTCommentAuthor cta = commentAuthors.getAuthorById(comment.getAuthorId());
+ if (cta != null) {
+ if (cta.getName() != null) {
+ authorStringBuilder.append(cta.getName());
+ }
+ if (cta.getInitials() != null) {
+ if (authorStringBuilder.length() > 0) {
+ authorStringBuilder.append(" ");
+ }
+ authorStringBuilder.append("("+cta.getInitials()+")");
+ }
+ if (comment.getText() != null && authorStringBuilder.length() > 0) {
+ authorStringBuilder.append(" - ");
+ }
+ if (authorStringBuilder.length() > 0) {
+ xhtml.startElement("b");
+ xhtml.characters(authorStringBuilder.toString());
+ xhtml.endElement("b");
+ }
+ }
+ xhtml.characters(comment.getText());
+ xhtml.endElement("p");
}
}
}
@@ -120,17 +155,16 @@ public class XSLFPowerPointExtractorDeco
if (skipPlaceholders && ph != null) {
continue;
}
- xhtml.element("p", txt.getText());
+ for (XSLFTextParagraph p : txt.getTextParagraphs()) {
+ xhtml.element("p", p.getText());
+ }
} else if (sh instanceof XSLFGroupShape) {
// recurse into groups of shapes
XSLFGroupShape group = (XSLFGroupShape) sh;
extractContent(group.getShapes(), skipPlaceholders, xhtml,
slideDesc);
} else if (sh instanceof XSLFTable) {
- XSLFTable tbl = (XSLFTable) sh;
- for (XSLFTableRow row : tbl) {
- List<XSLFTableCell> cells = row.getCells();
- extractContent(cells, skipPlaceholders, xhtml, slideDesc);
- }
+ //unlike tables in Word, ppt/x can't have recursive tables...I don't think
+ extractTable((XSLFTable)sh, xhtml);
} else if (sh instanceof XSLFGraphicFrame) {
XSLFGraphicFrame frame = (XSLFGraphicFrame) sh;
XmlObject[] sp = frame.getXmlObject().selectPath(
@@ -172,6 +206,22 @@ public class XSLFPowerPointExtractorDeco
}
}
+ private void extractTable(XSLFTable tbl, XHTMLContentHandler xhtml) throws SAXException {
+ xhtml.startElement("table");
+ for (XSLFTableRow row : tbl) {
+ xhtml.startElement("tr");
+ List<XSLFTableCell> cells = row.getCells();
+ for (XSLFTableCell c : row.getCells()) {
+ xhtml.startElement("td");
+ xhtml.characters(c.getText());
+ xhtml.endElement("td");
+ }
+ xhtml.endElement("tr");
+ }
+ xhtml.endElement("table");
+
+ }
+
/**
* In PowerPoint files, slides have things embedded in them,
* and slide drawings which have the images
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java?rev=1707432&r1=1707431&r2=1707432&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
Thu Oct 8 02:23:46 2015
@@ -55,72 +55,66 @@ public class PowerPointParserTest extend
@Test
public void testVarious() throws Exception {
- ContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
-
- try (InputStream stream = PowerPointParserTest.class.getResourceAsStream(
- "/test-documents/testPPT_various.ppt")) {
- new OfficeParser().parse(stream, handler, metadata, new ParseContext());
- }
-
- String content = handler.toString();
- //content = content.replaceAll("\\s+"," ");
- assertContains("Footnote appears here", content);
- assertContains("This is a footnote.", content);
- assertContains("This is the header text.", content);
- assertContains("This is the footer text.", content);
- assertContains("Here is a text box", content);
- assertContains("Bold", content);
- assertContains("italic", content);
- assertContains("underline", content);
- assertContains("superscript", content);
- assertContains("subscript", content);
- assertContains("Here is a citation:", content);
- assertContains("Figure 1 This is a caption for Figure 1", content);
- assertContains("(Kramer)", content);
- assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", content.replaceAll("\\s+"," "));
- assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," "));
- assertContains("This is a hyperlink", content);
- assertContains("Here is a list:", content);
+ String xml = getXML("testPPT_various.ppt", metadata).xml;
+ assertContains("<p>Footnote appears here", xml);
+ assertContains("<p>[1]This is a footnote.", xml);
+ assertContains("<p>This is the header text.</p>", xml);
+ assertContains("<p>This is the footer text.</p>", xml);
+ assertContains("<p>Here is a text box</p>", xml);
+ //TODO: fix this spacing: assertContains("<p>Bold ", xml);
+ assertContains("italic", xml);
+ assertContains("underline", xml);
+ assertContains("superscript", xml);
+ assertContains("subscript", xml);
+ assertContains("<p>Here is a citation:", xml);
+ assertContains("Figure 1 This is a caption for Figure 1", xml);
+ assertContains("(Kramer)", xml);
+ assertContains("<table><tr>\t<td>Row 1 Col 1</td>", xml);
+ assertContains("<td>Row 2 Col 2</td>\t<td>Row 2 Col 3</td></tr>", xml);
+ assertContains("<p>Row 1 column 1</p>", xml);
+ assertContains("<p>Row 2 column 2</p>", xml);
+ assertContains("<p>This is a hyperlink", xml);
+ assertContains("<p>Here is a list:", xml);
for(int row=1;row<=3;row++) {
//assertContains("·\tBullet " + row, content);
//assertContains("\u00b7\tBullet " + row, content);
- // TODO OfficeParser fails to extract the bullet symbol
- assertContains("Bullet " + row, content);
+ assertContains("<p>Bullet " + row, xml);
}
- assertContains("Here is a numbered list:", content);
+ assertContains("Here is a numbered list:", xml);
for(int row=1;row<=3;row++) {
//assertContains(row + ")\tNumber bullet " + row, content);
//assertContains(row + ") Number bullet " + row, content);
- // TODO: OfficeParser fails to number the bullets:
- assertContains("Number bullet " + row, content);
+ // TODO: OOXMLExtractor fails to number the bullets:
+ assertContains("<p>Number bullet " + row, xml);
}
for(int row=1;row<=2;row++) {
for(int col=1;col<=3;col++) {
- assertContains("Row " + row + " Col " + col, content);
+ assertContains("Row " + row + " Col " + col, xml);
}
}
-
- assertContains("Keyword1 Keyword2", content);
+ assertContains("Keyword1 Keyword2", xml);
assertEquals("Keyword1 Keyword2",
metadata.get(TikaCoreProperties.KEYWORDS));
- assertContains("Subject is here", content);
+ assertContains("Subject is here", xml);
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
// TODO: Remove subject in Tika 2.0
assertEquals("Subject is here",
metadata.get(Metadata.SUBJECT));
- assertContains("Suddenly some Japanese text:", content);
+ assertContains("Suddenly some Japanese text:", xml);
// Special version of (GHQ)
- assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ assertContains("\uff08\uff27\uff28\uff31\uff09", xml);
// 6 other characters
- assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f",
+ xml);
- assertContains("And then some Gothic text:", content);
- assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+ assertContains("And then some Gothic text:", xml);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A",
+ xml);
}
@Test
@@ -238,4 +232,10 @@ public class PowerPointParserTest extend
"<div class=\"slide-content\"><p>Now</p>",
result.xml);
}
+
+ @Test
+ public void testCommentAuthorship() throws Exception {
+ XMLResult r = getXML("testPPT_comment.ppt");
+ assertContains("<p class=\"slide-comment\"><b>Allison, Timothy B. (ATB)", r.xml);
+ }
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1707432&r1=1707431&r2=1707432&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Thu Oct 8 02:23:46 2015
@@ -621,71 +621,71 @@ public class OOXMLParserTest extends Tik
@Test
public void testVariousPPTX() throws Exception {
- ContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
-
- try (InputStream stream = OOXMLParserTest.class.getResourceAsStream(
- "/test-documents/testPPT_various.pptx")) {
- new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
- }
-
- String content = handler.toString();
- //content = content.replaceAll("\\s+"," ");
- assertContains("Footnote appears here", content);
- assertContains("This is a footnote.", content);
- assertContains("This is the header text.", content);
- assertContains("This is the footer text.", content);
- assertContains("Here is a text box", content);
- assertContains("Bold", content);
- assertContains("italic", content);
- assertContains("underline", content);
- assertContains("superscript", content);
- assertContains("subscript", content);
- assertContains("Here is a citation:", content);
- assertContains("Figure 1 This is a caption for Figure 1", content);
- assertContains("(Kramer)", content);
- assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", content.replaceAll("\\s+"," "));
- assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," "));
- assertContains("This is a hyperlink", content);
- assertContains("Here is a list:", content);
+ String xml = getXML("testPPT_various.pptx", metadata).xml;
+ assertContains("<p>Footnote appears here", xml);
+ assertContains("<p>[1] This is a footnote.", xml);
+ assertContains("<p>This is the header text.</p>", xml);
+ assertContains("<p>This is the footer text.</p>", xml);
+ assertContains("<p>Here is a text box</p>", xml);
+ assertContains("<p>Bold", xml);
+ assertContains("italic", xml);
+ assertContains("underline", xml);
+ assertContains("superscript", xml);
+ assertContains("subscript", xml);
+ assertContains("<p>Here is a citation:", xml);
+ assertContains("Figure 1 This is a caption for Figure 1", xml);
+ assertContains("(Kramer)", xml);
+ assertContains("<table><tr>\t<td>Row 1 Col 1</td>", xml);
+ assertContains("<td>Row 2 Col 2</td>\t<td>Row 2 Col 3</td></tr>", xml);
+ assertContains("<p>Row 1 column 1</p>", xml);
+ assertContains("<p>Row 2 column 2</p>", xml);
+ assertContains("<p>This is a hyperlink", xml);
+ assertContains("<p>Here is a list:", xml);
for(int row=1;row<=3;row++) {
//assertContains("·\tBullet " + row, content);
//assertContains("\u00b7\tBullet " + row, content);
- assertContains("Bullet " + row, content);
+ assertContains("<p>Bullet " + row, xml);
}
- assertContains("Here is a numbered list:", content);
+ assertContains("Here is a numbered list:", xml);
for(int row=1;row<=3;row++) {
//assertContains(row + ")\tNumber bullet " + row, content);
//assertContains(row + ") Number bullet " + row, content);
// TODO: OOXMLExtractor fails to number the bullets:
- assertContains("Number bullet " + row, content);
+ assertContains("<p>Number bullet " + row, xml);
}
for(int row=1;row<=2;row++) {
for(int col=1;col<=3;col++) {
- assertContains("Row " + row + " Col " + col, content);
+ assertContains("Row " + row + " Col " + col, xml);
}
}
- assertContains("Keyword1 Keyword2", content);
+ assertContains("Keyword1 Keyword2", xml);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Metadata.KEYWORDS));
- assertContains("Subject is here", content);
+ assertContains("Subject is here", xml);
// TODO: Remove subject in Tika 2.0
assertEquals("Subject is here",
metadata.get(Metadata.SUBJECT));
assertEquals("Subject is here",
- metadata.get(OfficeOpenXMLCore.SUBJECT));
+ metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertContains("Suddenly some Japanese text:", content);
+ assertContains("Suddenly some Japanese text:", xml);
// Special version of (GHQ)
- assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ assertContains("\uff08\uff27\uff28\uff31\uff09", xml);
// 6 other characters
- assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", xml);
- assertContains("And then some Gothic text:", content);
- assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+ assertContains("And then some Gothic text:", xml);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", xml);
+ }
+
+ @Test
+ public void testCommentPPTX() throws Exception {
+ XMLResult r = getXML("testPPT_comment.pptx");
+ assertContains("<p class=\"slide-comment\"><b>Allison, Timothy B. (ATB)", r.xml);
}
@Test
@@ -1046,9 +1046,8 @@ public class OOXMLParserTest extends Tik
@Test
public void testPPTXThumbnail() throws Exception {
String xml = getXML("testPPTX_Thumbnail.pptx").xml;
- int a = xml.indexOf("<body><p>This file contains an embedded thumbnail</p>");
+ int a = xml.indexOf("<body><div class=\"slide-content\"><p>This file contains an embedded thumbnail");
int b = xml.indexOf("<div class=\"embedded\" id=\"/docProps/thumbnail.jpeg\" />");
-
assertTrue(a != -1);
assertTrue(b != -1);
assertTrue(a < b);
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.ppt
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.ppt?rev=1707432&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.ppt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.pptx
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.pptx?rev=1707432&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_comment.pptx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream