Author: mikemccand
Date: Wed Oct 10 11:15:31 2012
New Revision: 1396544
URL: http://svn.apache.org/viewvc?rev=1396544&view=rev
Log:
TIKA-997: also leave placeholder for embedded images
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1396544&r1=1396543&r2=1396544&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Wed Oct 10 11:15:31 2012
@@ -34,6 +34,7 @@ import org.apache.poi.xslf.usermodel.XML
import org.apache.poi.xslf.usermodel.XSLFComments;
import org.apache.poi.xslf.usermodel.XSLFGraphicFrame;
import org.apache.poi.xslf.usermodel.XSLFGroupShape;
+import org.apache.poi.xslf.usermodel.XSLFPictureShape;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSheet;
@@ -48,6 +49,7 @@ import org.apache.tika.sax.XHTMLContentH
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPicture;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
@@ -133,6 +135,20 @@ public class XSLFPowerPointExtractorDeco
}
}
}
+ } else if (sh instanceof XSLFPictureShape) {
+ if (!skipPlaceholders && (sh.getXmlObject() instanceof CTPicture)) {
+ CTPicture ctPic = ((CTPicture) sh.getXmlObject());
+ if (ctPic.getBlipFill() != null && ctPic.getBlipFill().getBlip() != null) {
+ String relID = ctPic.getBlipFill().getBlip().getEmbed();
+ if (relID != null) {
+ AttributesImpl attributes = new AttributesImpl();
+ attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+ attributes.addAttribute("", "id", "id", "CDATA", relID);
+ xhtml.startElement("div", attributes);
+ xhtml.endElement("div");
+ }
+ }
+ }
}
}
}
@@ -167,7 +183,7 @@ public class XSLFPowerPointExtractorDeco
// If it has drawings, return those too
try {
for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
- if(rel.getTargetMode() == TargetMode.INTERNAL) {
+ if(rel.getTargetMode() == TargetMode.INTERNAL) {
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
parts.add( rel.getPackage().getPart(relName) );
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1396544&r1=1396543&r2=1396544&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Wed Oct 10 11:15:31 2012
@@ -885,12 +885,15 @@ public class OOXMLParserTest extends Tik
input.close();
}
String xml = sw.toString();
+ int h = xml.indexOf("<div class=\"embedded\" id=\"rId3\"/>");
int i = xml.indexOf("Send me a note");
int j = xml.indexOf("<div class=\"embedded\" id=\"rId4\"/>");
int k = xml.indexOf("<p>No title</p>");
+ assertTrue(h != -1);
assertTrue(i != -1);
assertTrue(j != -1);
assertTrue(k != -1);
+ assertTrue(h < i);
assertTrue(i < j);
assertTrue(j < k);
}