Author: mikemccand
Date: Wed Oct 10 11:15:31 2012
New Revision: 1396544

URL: http://svn.apache.org/viewvc?rev=1396544&view=rev
Log:
TIKA-997: also leave placeholder for embedded images

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1396544&r1=1396543&r2=1396544&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Wed Oct 10 11:15:31 2012
@@ -34,6 +34,7 @@ import org.apache.poi.xslf.usermodel.XML
 import org.apache.poi.xslf.usermodel.XSLFComments;
 import org.apache.poi.xslf.usermodel.XSLFGraphicFrame;
 import org.apache.poi.xslf.usermodel.XSLFGroupShape;
+import org.apache.poi.xslf.usermodel.XSLFPictureShape;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xslf.usermodel.XSLFShape;
 import org.apache.poi.xslf.usermodel.XSLFSheet;
@@ -48,6 +49,7 @@ import org.apache.tika.sax.XHTMLContentH
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlObject;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
@@ -133,6 +135,20 @@ public class XSLFPowerPointExtractorDeco
                         }
                     }
                 }
+            } else if (sh instanceof XSLFPictureShape) {
+                if (!skipPlaceholders && (sh.getXmlObject() instanceof CTPicture)) {
+                    CTPicture ctPic = ((CTPicture) sh.getXmlObject());
+                    if (ctPic.getBlipFill() != null && ctPic.getBlipFill().getBlip() != null) {
+                        String relID = ctPic.getBlipFill().getBlip().getEmbed();
+                        if (relID != null) {
+                            AttributesImpl attributes = new AttributesImpl();
+                            attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+                            attributes.addAttribute("", "id", "id", "CDATA", relID);
+                            xhtml.startElement("div", attributes);
+                            xhtml.endElement("div");
+                        }
+                    }
+                }
             }
         }
     }
@@ -167,7 +183,7 @@ public class XSLFPowerPointExtractorDeco
           // If it has drawings, return those too
           try {
              for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
-                if(rel.getTargetMode() == TargetMode.INTERNAL) {
+               if(rel.getTargetMode() == TargetMode.INTERNAL) {
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    parts.add( rel.getPackage().getPart(relName) );
                 }

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1396544&r1=1396543&r2=1396544&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Wed Oct 10 11:15:31 2012
@@ -885,12 +885,15 @@ public class OOXMLParserTest extends Tik
             input.close();
         }
         String xml = sw.toString();
+        int h = xml.indexOf("<div class=\"embedded\" id=\"rId3\"/>");
         int i = xml.indexOf("Send me a note");
         int j = xml.indexOf("<div class=\"embedded\" id=\"rId4\"/>");
         int k = xml.indexOf("<p>No title</p>");
+        assertTrue(h != -1);
         assertTrue(i != -1);
         assertTrue(j != -1);
         assertTrue(k != -1);
+        assertTrue(h < i);
         assertTrue(i < j);
         assertTrue(j < k);
     }


Reply via email to