Author: nick
Date: Tue Aug 30 14:59:56 2011
New Revision: 1163248

URL: http://svn.apache.org/viewvc?rev=1163248&view=rev
Log:
TIKA-700 Upgrade the POI dependency to 3.8 Beta 4

Modified:
    tika/trunk/tika-parsers/pom.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java

Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Tue Aug 30 14:59:56 2011
@@ -35,7 +35,7 @@
   <url>http://tika.apache.org/</url>
 
   <properties>
-    <poi.version>3.8-beta3</poi.version>
+    <poi.version>3.8-beta4</poi.version>
   </properties>
 
   <dependencies>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Tue Aug 30 14:59:56 2011
@@ -26,8 +26,8 @@ import org.apache.poi.extractor.Extracto
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xslf.XSLFSlideShow;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
@@ -78,7 +78,7 @@ public class OOXMLExtractorFactory {
                      "Expecting UserModel based POI OOXML extractor with a 
document, but none found. " +
                      "The extractor returned was a " + poiExtractor
                );
-            } else if (document instanceof XSLFSlideShow) {
+            } else if (document instanceof XMLSlideShow) {
                 extractor = new XSLFPowerPointExtractorDecorator(
                         context, (XSLFPowerPointExtractor) poiExtractor);
             } else if (document instanceof XWPFDocument) {

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Tue Aug 30 14:59:56 2011
@@ -28,10 +28,11 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.TargetMode;
 import org.apache.poi.xslf.XSLFSlideShow;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
-import org.apache.poi.xslf.usermodel.DrawingParagraph;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -39,7 +40,6 @@ import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
 import org.xml.sax.SAXException;
 
@@ -55,20 +55,34 @@ public class XSLFPowerPointExtractorDeco
     @Override
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
             XmlException, IOException {
-        XSLFSlideShow slideShow = (XSLFSlideShow) extractor.getDocument();
-        XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
+        XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
+        XSLFSlideShow rawSlideShow = null;
+        try {
+           rawSlideShow = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+        } catch(Exception e) {
+           throw new IOException(e);
+        }
 
-        XSLFSlide[] slides = xmlSlideShow.getSlides();
+        XSLFSlide[] slides = slideShow.getSlides();
         for (XSLFSlide slide : slides) {
-            CTSlideIdListEntry slideId = slide._getCTSlideId();
-
-            CTNotesSlide notes = xmlSlideShow._getXSLFSlideShow().getNotes(
-                    slideId);
-            CTCommentList comments = xmlSlideShow._getXSLFSlideShow()
-                    .getSlideComments(slideId);
+           // Find the ID, until we ditch the raw slideshow
+           CTSlideIdListEntry slideId = null;
+           for(CTSlideIdListEntry id : rawSlideShow.getSlideReferences().getSldIdList()) {
+              if(rawSlideShow.getSlidePart(id).getPartName().equals(slide.getPackagePart().getPartName())) {
+                 slideId = id;
+              }
+           }
+           if(slideId == null) {
+              // This shouldn't normally happen
+              continue;
+           }
+           
+            CTNotesSlide notes = rawSlideShow.getNotes(slideId);
+            CTCommentList comments = rawSlideShow.getSlideComments(slideId);
 
             xhtml.startElement("div");
-            extractShapeContent(slide.getCommonSlideData(), xhtml);
+            XSLFCommonSlideData common = new XSLFCommonSlideData(slide.getXmlObject().getCSld());
+            extractShapeContent(common, xhtml);
 
             if (comments != null) {
                 for (CTComment comment : comments.getCmArray()) {
@@ -97,7 +111,13 @@ public class XSLFPowerPointExtractorDeco
     @Override
     protected List<PackagePart> getMainDocumentParts() throws TikaException {
        List<PackagePart> parts = new ArrayList<PackagePart>();
-       XSLFSlideShow document = (XSLFSlideShow) extractor.getDocument();
+       XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
+       XSLFSlideShow document = null;
+       try {
+          document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+       } catch(Exception e) {
+          throw new TikaException(e.getMessage());
+       }
        
       for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) {
           // Add the slide
@@ -113,9 +133,7 @@ public class XSLFPowerPointExtractorDeco
           
           // If it has drawings, return those too
           try {
-             // TODO Improve when we upgrade POI
-//             for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
-             for(PackageRelationship rel : slidePart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing")) {
+             for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
                 if(rel.getTargetMode() == TargetMode.INTERNAL) {
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    parts.add( rel.getPackage().getPart(relName) );


Reply via email to