Author: jukka
Date: Fri Sep 25 15:55:07 2009
New Revision: 818892

URL: http://svn.apache.org/viewvc?rev=818892&view=rev
Log:
TIKA-283: XWPFWordExtractorDecorator does not extract links in tables

Minor cleanups and clarifications

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=818892&r1=818891&r2=818892&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Fri Sep 25 15:55:07 2009
@@ -29,11 +29,9 @@
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
 import org.xml.sax.SAXException;
 
 public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
@@ -71,8 +69,7 @@
         }
 
         // then all document tables
-        extractTableContent(document, document.getDocument().getBody().getTblArray(),
-                xhtml);
+        extractTableContent(document, xhtml);
 
         // footers
         if (hfPolicy.getFirstPageFooter() != null) {
@@ -89,9 +86,9 @@
     /**
      * Low level structured parsing of document tables.
      */
-    private void extractTableContent(XWPFDocument doc, CTTbl[] tables, XHTMLContentHandler xhtml)
+    private void extractTableContent(XWPFDocument doc, XHTMLContentHandler xhtml)
             throws SAXException {
-        for (CTTbl table : tables) {
+        for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
             xhtml.startElement("table");
             xhtml.startElement("tbody");
             CTRow[] rows = table.getTrArray();
@@ -119,7 +116,11 @@
         }
     }
 
-    private class MyXWPFParagraph extends XWPFParagraph {
+    /**
+     * Private wrapper class that makes the protected {@link XWPFParagraph}
+     * constructor available.
+     */
+    private static class MyXWPFParagraph extends XWPFParagraph {
         private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
             super(ctp, xwpfDocument);
         }


Reply via email to