Author: tallison
Date: Mon Aug  4 16:51:40 2014
New Revision: 1615675

URL: http://svn.apache.org/r1615675
Log:
TIKA-1317 extract contents from SDTs within cells in tables in XWPF (docx) files

Modified:
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=1615675&r1=1615674&r2=1615675&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Mon Aug  4 16:51:40 2014
@@ -29,8 +29,10 @@ import org.apache.poi.xwpf.model.XWPFHea
 import org.apache.poi.xwpf.usermodel.BodyType;
 import org.apache.poi.xwpf.usermodel.IBody;
 import org.apache.poi.xwpf.usermodel.IBodyElement;
+import org.apache.poi.xwpf.usermodel.ICell;
 import org.apache.poi.xwpf.usermodel.IRunElement;
 import org.apache.poi.xwpf.usermodel.ISDTContent;
+import org.apache.poi.xwpf.usermodel.XWPFSDTCell;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.apache.poi.xwpf.usermodel.XWPFHeaderFooter;
 import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
@@ -332,10 +334,14 @@ public class XWPFWordExtractorDecorator 
        xhtml.startElement("tbody");
        for(XWPFTableRow row : table.getRows()) {
           xhtml.startElement("tr");
-          for(XWPFTableCell cell : row.getTableCells()) {
-             xhtml.startElement("td");
-             extractIBodyText(cell, xhtml);
-             xhtml.endElement("td");
+          for(ICell cell : row.getTableICells()){
+              xhtml.startElement("td");
+              if (cell instanceof XWPFTableCell) {
+                  extractIBodyText((XWPFTableCell)cell, xhtml);
+              } else if (cell instanceof XWPFSDTCell) {
+                  xhtml.characters(((XWPFSDTCell)cell).getContent().getText());
+              }
+              xhtml.endElement("td");
           }
           xhtml.endElement("tr");
        }

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1615675&r1=1615674&r2=1615675&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Mon Aug  4 16:51:40 2014
@@ -987,6 +987,7 @@ public class OOXMLParserTest extends Tik
     /**
      * Test for missing text described in 
      * <a href="https://issues.apache.org/jira/browse/TIKA-1130">TIKA-1130</a>.
+     * and TIKA-1317
      */
     @Test
     public void testMissingText() throws Exception {
@@ -1002,6 +1003,7 @@ public class OOXMLParserTest extends Tik
                     metadata.get(Metadata.CONTENT_TYPE));
             assertTrue(handler.toString().contains("BigCompany"));
             assertTrue(handler.toString().contains("Seasoned"));
+            assertTrue(handler.toString().contains("Rich_text_in_cell"));
         } finally {
             input.close();
         }


Reply via email to