Author: jukka
Date: Fri Sep 25 15:55:07 2009
New Revision: 818892
URL: http://svn.apache.org/viewvc?rev=818892&view=rev
Log:
TIKA-283: XWPFWordExtractorDecorator does not extract links in tables
Minor cleanups and clarifications
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=818892&r1=818891&r2=818892&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Fri Sep 25 15:55:07 2009
@@ -29,11 +29,9 @@
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.xml.sax.SAXException;
public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
@@ -71,8 +69,7 @@
}
// then all document tables
- extractTableContent(document,
document.getDocument().getBody().getTblArray(),
- xhtml);
+ extractTableContent(document, xhtml);
// footers
if (hfPolicy.getFirstPageFooter() != null) {
@@ -89,9 +86,9 @@
/**
* Low level structured parsing of document tables.
*/
- private void extractTableContent(XWPFDocument doc, CTTbl[] tables,
XHTMLContentHandler xhtml)
+ private void extractTableContent(XWPFDocument doc, XHTMLContentHandler
xhtml)
throws SAXException {
- for (CTTbl table : tables) {
+ for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
xhtml.startElement("table");
xhtml.startElement("tbody");
CTRow[] rows = table.getTrArray();
@@ -119,7 +116,11 @@
}
}
- private class MyXWPFParagraph extends XWPFParagraph {
+ /**
+ * Private wrapper class that makes the protected {@link XWPFParagraph}
+ * constructor available.
+ */
+ private static class MyXWPFParagraph extends XWPFParagraph {
private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
super(ctp, xwpfDocument);
}