Author: jukka
Date: Fri Sep 25 15:51:29 2009
New Revision: 818890
URL: http://svn.apache.org/viewvc?rev=818890&view=rev
Log:
TIKA-283: XWPFWordExtractorDecorator does not extract links in tables
Applied the patch by Maxim Valyanskiy
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=818890&r1=818889&r2=818890&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Fri Sep 25 15:51:29 2009
@@ -71,7 +71,7 @@
}
// then all document tables
- extractTableContent(document.getDocument().getBody().getTblArray(),
+ extractTableContent(document, document.getDocument().getBody().getTblArray(),
xhtml);
// footers
@@ -89,7 +89,7 @@
/**
* Low level structured parsing of document tables.
*/
- private void extractTableContent(CTTbl[] tables, XHTMLContentHandler xhtml)
+ private void extractTableContent(XWPFDocument doc, CTTbl[] tables, XHTMLContentHandler xhtml)
throws SAXException {
for (CTTbl table : tables) {
xhtml.startElement("table");
@@ -102,14 +102,14 @@
xhtml.startElement("td");
CTP[] content = tc.getPArray();
for (CTP ctp : content) {
- CTR[] inner = ctp.getRArray();
- for (CTR ctr : inner) {
- CTText[] text = ctr.getTArray();
- for (CTText textContent : text) {
- xhtml.characters(textContent.getStringValue());
- }
- }
+ XWPFParagraph p = new MyXWPFParagraph(ctp, doc);
+
+ XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+ new XWPFHyperlinkDecorator(p, null, true));
+
+ xhtml.element("p", decorator.getText());
}
+
xhtml.endElement("td");
}
xhtml.endElement("tr");
@@ -118,4 +118,10 @@
xhtml.endElement("table");
}
}
+
+ private class MyXWPFParagraph extends XWPFParagraph {
+ private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
+ super(ctp, xwpfDocument);
+ }
+ }
}