Author: jukka
Date: Fri Oct 2 11:10:17 2009
New Revision: 820962
URL: http://svn.apache.org/viewvc?rev=820962&view=rev
Log:
TIKA-293: XWPFWordExtractorDecorator does not extract bookmarks
Patch by Maxim Valyanskiy.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=820962&r1=820961&r2=820962&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Fri Oct 2 11:10:17 2009
@@ -28,6 +28,7 @@
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
@@ -70,9 +71,15 @@
new XWPFHeaderFooterPolicy(document, ctSectPr);
extractHeaders(xhtml, headerFooterPolicy);
}
-
+
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
new XWPFHyperlinkDecorator(paragraph, null, true));
+
+ CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
+ for (CTBookmark bookmark : bookmarks) {
+ xhtml.element("p", bookmark.getName());
+ }
+
xhtml.element("p", decorator.getText());
if (ctSectPr != null) {