Author: jukka
Date: Fri Oct  2 11:10:17 2009
New Revision: 820962

URL: http://svn.apache.org/viewvc?rev=820962&view=rev
Log:
TIKA-293: XWPFWordExtractorDecorator does not extract bookmarks

Patch by Maxim Valyanskiy.

Modified:
    
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java

Modified: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=820962&r1=820961&r2=820962&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Fri Oct  2 11:10:17 2009
@@ -28,6 +28,7 @@
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
@@ -70,9 +71,15 @@
                     new XWPFHeaderFooterPolicy(document, ctSectPr);
                 extractHeaders(xhtml, headerFooterPolicy);
             }
-            
+
             XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
                     new XWPFHyperlinkDecorator(paragraph, null, true));
+
+            CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
+            for (CTBookmark bookmark : bookmarks) {
+                xhtml.element("p", bookmark.getName());
+            }
+
             xhtml.element("p", decorator.getText());
 
             if (ctSectPr != null) {


Reply via email to