Author: fanningpj
Date: Fri Jul 28 07:42:23 2017
New Revision: 1803250

URL: http://svn.apache.org/viewvc?rev=1803250&view=rev
Log:
[Bug-61354] fix issue with extracting text from Word docs. This closes #66

Added:
    poi/trunk/test-data/document/MultipleBodyBug.docx
Modified:
    poi/trunk/.gitignore
    poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java

Modified: poi/trunk/.gitignore
URL: http://svn.apache.org/viewvc/poi/trunk/.gitignore?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/.gitignore (original)
+++ poi/trunk/.gitignore Fri Jul 28 07:42:23 2017
@@ -45,3 +45,4 @@ sonar/*/target
 .ant-targets-build.xml
 build
 dist
+lib/

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java Fri Jul 28 07:42:23 2017
@@ -156,26 +156,34 @@ public class XWPFDocument extends POIXML
 
             // parse the document with cursor and add
             // the XmlObject to its lists
-            XmlCursor cursor = ctDocument.getBody().newCursor();
-            cursor.selectPath("./*");
-            while (cursor.toNextSelection()) {
-                XmlObject o = cursor.getObject();
-                if (o instanceof CTP) {
-                    XWPFParagraph p = new XWPFParagraph((CTP) o, this);
-                    bodyElements.add(p);
-                    paragraphs.add(p);
-                } else if (o instanceof CTTbl) {
-                    XWPFTable t = new XWPFTable((CTTbl) o, this);
-                    bodyElements.add(t);
-                    tables.add(t);
-                } else if (o instanceof CTSdtBlock) {
-                    XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
-                    bodyElements.add(c);
-                    contentControls.add(c);
+            XmlCursor docCursor = ctDocument.newCursor();
+            docCursor.selectPath("./*");
+            while (docCursor.toNextSelection()) {
+                XmlObject o = docCursor.getObject();
+                if (o instanceof CTBody) {
+                    XmlCursor bodyCursor = o.newCursor();
+                    bodyCursor.selectPath("./*");
+                    while (bodyCursor.toNextSelection()) {
+                        XmlObject bodyObj = bodyCursor.getObject();
+                        if (bodyObj instanceof CTP) {
+                            XWPFParagraph p = new XWPFParagraph((CTP) bodyObj,
+                                    this);
+                            bodyElements.add(p);
+                            paragraphs.add(p);
+                        } else if (bodyObj instanceof CTTbl) {
+                            XWPFTable t = new XWPFTable((CTTbl) bodyObj, this);
+                            bodyElements.add(t);
+                            tables.add(t);
+                        } else if (bodyObj instanceof CTSdtBlock) {
+                            XWPFSDT c = new XWPFSDT((CTSdtBlock) bodyObj, this);
+                            bodyElements.add(c);
+                            contentControls.add(c);
+                        }
+                    }
+                    bodyCursor.dispose();
                 }
             }
-            cursor.dispose();
-
+            docCursor.dispose();
             // Sort out headers and footers
             if (doc.getDocument().getBody().getSectPr() != null)
                 headerFooterPolicy = new XWPFHeaderFooterPolicy(this);

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Fri Jul 28 07:42:23 2017
@@ -411,4 +411,14 @@ public class TestXWPFWordExtractor exten
                 "In Sequence:\n|X||_||X|\n", extractor.getText());
         extractor.close();
     }
+    
+    public void testMultipleBodyBug() throws IOException {
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("MultipleBodyBug.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+        assertEquals("START BODY 1 The quick, brown fox jumps over a lazy dog. END BODY 1.\n"
+                        + "START BODY 2 The quick, brown fox jumps over a lazy dog. END BODY 2.\n"
+                        + "START BODY 3 The quick, brown fox jumps over a lazy dog. END BODY 3.\n",
+                extractor.getText());
+        extractor.close();
+    }
 }

Added: poi/trunk/test-data/document/MultipleBodyBug.docx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/MultipleBodyBug.docx?rev=1803250&view=auto
==============================================================================
Binary files poi/trunk/test-data/document/MultipleBodyBug.docx (added) and poi/trunk/test-data/document/MultipleBodyBug.docx Fri Jul 28 07:42:23 2017 differ



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to