Author: fanningpj
Date: Fri Jul 28 07:42:23 2017
New Revision: 1803250
URL: http://svn.apache.org/viewvc?rev=1803250&view=rev
Log:
[Bug-61354] fix issue with extracting text from Word docs. This closes #66
Added:
poi/trunk/test-data/document/MultipleBodyBug.docx
Modified:
poi/trunk/.gitignore
poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
Modified: poi/trunk/.gitignore
URL:
http://svn.apache.org/viewvc/poi/trunk/.gitignore?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/.gitignore (original)
+++ poi/trunk/.gitignore Fri Jul 28 07:42:23 2017
@@ -45,3 +45,4 @@ sonar/*/target
.ant-targets-build.xml
build
dist
+lib/
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java Fri Jul 28 07:42:23 2017
@@ -156,26 +156,34 @@ public class XWPFDocument extends POIXML
// parse the document with cursor and add
// the XmlObject to its lists
- XmlCursor cursor = ctDocument.getBody().newCursor();
- cursor.selectPath("./*");
- while (cursor.toNextSelection()) {
- XmlObject o = cursor.getObject();
- if (o instanceof CTP) {
- XWPFParagraph p = new XWPFParagraph((CTP) o, this);
- bodyElements.add(p);
- paragraphs.add(p);
- } else if (o instanceof CTTbl) {
- XWPFTable t = new XWPFTable((CTTbl) o, this);
- bodyElements.add(t);
- tables.add(t);
- } else if (o instanceof CTSdtBlock) {
- XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
- bodyElements.add(c);
- contentControls.add(c);
+ XmlCursor docCursor = ctDocument.newCursor();
+ docCursor.selectPath("./*");
+ while (docCursor.toNextSelection()) {
+ XmlObject o = docCursor.getObject();
+ if (o instanceof CTBody) {
+ XmlCursor bodyCursor = o.newCursor();
+ bodyCursor.selectPath("./*");
+ while (bodyCursor.toNextSelection()) {
+ XmlObject bodyObj = bodyCursor.getObject();
+ if (bodyObj instanceof CTP) {
+ XWPFParagraph p = new XWPFParagraph((CTP) bodyObj,
+ this);
+ bodyElements.add(p);
+ paragraphs.add(p);
+ } else if (bodyObj instanceof CTTbl) {
+ XWPFTable t = new XWPFTable((CTTbl) bodyObj, this);
+ bodyElements.add(t);
+ tables.add(t);
+ } else if (bodyObj instanceof CTSdtBlock) {
+ XWPFSDT c = new XWPFSDT((CTSdtBlock) bodyObj, this);
+ bodyElements.add(c);
+ contentControls.add(c);
+ }
+ }
+ bodyCursor.dispose();
}
}
- cursor.dispose();
-
+ docCursor.dispose();
// Sort out headers and footers
if (doc.getDocument().getBody().getSectPr() != null)
headerFooterPolicy = new XWPFHeaderFooterPolicy(this);
Modified:
poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1803250&r1=1803249&r2=1803250&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Fri Jul 28 07:42:23 2017
@@ -411,4 +411,14 @@ public class TestXWPFWordExtractor exten
"In Sequence:\n|X||_||X|\n", extractor.getText());
extractor.close();
}
+
+ public void testMultipleBodyBug() throws IOException {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("MultipleBodyBug.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+ assertEquals("START BODY 1 The quick, brown fox jumps over a lazy dog. END BODY 1.\n"
+ + "START BODY 2 The quick, brown fox jumps over a lazy dog. END BODY 2.\n"
+ + "START BODY 3 The quick, brown fox jumps over a lazy dog. END BODY 3.\n",
+ extractor.getText());
+ extractor.close();
+ }
}
Added: poi/trunk/test-data/document/MultipleBodyBug.docx
URL:
http://svn.apache.org/viewvc/poi/trunk/test-data/document/MultipleBodyBug.docx?rev=1803250&view=auto
==============================================================================
Binary files poi/trunk/test-data/document/MultipleBodyBug.docx (added) and
poi/trunk/test-data/document/MultipleBodyBug.docx Fri Jul 28 07:42:23 2017
differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]