Author: fanningpj
Date: Sun Aug 28 12:19:08 2022
New Revision: 1903729
URL: http://svn.apache.org/viewvc?rev=1903729&view=rev
Log:
[bug-63575] support capitalized text in XWPFWordExtractor
Added:
poi/trunk/test-data/document/capitalized.docx (with props)
Modified:
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
Modified:
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
URL:
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java?rev=1903729&r1=1903728&r2=1903729&view=diff
==============================================================================
--- poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java (original)
+++ poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java Sun Aug 28 12:19:08 2022
@@ -27,6 +27,7 @@ import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Locale;
import javax.xml.namespace.QName;
@@ -35,10 +36,7 @@ import org.apache.poi.ooxml.POIXMLExcept
import org.apache.poi.ooxml.util.DocumentHelper;
import org.apache.poi.ooxml.util.POIXMLUnits;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.util.HexDump;
-import org.apache.poi.util.Internal;
-import org.apache.poi.util.Removal;
-import org.apache.poi.util.Units;
+import org.apache.poi.util.*;
import org.apache.poi.wp.usermodel.CharacterRun;
import org.apache.xmlbeans.*;
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
@@ -1381,7 +1379,13 @@ public class XWPFRun implements ISDTCont
// come up as instances of CTText, but we don't want them
// in the normal text output
if (!("instrText".equals(node.getLocalName()) &&
XWPFDocument.NS_OOXML_WP_MAIN.equals(node.getNamespaceURI()))) {
- text.append(((CTText) o).getStringValue());
+ String textValue = ((CTText) o).getStringValue();
+ if (textValue != null) {
+ if (isCapitalized() || isSmallCaps()) {
+ textValue = textValue.toUpperCase(LocaleUtil.getUserLocale());
+ }
+ text.append(textValue);
+ }
}
}
@@ -1391,7 +1395,9 @@ public class XWPFRun implements ISDTCont
if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
if (ctfldChar.getFfData() != null) {
for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
- text.append((checkBox.getDefault() != null && POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal())) ? "|X|" : "|_|");
+ String textValue = checkBox.getDefault() != null && POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal()) ?
+ "|X|" : "|_|";
+ text.append(textValue);
}
}
}
Modified:
poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1903729&r1=1903728&r2=1903729&view=diff
==============================================================================
--- poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Sun Aug 28 12:19:08 2022
@@ -478,4 +478,13 @@ class TestXWPFWordExtractor {
assertEquals(expected, actual);
}
}
+
+ @Test
+ void testCapitalizedFlag() throws IOException {
+ try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("capitalized.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
+ String txt = extractor.getText();
+ assertEquals( "The following word is: CAPITALIZED.", txt.trim());
+ }
+ }
}
Added: poi/trunk/test-data/document/capitalized.docx
URL:
http://svn.apache.org/viewvc/poi/trunk/test-data/document/capitalized.docx?rev=1903729&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/document/capitalized.docx
------------------------------------------------------------------------------
--- svn:mime-type (added)
+++ svn:mime-type Sun Aug 28 12:19:08 2022
@@ -0,0 +1 @@
+application/vnd.openxmlformats-officedocument.wordprocessingml.document
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]