Author: fanningpj
Date: Sun Aug 28 12:19:08 2022
New Revision: 1903729

URL: http://svn.apache.org/viewvc?rev=1903729&view=rev
Log:
[bug-63575] support capitalized text in XWPFWordExtractor

Added:
    poi/trunk/test-data/document/capitalized.docx   (with props)
Modified:
    poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
    poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java

Modified: poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java?rev=1903729&r1=1903728&r2=1903729&view=diff
==============================================================================
--- poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java (original)
+++ poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFRun.java Sun Aug 28 12:19:08 2022
@@ -27,6 +27,7 @@ import java.math.RoundingMode;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Locale;
 
 import javax.xml.namespace.QName;
 
@@ -35,10 +36,7 @@ import org.apache.poi.ooxml.POIXMLExcept
 import org.apache.poi.ooxml.util.DocumentHelper;
 import org.apache.poi.ooxml.util.POIXMLUnits;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.util.HexDump;
-import org.apache.poi.util.Internal;
-import org.apache.poi.util.Removal;
-import org.apache.poi.util.Units;
+import org.apache.poi.util.*;
 import org.apache.poi.wp.usermodel.CharacterRun;
 import org.apache.xmlbeans.*;
 import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
@@ -1381,7 +1379,13 @@ public class XWPFRun implements ISDTCont
             //  come up as instances of CTText, but we don't want them
             //  in the normal text output
             if (!("instrText".equals(node.getLocalName()) && 
XWPFDocument.NS_OOXML_WP_MAIN.equals(node.getNamespaceURI()))) {
-                text.append(((CTText) o).getStringValue());
+                String textValue = ((CTText) o).getStringValue();
+                if (textValue != null) {
+                    if (isCapitalized() || isSmallCaps()) {
+                        textValue = 
textValue.toUpperCase(LocaleUtil.getUserLocale());
+                    }
+                    text.append(textValue);
+                }
             }
         }
 
@@ -1391,7 +1395,9 @@ public class XWPFRun implements ISDTCont
             if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
                 if (ctfldChar.getFfData() != null) {
                     for (CTFFCheckBox checkBox : 
ctfldChar.getFfData().getCheckBoxList()) {
-                        text.append((checkBox.getDefault() != null && 
POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal())) ? "|X|" : "|_|");
+                        String textValue = checkBox.getDefault() != null && 
POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal()) ?
+                                "|X|" : "|_|";
+                        text.append(textValue);
                     }
                 }
             }

Modified: poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1903729&r1=1903728&r2=1903729&view=diff
==============================================================================
--- poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Sun Aug 28 12:19:08 2022
@@ -478,4 +478,13 @@ class TestXWPFWordExtractor {
             assertEquals(expected, actual);
         }
     }
+
+    @Test
+    void testCapitalizedFlag() throws IOException {
+        try (XWPFDocument doc = 
XWPFTestDataSamples.openSampleDocument("capitalized.docx");
+             XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
+            String txt = extractor.getText();
+            assertEquals( "The following word is: CAPITALIZED.", txt.trim());
+        }
+    }
 }

Added: poi/trunk/test-data/document/capitalized.docx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/capitalized.docx?rev=1903729&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/document/capitalized.docx
------------------------------------------------------------------------------
--- svn:mime-type (added)
+++ svn:mime-type Sun Aug 28 12:19:08 2022
@@ -0,0 +1 @@
+application/vnd.openxmlformats-officedocument.wordprocessingml.document



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to