Author: fanningpj
Date: Sun Aug 28 14:16:01 2022
New Revision: 1903738

URL: http://svn.apache.org/viewvc?rev=1903738&view=rev
Log:
[bug-63576] support capitalized text in WordExtractor (HWPF)

Added:
    poi/trunk/test-data/document/capitalized.doc   (with props)
Modified:
    poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
    poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java

Modified: poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java?rev=1903738&r1=1903737&r2=1903738&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java (original)
+++ poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java Sun Aug 28 14:16:01 2022
@@ -52,6 +52,7 @@ import org.apache.poi.hwpf.usermodel.Tab
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.util.Beta;
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.LocaleUtil;
 import org.apache.poi.util.StringUtil;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
@@ -445,6 +446,10 @@ public abstract class AbstractWordConver
                 continue;
             }
 
+            if (characterRun.isCapitalized() || characterRun.isSmallCaps()) {
+                text = text.toUpperCase(LocaleUtil.getUserLocale());
+            }
+
             if (characterRun.isSpecialCharacter()) {
                 if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
                     && (wordDocument instanceof HWPFDocument)) {

Modified: poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java?rev=1903738&r1=1903737&r2=1903738&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java (original)
+++ poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java Sun Aug 28 14:16:01 2022
@@ -402,6 +402,14 @@ public final class TestWordExtractor {
         }
     }
 
+    @Test
+    void testCapitalized() throws Exception {
+        try (WordExtractor wExt = openExtractor("capitalized.doc")) {
+            String text = wExt.getText().trim();
+            assertEquals("The following word is: CAPITALIZED.", text);
+        }
+    }
+
     private WordExtractor openExtractor(String fileName) throws IOException {
         try (InputStream is = docTests.openResourceAsStream(fileName)) {
             return new WordExtractor(is);

Added: poi/trunk/test-data/document/capitalized.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/capitalized.doc?rev=1903738&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/document/capitalized.doc
------------------------------------------------------------------------------
    svn:mime-type = application/msword



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to