Author: fanningpj
Date: Sun Aug 28 14:16:01 2022
New Revision: 1903738
URL: http://svn.apache.org/viewvc?rev=1903738&view=rev
Log:
[bug-63576] support capitalized text in WordExtractor (HWPF)
Added:
poi/trunk/test-data/document/capitalized.doc (with props)
Modified:
poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
Modified: poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java?rev=1903738&r1=1903737&r2=1903738&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java (original)
+++ poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java Sun Aug 28 14:16:01 2022
@@ -52,6 +52,7 @@ import org.apache.poi.hwpf.usermodel.Tab
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.util.Beta;
import org.apache.poi.util.Internal;
+import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.StringUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@@ -445,6 +446,10 @@ public abstract class AbstractWordConver
continue;
}
+ if (characterRun.isCapitalized() || characterRun.isSmallCaps()) {
+ text = text.toUpperCase(LocaleUtil.getUserLocale());
+ }
+
if (characterRun.isSpecialCharacter()) {
if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
&& (wordDocument instanceof HWPFDocument)) {
Modified: poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java?rev=1903738&r1=1903737&r2=1903738&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java (original)
+++ poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java Sun Aug 28 14:16:01 2022
@@ -402,6 +402,14 @@ public final class TestWordExtractor {
}
}
+ @Test
+ void testCapitalized() throws Exception {
+ try (WordExtractor wExt = openExtractor("capitalized.doc")) {
+ String text = wExt.getText().trim();
+ assertEquals("The following word is: CAPITALIZED.", text);
+ }
+ }
+
private WordExtractor openExtractor(String fileName) throws IOException {
try (InputStream is = docTests.openResourceAsStream(fileName)) {
return new WordExtractor(is);
Added: poi/trunk/test-data/document/capitalized.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/capitalized.doc?rev=1903738&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/document/capitalized.doc
------------------------------------------------------------------------------
svn:mime-type = application/msword
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]