Author: tallison
Date: Tue Apr 11 17:07:04 2017
New Revision: 1791002

URL: http://svn.apache.org/viewvc?rev=1791002&view=rev
Log:
Bug 50955 - use the code page from the DocumentSummaryInformation when
guessing the encoding.  Fall back to the old font-table method if the
DocumentSummaryInformation is not available. Thanks to Andreas Beeker for
recommending this direction.

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java?rev=1791002&r1=1791001&r2=1791002&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java Tue Apr 11 17:07:04 2017
@@ -19,8 +19,12 @@ package org.apache.poi.hwpf;
 import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
 import java.nio.charset.Charset;
 
+import org.apache.poi.hpsf.CustomProperties;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.Section;
 import org.apache.poi.hwmf.record.HwmfFont;
 import org.apache.poi.hwpf.model.ComplexFileTable;
 import org.apache.poi.hwpf.model.FontTable;
@@ -188,7 +192,32 @@ public class HWPFOldDocument extends HWP
      * @return The detected Charset from the old font table
      */
     private Charset guessCodePage(OldFontTable fontTable) {
-
+        //try to get it out of the overall document summary information
+        DocumentSummaryInformation summaryInformation = 
getDocumentSummaryInformation();
+        if (summaryInformation != null) {
+            CustomProperties customProperties = 
summaryInformation.getCustomProperties();
+            if (customProperties != null) {
+                int codePage = customProperties.getCodepage();
+                try {
+                    return 
Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+                } catch (UnsupportedEncodingException e) {
+                    //swallow
+                }
+            }
+            //for now, try to get first valid code page in a valid section
+            for (Section section : summaryInformation.getSections()) {
+                if (section.getOffset() < 0) {
+                    continue;
+                }
+                int codePage = section.getCodepage();
+                try {
+                    return 
Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+                } catch (UnsupportedEncodingException e) {
+                    //swallow
+                }
+            }
+        }
+        //if that still doesn't work, pick the first non-default non symbol 
charset
         for (OldFfn oldFfn : fontTable.getFontNames()) {
             HwmfFont.WmfCharset wmfCharset = 
HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
             if (wmfCharset != null &&



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org

Reply via email to