Author: tallison
Date: Tue Apr 11 17:07:04 2017
New Revision: 1791002
URL: http://svn.apache.org/viewvc?rev=1791002&view=rev
Log:
Bug 50955 - incorporate info from the DocumentSummaryInformation when
guessing the encoding. Fall back to the old font-table method if the
DocumentSummaryInformation is not available. Thanks to Andreas Beeker for
recommending this direction.
Modified:
poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java?rev=1791002&r1=1791001&r2=1791002&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
(original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java Tue
Apr 11 17:07:04 2017
@@ -19,8 +19,12 @@ package org.apache.poi.hwpf;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
+import org.apache.poi.hpsf.CustomProperties;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.Section;
import org.apache.poi.hwmf.record.HwmfFont;
import org.apache.poi.hwpf.model.ComplexFileTable;
import org.apache.poi.hwpf.model.FontTable;
@@ -188,7 +192,32 @@ public class HWPFOldDocument extends HWP
* @return The detected Charset from the old font table
*/
private Charset guessCodePage(OldFontTable fontTable) {
-
+ //try to get it out of the overall document summary information
+ DocumentSummaryInformation summaryInformation =
getDocumentSummaryInformation();
+ if (summaryInformation != null) {
+ CustomProperties customProperties =
summaryInformation.getCustomProperties();
+ if (customProperties != null) {
+ int codePage = customProperties.getCodepage();
+ try {
+ return
Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+ } catch (UnsupportedEncodingException e) {
+ //swallow
+ }
+ }
+ //for now, try to get first valid code page in a valid section
+ for (Section section : summaryInformation.getSections()) {
+ if (section.getOffset() < 0) {
+ continue;
+ }
+ int codePage = section.getCodepage();
+ try {
+ return
Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+ } catch (UnsupportedEncodingException e) {
+ //swallow
+ }
+ }
+ }
+ //if that still doesn't work, pick the first non-default non symbol charset
for (OldFfn oldFfn : fontTable.getFontNames()) {
HwmfFont.WmfCharset wmfCharset =
HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
if (wmfCharset != null &&
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]