Author: tallison
Date: Tue Apr 11 01:30:02 2017
New Revision: 1790904

URL: http://svn.apache.org/viewvc?rev=1790904&view=rev
Log:
bug 50955 - try originally guessed codepoint, backoff to 1252 if that fails

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java?rev=1790904&r1=1790903&r2=1790904&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java Tue Apr 11 01:30:02 2017
@@ -96,25 +96,17 @@ public class HWPFOldDocument extends HWP
         } else {
             // TODO Discover if these older documents can ever hold Unicode 
Strings?
             //  (We think not, because they seem to lack a Piece table)
-            // TODO Build the Piece Descriptor properly
-            //  (We have to fake it, as they don't seem to have a proper Piece 
table)
-            PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 
0,0,0,127, 0,0}, 0, guessedCharset);
-            pd.setFilePosition(_fib.getFibBase().getFcMin());
-
-            // Generate a single Text Piece Table, with a single Text Piece
-            //  which covers all the (8 bit only) text in the file
-            tpt = new OldTextPieceTable();
-            byte[] textData = new 
byte[_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin()];
-            System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), 
textData, 0, textData.length);
-
-            int numChars = textData.length;
-            if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
-                numChars /= 2;
+            //
+            //  What we have here is a wretched hack.  We need to figure out
+            //  how to get the correct charset for the doc.
+            TextPiece tp = null;
+            try {
+                tp = buildTextPiece(guessedCharset);
+            } catch (IllegalStateException e) {
+                //if there was a problem with the guessed charset and the 
length of the
+                //textpiece, back off to win1252. This is effectively what we 
used to do.
+                tp = buildTextPiece(StringUtil.WIN_1252);
             }
-
-            TextPiece tp = new TextPiece(
-                    0, numChars, textData, pd
-            );
             tpt.add(tp);
             
         }
@@ -156,6 +148,33 @@ public class HWPFOldDocument extends HWP
         }
     }
 
+    /**
+     *
+     * @param guessedCharset charset that we think this is
+     * @return a new text piece
+     * @throws IllegalStateException if the length isn't correct
+     */
+    private TextPiece buildTextPiece(Charset guessedCharset) throws 
IllegalStateException {
+        PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 0,0,0,127, 
0,0}, 0, guessedCharset);
+        pd.setFilePosition(_fib.getFibBase().getFcMin());
+
+        // Generate a single Text Piece Table, with a single Text Piece
+        //  which covers all the (8 bit only) text in the file
+        tpt = new OldTextPieceTable();
+        byte[] textData = new 
byte[_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin()];
+        System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), textData, 
0, textData.length);
+
+        int numChars = textData.length;
+        if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
+            numChars /= 2;
+        }
+
+        return new TextPiece(
+                0, numChars, textData, pd
+        );
+
+    }
+
 
     /**
      * Take the first codepage that is not default, ansi or symbol.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org

Reply via email to