Author: leleueri
Date: Tue Jul 10 19:06:08 2012
New Revision: 1359845

URL: http://svn.apache.org/viewvc?rev=1359845&view=rev
Log:
PDFBOX-1357 - Missing character encoding in MacRoman & WinAnsi 

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java Tue Jul 10 19:06:08 2012
@@ -16,6 +16,8 @@
  */
 package org.apache.pdfbox.encoding;
 
+import java.io.IOException;
+
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSName;
 
@@ -248,6 +250,19 @@ public class MacRomanEncoding extends En
         addCharacterEncoding( 0172, "z" );
         addCharacterEncoding( 060, "zero" );
     }
+    
+    public String getName( int code ) throws IOException
+       {
+               if (!codeToName.containsKey(code) && code == 0312) {
+                               /*
+                                * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi.
+                                * The meaning of this duplicate code is "nonbreaking space" but it is
+                                * typographically the same as space. 
+                                */
+                               return "space";
+               }
+               return codeToName.get( code );
+       }
 
     /**
      * Convert this standard java object to a COS object.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java Tue Jul 10 19:06:08 2012
@@ -263,13 +263,30 @@ public class WinAnsiEncoding extends Enc
        public String getName( int code ) throws IOException
        {
                if (!codeToName.containsKey(code) && code > 040) {
-                       /*
-                        * According to the PDFReference Appendix D :
-                        * In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
-                        * However, only code 0225 is specifically assigned to the bullet character;
-                        * other codes are subject to future reassignment
-                        */
-                       return "bullet";
+                       switch (code) {
+                       case 0240:
+                               /*
+                                * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi.
+                                * The meaning of this duplicate code is "nonbreaking space" but it is
+                                * typographically the same as space. 
+                                */
+                               return "space";
+                       case 0255:
+                               /*
+                                * The hyphen character is also encoded as 0255 in WinAnsi.
+                                * The meaning of this duplicate code is "soft hyphen" but it is
+                                * typographically the same as hyphen. 
+                                */
+                               return "hyphen";
+                       default:
+                               /*
+                                * According to the PDFReference Appendix D :
+                                * In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
+                                * However, only code 0225 is specifically assigned to the bullet character;
+                                * other codes are subject to future reassignment
+                                */
+                               return "bullet";
+                       }
                }
                return codeToName.get( code );
        }


Reply via email to