mmidy       02/05/09 15:16:42

  Modified:    java/src/org/apache/xalan/serialize Encodings.java
  Added:       java/src/org/apache/xalan/serialize Encodings.properties
  Log:
  Bugzilla 6356: Add support for "windows-1251" and use a properties file to
  store all the supported encodings. Patch from Sergey Ushakov.
  
  Revision  Changes    Path
  1.9       +79 -92    xml-xalan/java/src/org/apache/xalan/serialize/Encodings.java
  
  Index: Encodings.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/serialize/Encodings.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- Encodings.java    4 Nov 2001 05:22:27 -0000       1.8
  +++ Encodings.java    9 May 2002 22:16:42 -0000       1.9
  @@ -56,18 +56,24 @@
    */
   package org.apache.xalan.serialize;
   
  +import java.io.InputStream;
   import java.io.Writer;
   import java.io.OutputStream;
   import java.io.OutputStreamWriter;
   import java.io.UnsupportedEncodingException;
   
  +import java.net.URL;
  +
  +import java.util.Enumeration;
  +import java.util.Properties;
  +
   /**
    * Provides information about encodings. Depends on the Java runtime
    * to provides writers for the different encodings, but can be used
    * to override encoding names and provide the last printable character
    * for each encoding.
    *
  - * @version $Revision: 1.8 $ $Date: 2001/11/04 05:22:27 $
  + * @version $Revision: 1.9 $ $Date: 2002/05/09 22:16:42 $
    * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
    */
   public class Encodings extends Object
  @@ -77,6 +83,12 @@
      * The last printable character for unknown encodings.
      */
     static final int m_defaultLastPrintable = 0x7F;
  +  
  +  /**
  +   * Standard filename for properties file with encodings data.
  +   */
  +  static final String ENCODINGS_FILE = "Encodings.properties";
  +
   
     /**
      * Returns a writer for the specified encoding based on
  @@ -313,98 +325,73 @@
   
   
     /**
  -   * Constructs a list of all the supported encodings.
  +   * Load a list of all the supported encodings.
  +   *
  +   * System property "org.apache.xalan.serialize.encodings"
  +   * formatted using URL syntax may define an external encodings list.
  +   * Thanks to Sergey Ushakov for the code contribution!
      */
  -  private static final EncodingInfo[] _encodings = new EncodingInfo[]
  +  private static EncodingInfo[] loadEncodingInfo()
     {
  +    URL url = null;
  +    try {
  +      String urlString =
  +        System.getProperty("org.apache.xalan.serialize.encodings", "");
  +      if (urlString == null || urlString.length() == 0) {
  +        ClassLoader cl = Encodings.class.getClassLoader();
  +        if (cl == null) {
  +          url = ClassLoader.getSystemResource("org/apache/xalan/serialize/" +
  +                                              ENCODINGS_FILE);
  +        } else {
  +          url = cl.getResource(ENCODINGS_FILE);
  +          if (url == null)
  +            url = ClassLoader.getSystemResource("org/apache/xalan/serialize/" +
  +                                              ENCODINGS_FILE);
  +        }
  +      } else {
  +        url = new URL (urlString);
  +      }
  +
  +      // ? consider whether we should allow an exception here if resource
  +      // is not found or should we return an empty array ?
  +      InputStream is = url.openStream();
  +      Properties props = new Properties ();
  +      props.load(is);
  +      is.close();
  +
  +      int totalEntries = props.size();
  +      EncodingInfo[] ret = new EncodingInfo[totalEntries];
  +      Enumeration keys = props.keys();
  +      for (int i = 0; i < totalEntries; ++i) {
  +        String mimeName = (String) keys.nextElement();
  +        String val = props.getProperty(mimeName);
  +        int pos = val.indexOf(' ');
  +        String javaName;
  +        int lastPrintable;
  +        if (pos < 0)
  +        {
  +          //throw new Exception
  +          //  ("Last printable character not defined for encoding " +
  +          //   mimeName + " (" + val + ")");
  +          javaName = val;
  +          lastPrintable = 0x00FF;
  +        }
  +        else
  +        {
  +          javaName = val.substring(0, pos);
  +          lastPrintable =
  +                         Integer.decode(val.substring(pos).trim()).intValue();
  +        }
  +        ret [i] = new EncodingInfo (mimeName, javaName, lastPrintable);
  +      }
  +      return ret;
  +    } catch (java.net.MalformedURLException mue) {
  +      throw new org.apache.xml.utils.WrappedRuntimeException(mue);
  +    }
  +    catch (java.io.IOException ioe) {
  +      throw new org.apache.xml.utils.WrappedRuntimeException(ioe);
  +    }
  +  }
   
  -    //    <preferred MIME name>, <Java encoding name>
  -    // new EncodingInfo( "ISO 8859-1", "CP1252"); // Close enough, I guess
  -    new EncodingInfo("WINDOWS-1250", "Cp1250", 0x00FF),  // Peter Smolik
  -    // Patch attributed to [EMAIL PROTECTED] (Håvard Wigtil)
  -    new EncodingInfo("WINDOWS-1252", "Cp1252", 0x00FF),
  -    new EncodingInfo("UTF-8", "UTF8", 0xFFFF),
  -    new EncodingInfo("US-ASCII", "ISO8859_1", 0x7F),
  -    new EncodingInfo("ISO-8859-1", "ISO8859_1", 0x00FF),
  -    // Patch attributed to [EMAIL PROTECTED] (Håvard Wigtil)
  -    new EncodingInfo("ISO-8859-1", "ISO8859-1", 0x00FF),
  -    new EncodingInfo("ISO-8859-2", "ISO8859_2", 0x00FF),
  -    // I'm going to apply "ISO8859-X" variant to all these, to be safe.
  -    new EncodingInfo("ISO-8859-2", "ISO8859-2", 0x00FF),
  -    new EncodingInfo("ISO-8859-3", "ISO8859_3", 0x00FF),
  -    new EncodingInfo("ISO-8859-3", "ISO8859-3", 0x00FF),
  -    new EncodingInfo("ISO-8859-4", "ISO8859_4", 0x00FF),
  -    new EncodingInfo("ISO-8859-4", "ISO8859-4", 0x00FF),
  -    new EncodingInfo("ISO-8859-5", "ISO8859_5", 0x00FF),
  -    new EncodingInfo("ISO-8859-5", "ISO8859-5", 0x00FF),
  -    new EncodingInfo("ISO-8859-6", "ISO8859_6", 0x00FF),
  -    new EncodingInfo("ISO-8859-6", "ISO8859-6", 0x00FF),
  -    new EncodingInfo("ISO-8859-7", "ISO8859_7", 0x00FF),
  -    new EncodingInfo("ISO-8859-7", "ISO8859-7", 0x00FF),
  -    new EncodingInfo("ISO-8859-8", "ISO8859_8", 0x00FF),
  -    new EncodingInfo("ISO-8859-8", "ISO8859-8", 0x00FF),
  -    new EncodingInfo("ISO-8859-9", "ISO8859_9", 0x00FF),
  -    new EncodingInfo("ISO-8859-9", "ISO8859-9", 0x00FF),
  -    new EncodingInfo("US-ASCII", "8859_1", 0x00FF),  // ?
  -    new EncodingInfo("ISO-8859-1", "8859_1", 0x00FF),
  -    new EncodingInfo("ISO-8859-2", "8859_2", 0x00FF),
  -    new EncodingInfo("ISO-8859-3", "8859_3", 0x00FF),
  -    new EncodingInfo("ISO-8859-4", "8859_4", 0x00FF),
  -    new EncodingInfo("ISO-8859-5", "8859_5", 0x00FF),
  -    new EncodingInfo("ISO-8859-6", "8859_6", 0x00FF),
  -    new EncodingInfo("ISO-8859-7", "8859_7", 0x00FF),
  -    new EncodingInfo("ISO-8859-8", "8859_8", 0x00FF),
  -    new EncodingInfo("ISO-8859-9", "8859_9", 0x00FF),
  -    new EncodingInfo("ISO-8859-1", "8859-1", 0x00FF),
  -    new EncodingInfo("ISO-8859-2", "8859-2", 0x00FF),
  -    new EncodingInfo("ISO-8859-3", "8859-3", 0x00FF),
  -    new EncodingInfo("ISO-8859-4", "8859-4", 0x00FF),
  -    new EncodingInfo("ISO-8859-5", "8859-5", 0x00FF),
  -    new EncodingInfo("ISO-8859-6", "8859-6", 0x00FF),
  -    new EncodingInfo("ISO-8859-7", "8859-7", 0x00FF),
  -    new EncodingInfo("ISO-8859-8", "8859-8", 0x00FF),
  -    new EncodingInfo("ISO-8859-9", "8859-9", 0x00FF),
  -    new EncodingInfo("ISO-2022-JP", "JIS", 0xFFFF),
  -    new EncodingInfo("SHIFT_JIS", "SJIS", 0xFFFF),
  -    new EncodingInfo("EUC-JP", "EUC_JP", 0xFFFF),
  -    new EncodingInfo("EUC-KR", "EUC_KR", 0xFFFF),
  -    new EncodingInfo("EUC-CN", "EUC_CN", 0xFFFF),
  -    new EncodingInfo("EUC-TW", "EUC_TW", 0xFFFF),
  -    new EncodingInfo("GB2312", "EUC_CN", 0xFFFF),
  -    new EncodingInfo("EUC-JP", "EUC-JP", 0xFFFF),
  -    new EncodingInfo("EUC-KR", "EUC-KR", 0xFFFF),
  -    new EncodingInfo("EUC-CN", "EUC-CN", 0xFFFF),
  -    new EncodingInfo("EUC-TW", "EUC-TW", 0xFFFF),
  -    new EncodingInfo("GB2312", "EUC-CN", 0xFFFF),
  -    new EncodingInfo("GB2312", "GB2312", 0xFFFF),
  -    new EncodingInfo("BIG5", "Big5", 0xFFFF),
  -    new EncodingInfo("EUC-JP", "EUCJIS", 0xFFFF),
  -    new EncodingInfo("EUC-KR", "KSC5601", 0xFFFF),
  -    new EncodingInfo("ISO-2022-KR", "ISO2022KR", 0xFFFF),
  -    new EncodingInfo("KOI8-R", "KOI8_R", 0xFFFF),
  -    new EncodingInfo("EBCDIC-CP-US", "Cp037", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-CA", "Cp037", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-NL", "Cp037", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-DK", "Cp277", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-NO", "Cp277", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-FI", "Cp278", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-SE", "Cp278", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-IT", "Cp280", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-ES", "Cp284", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-GB", "Cp285", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-FR", "Cp297", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-AR1", "Cp420", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-HE", "Cp424", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-CH", "Cp500", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-ROECE", "Cp870", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-YU", "Cp870", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-IS", "Cp871", 0x00FF),
  -    new EncodingInfo("EBCDIC-CP-AR2", "Cp918", 0x00FF),
  -    new EncodingInfo("MacRoman", "MacTEC", 0xFF),
  -    new EncodingInfo("ASCII", "ASCII", 0x7F),
  -    new EncodingInfo("ISO-Latin-1", "ASCII", 0xFF),
  -    new EncodingInfo("UTF-8", "UTF8", 0xFFFF),
  -    new EncodingInfo("UNICODE", "Unicode", 0xFFFF),
  -    new EncodingInfo("UTF-16", "Unicode", 0xFFFF)
  -  };
  +  private static final EncodingInfo[] _encodings = loadEncodingInfo();
   }
  
  
  
  1.1                  xml-xalan/java/src/org/apache/xalan/serialize/Encodings.properties
  
  Index: Encodings.properties
  ===================================================================
  # <preferred MIME name> <Java encoding name> <last printable character>
  # Peter Smolik 
  WINDOWS-1250 Cp1250 0x00FF 
  # Patch attributed to [EMAIL PROTECTED] (Håvard Wigtil) 
  WINDOWS-1251 Cp1251 0x00FF 
  WINDOWS-1252 Cp1252 0x00FF 
  US-ASCII ISO8859_1 0x007F 
  ISO-8859-1 ISO8859_1 0x00FF 
  # Patch attributed to [EMAIL PROTECTED] (Håvard Wigtil) 
  ISO-8859-1 ISO8859-1 0x00FF 
  ISO-8859-2 ISO8859_2 0x00FF 
  # I'm going to apply "ISO8859-X" variant to all these, to be safe. 
  ISO-8859-2 ISO8859-2 0x00FF 
  ISO-8859-3 ISO8859_3 0x00FF 
  ISO-8859-3 ISO8859-3 0x00FF 
  ISO-8859-4 ISO8859_4 0x00FF 
  ISO-8859-4 ISO8859-4 0x00FF 
  ISO-8859-5 ISO8859_5 0x00FF 
  ISO-8859-5 ISO8859-5 0x00FF 
  ISO-8859-6 ISO8859_6 0x00FF 
  ISO-8859-6 ISO8859-6 0x00FF 
  ISO-8859-7 ISO8859_7 0x00FF 
  ISO-8859-7 ISO8859-7 0x00FF 
  ISO-8859-8 ISO8859_8 0x00FF 
  ISO-8859-8 ISO8859-8 0x00FF 
  ISO-8859-9 ISO8859_9 0x00FF 
  ISO-8859-9 ISO8859-9 0x00FF 
  # # ? 
  US-ASCII 8859_1 0x00FF 
  ISO-8859-1 8859_1 0x00FF 
  ISO-8859-2 8859_2 0x00FF 
  ISO-8859-3 8859_3 0x00FF 
  ISO-8859-4 8859_4 0x00FF 
  ISO-8859-5 8859_5 0x00FF 
  ISO-8859-6 8859_6 0x00FF 
  ISO-8859-7 8859_7 0x00FF 
  ISO-8859-8 8859_8 0x00FF 
  ISO-8859-9 8859_9 0x00FF 
  ISO-8859-1 8859-1 0x00FF 
  ISO-8859-2 8859-2 0x00FF 
  ISO-8859-3 8859-3 0x00FF 
  ISO-8859-4 8859-4 0x00FF 
  ISO-8859-5 8859-5 0x00FF 
  ISO-8859-6 8859-6 0x00FF 
  ISO-8859-7 8859-7 0x00FF 
  ISO-8859-8 8859-8 0x00FF 
  ISO-8859-9 8859-9 0x00FF 
  ISO-2022-JP JIS 0xFFFF 
  ISO-2022-KR ISO2022KR 0xFFFF 
  SHIFT_JIS SJIS 0xFFFF 
  EUC-JP EUC_JP 0xFFFF 
  EUC-KR EUC_KR 0xFFFF 
  EUC-CN EUC_CN 0xFFFF 
  EUC-TW EUC_TW 0xFFFF 
  GB2312 EUC_CN 0xFFFF 
  EUC-JP EUC-JP 0xFFFF 
  EUC-KR EUC-KR 0xFFFF 
  EUC-CN EUC-CN 0xFFFF 
  EUC-TW EUC-TW 0xFFFF 
  GB2312 EUC-CN 0xFFFF 
  GB2312 GB2312 0xFFFF 
  BIG5 Big5 0xFFFF 
  EUC-JP EUCJIS 0xFFFF 
  EUC-KR KSC5601 0xFFFF 
  KOI8-R KOI8_R 0xFFFF 
  EBCDIC-CP-US Cp037 0x00FF 
  EBCDIC-CP-CA Cp037 0x00FF 
  EBCDIC-CP-NL Cp037 0x00FF 
  EBCDIC-CP-DK Cp277 0x00FF 
  EBCDIC-CP-NO Cp277 0x00FF 
  EBCDIC-CP-FI Cp278 0x00FF 
  EBCDIC-CP-SE Cp278 0x00FF 
  EBCDIC-CP-IT Cp280 0x00FF 
  EBCDIC-CP-ES Cp284 0x00FF 
  EBCDIC-CP-GB Cp285 0x00FF 
  EBCDIC-CP-FR Cp297 0x00FF 
  EBCDIC-CP-AR1 Cp420 0x00FF 
  EBCDIC-CP-HE Cp424 0x00FF 
  EBCDIC-CP-CH Cp500 0x00FF 
  EBCDIC-CP-ROECE Cp870 0x00FF 
  EBCDIC-CP-YU Cp870 0x00FF 
  EBCDIC-CP-IS Cp871 0x00FF 
  EBCDIC-CP-AR2 Cp918 0x00FF 
  MacRoman MacTEC 0x00FF 
  ASCII ASCII 0x007F 
  ISO-Latin-1 ASCII 0x00FF 
  UNICODE Unicode 0xFFFF 
  UTF-8 UTF8 0xFFFF 
  UTF-16 Unicode 0xFFFF
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to