> > I have some issues with this patch. > > > > 1) Please don't require the driver to perform yet another round trip to > > the server to get the database encoding. You changed the following code > > which accessed the server once at connection creation: > [snip] > > 2) I don't like having test specific code in Encoding.java, specifically > > the dummy encoding 'X-UNIT-TESTS'. Ok, now I've made the changes, and added a comment about why the code for (1) should stay optimized. :) /Anders _____________________________________________________________________ A n d e r s B e n g t s s o n [EMAIL PROTECTED] Stockholm, Sweden
package org.postgresql.test.jdbc2; import junit.framework.*; import org.postgresql.core.Encoding; /** * Tests for the Encoding class. * * $Id$ */ public class EncodingTest extends TestCase { public EncodingTest(String name) { super(name); } public void testCreation() throws Exception { Encoding encoding; encoding = Encoding.forDatabaseEncoding("UNICODE"); assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase()); encoding = Encoding.forDatabaseEncoding("SQL_ASCII"); assert(encoding.name().toUpperCase().indexOf("ASCII") != -1); assertEquals(Encoding.defaultEncoding(), Encoding.forDatabaseEncoding("UNKNOWN")); } public void testTransformations() throws Exception { Encoding encoding = Encoding.forDatabaseEncoding("UNICODE"); assertEquals("ab", encoding.decode(new byte[] { 97, 98 })); assertEquals(2, encoding.encode("ab").length); assertEquals(97, encoding.encode("a")[0]); assertEquals(98, encoding.encode("b")[0]); encoding = Encoding.defaultEncoding(); assertEquals("a".getBytes()[0], encoding.encode("a")[0]); assertEquals(new String(new byte[] { 97 }), encoding.decode(new byte[] { 97 })); } }
package org.postgresql.core; import java.io.UnsupportedEncodingException; import java.util.*; import java.sql.SQLException; import org.postgresql.util.*; /** * Converts to and from the character encoding used by the backend. * * $Id$ */ public class Encoding { private static final Encoding DEFAULT_ENCODING = new Encoding(null); /** * Preferred JVM encodings for backend encodings. */ private static final Hashtable encodings = new Hashtable(); static { encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" }); encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" }); encodings.put("LATIN1", new String[] { "ISO8859_1" }); encodings.put("LATIN2", new String[] { "ISO8859_2" }); encodings.put("LATIN3", new String[] { "ISO8859_3" }); encodings.put("LATIN4", new String[] { "ISO8859_4" }); encodings.put("LATIN5", new String[] { "ISO8859_5" }); encodings.put("LATIN6", new String[] { "ISO8859_6" }); encodings.put("LATIN7", new String[] { "ISO8859_7" }); encodings.put("LATIN8", new String[] { "ISO8859_8" }); encodings.put("LATIN9", new String[] { "ISO8859_9" }); encodings.put("EUC_JP", new String[] { "EUC_JP" }); encodings.put("EUC_CN", new String[] { "EUC_CN" }); encodings.put("EUC_KR", new String[] { "EUC_KR" }); encodings.put("EUC_TW", new String[] { "EUC_TW" }); encodings.put("WIN", new String[] { "Cp1252" }); // We prefer KOI8-U, since it is a superset of KOI8-R encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" }); // Database is not encoding-aware, so we'll settle with the default encoding encodings.put("UNKNOWN", new String[0]); } private final String encoding; public Encoding(String encoding) { this.encoding = encoding; } /** * Get an Encoding matching the given database encoding. */ public static Encoding forDatabaseEncoding(String databaseEncoding) { // If the backend encoding is known and there is a suitable // encoding in the JVM we use that. Otherwise we fall back // to the default encoding of the JVM. if (encodings.containsKey(databaseEncoding)) { String[] candidates = (String[]) encodings.get(databaseEncoding); for (int i = 0; i < candidates.length; i++) { if (isAvailable(candidates[i])) { return new Encoding(candidates[i]); } } } return defaultEncoding(); } /** * Name of the (JVM) encoding used. */ public String name() { return encoding; } /** * Encode a string to an array of bytes. */ public byte[] encode(String s) throws SQLException { try { if (encoding == null) { return s.getBytes(); } else { return s.getBytes(encoding); } } catch (UnsupportedEncodingException e) { throw new PSQLException("postgresql.stream.encoding", e); } } /** * Decode an array of bytes into a string. */ public String decode(byte[] encodedString, int offset, int length) throws SQLException { try { if (encoding == null) { return new String(encodedString, offset, length); } else { return new String(encodedString, offset, length, encoding); } } catch (UnsupportedEncodingException e) { throw new PSQLException("postgresql.stream.encoding", e); } } /** * Decode an array of bytes into a string. */ public String decode(byte[] encodedString) throws SQLException { return decode(encodedString, 0, encodedString.length); } /** * Get an Encoding using the default encoding for the JVM. */ public static Encoding defaultEncoding() { return DEFAULT_ENCODING; } /** * Test if an encoding is available in the JVM. */ public static boolean isAvailable(String encodingName) { try { "DUMMY".getBytes(encodingName); return true; } catch (UnsupportedEncodingException e) { return false; } } }
Index: src/interfaces/jdbc/Implementation =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/Implementation,v retrieving revision 1.2 diff -u -r1.2 Implementation --- src/interfaces/jdbc/Implementation 2001/01/18 14:50:14 1.2 +++ src/interfaces/jdbc/Implementation 2001/07/10 12:19:05 @@ -151,6 +151,7 @@ MemoryPool Interface for managing MemoryPools. Not used (yet). ObjectPool Interface for an Object Pool SimpleObjectPool Class that implements ObjectPool and used by BytePoolDim# +Encoding Character encoding logic, mainly for Connection and PG_Stream. Package org.postgresql.fastpath --------------------------- Index: src/interfaces/jdbc/org/postgresql/Connection.java =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/Connection.java,v retrieving revision 1.17 diff -u -r1.17 Connection.java --- src/interfaces/jdbc/org/postgresql/Connection.java 2001/06/07 00:09:32 1.17 +++ src/interfaces/jdbc/org/postgresql/Connection.java 2001/07/10 12:19:07 @@ -8,6 +8,7 @@ import org.postgresql.fastpath.*; import org.postgresql.largeobject.*; import org.postgresql.util.*; +import org.postgresql.core.Encoding; /** * $Id: Connection.java,v 1.17 2001/06/07 00:09:32 momjian Exp $ @@ -33,11 +34,8 @@ /** * The encoding to use for this connection. - * If <b>null</b>, the encoding has not been specified by the - * user, and the default encoding for the platform should be - * used. */ - private String encoding; + private Encoding encoding = Encoding.defaultEncoding(); public boolean CONNECTION_OK = true; public boolean CONNECTION_BAD = false; @@ -168,7 +166,7 @@ // "User authentication failed" // throw new SQLException(pg_stream.ReceiveString - (receive_sbuf, 4096, getEncoding())); + (receive_sbuf, 4096, encoding)); case 'R': // Get the type of request @@ -239,7 +237,7 @@ case 'E': case 'N': throw new SQLException(pg_stream.ReceiveString - (receive_sbuf, 4096, getEncoding())); + (receive_sbuf, 4096, encoding)); default: throw new PSQLException("postgresql.con.setup"); } @@ -251,110 +249,35 @@ break; case 'E': case 'N': - throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, getEncoding())); + throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, +encoding)); default: throw new PSQLException("postgresql.con.setup"); } - // Originally we issued a SHOW DATESTYLE statement to find the databases default - // datestyle. However, this caused some problems with timestamps, so in 6.5, we - // went the way of ODBC, and set the connection to ISO. - // - // This may cause some clients to break when they assume anything other than ISO, - // but then - they should be using the proper methods ;-) - // - // We also ask the DB for certain properties (i.e. DatabaseEncoding at this time) - // firstWarning = null; - java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " + - "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end"); + String dbEncoding; - String dbEncoding = null; - //retrieve DB properties - if(initrset.next()) { - - //handle DatabaseEncoding - dbEncoding = initrset.getString(1); - //convert from the PostgreSQL name to the Java name - if (dbEncoding.equals("SQL_ASCII")) { - dbEncoding = "ASCII"; - } else if (dbEncoding.equals("UNICODE")) { - dbEncoding = "UTF8"; - } else if (dbEncoding.equals("LATIN1")) { - dbEncoding = "ISO8859_1"; - } else if (dbEncoding.equals("LATIN2")) { - dbEncoding = "ISO8859_2"; - } else if (dbEncoding.equals("LATIN3")) { - dbEncoding = "ISO8859_3"; - } else if (dbEncoding.equals("LATIN4")) { - dbEncoding = "ISO8859_4"; - } else if (dbEncoding.equals("LATIN5")) { - dbEncoding = "ISO8859_5"; - } else if (dbEncoding.equals("LATIN6")) { - dbEncoding = "ISO8859_6"; - } else if (dbEncoding.equals("LATIN7")) { - dbEncoding = "ISO8859_7"; - } else if (dbEncoding.equals("LATIN8")) { - dbEncoding = "ISO8859_8"; - } else if (dbEncoding.equals("LATIN9")) { - dbEncoding = "ISO8859_9"; - } else if (dbEncoding.equals("EUC_JP")) { - dbEncoding = "EUC_JP"; - } else if (dbEncoding.equals("EUC_CN")) { - dbEncoding = "EUC_CN"; - } else if (dbEncoding.equals("EUC_KR")) { - dbEncoding = "EUC_KR"; - } else if (dbEncoding.equals("EUC_TW")) { - dbEncoding = "EUC_TW"; - } else if (dbEncoding.equals("KOI8")) { - // try first if KOI8_U is present, it's a superset of KOI8_R - try { - dbEncoding = "KOI8_U"; - "test".getBytes(dbEncoding); - } - catch(UnsupportedEncodingException uee) { - // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :( - dbEncoding = "KOI8_R"; - } + // Set datestyle and fetch db encoding in a single call, to avoid making + // more than one round trip to the backend during connection startup. - } else if (dbEncoding.equals("WIN")) { - dbEncoding = "Cp1252"; - } else if (dbEncoding.equals("UNKNOWN")) { - //This isn't a multibyte database so we don't have an encoding to use - //We leave dbEncoding null which will cause the default encoding for the - //JVM to be used - dbEncoding = null; - } else { - dbEncoding = null; - } + java.sql.ResultSet resultSet = + ExecSQL("set datestyle to 'ISO'; " + + "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then +'UNKNOWN' else getdatabaseencoding() end"); + if (! resultSet.next()) { + throw new PSQLException("postgresql.con.failed", "failed getting backend +encoding"); } - + dbEncoding = resultSet.getString(1); - //Set the encoding for this connection - //Since the encoding could be specified or obtained from the DB we use the - //following order: - // 1. passed as a property - // 2. value from DB if supported by current JVM - // 3. default for JVM (leave encoding null) - String passedEncoding = info.getProperty("charSet"); // could be null - + String passedEncoding = info.getProperty("charSet"); if (passedEncoding != null) { - encoding = passedEncoding; + if (Encoding.isAvailable(passedEncoding)) { + encoding = new Encoding(passedEncoding); + } else { + encoding = Encoding.defaultEncoding(); + } } else { - if (dbEncoding != null) { - //test DB encoding - try { - "TEST".getBytes(dbEncoding); - //no error the encoding is supported by the current JVM - encoding = dbEncoding; - } catch (UnsupportedEncodingException uee) { - //dbEncoding is not supported by the current JVM - encoding = null; - } - } else { - encoding = null; - } + encoding = Encoding.forDatabaseEncoding(dbEncoding); } // Initialise object handling @@ -454,23 +377,8 @@ int update_count = 1; int insert_oid = 0; SQLException final_error = null; - - // Commented out as the backend can now handle queries - // larger than 8K. Peter June 6 2000 - //if (sql.length() > 8192) - //throw new PSQLException("postgresql.con.toolong",sql); - - if (getEncoding() == null) - buf = sql.getBytes(); - else { - try { - buf = sql.getBytes(getEncoding()); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.con.encoding", - unse); - } - } + buf = encoding.encode(sql); try { pg_stream.SendChar('Q'); @@ -491,7 +399,7 @@ { case 'A': // Asynchronous Notify pid = pg_stream.ReceiveInteger(4); - msg = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); + msg = pg_stream.ReceiveString(receive_sbuf,8192,encoding); break; case 'B': // Binary Data Transfer if (fields == null) @@ -502,7 +410,7 @@ tuples.addElement(tup); break; case 'C': // Command Status - recv_status = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); + recv_status = +pg_stream.ReceiveString(receive_sbuf,8192,encoding); // Now handle the update count correctly. if(recv_status.startsWith("INSERT") || recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || recv_status.startsWith("MOVE")) { @@ -544,7 +452,7 @@ tuples.addElement(tup); break; case 'E': // Error Message - msg = pg_stream.ReceiveString(receive_sbuf,4096,getEncoding()); + msg = pg_stream.ReceiveString(receive_sbuf,4096,encoding); final_error = new SQLException(msg); hfr = true; break; @@ -559,10 +467,10 @@ hfr = true; break; case 'N': // Error Notification - addWarning(pg_stream.ReceiveString(receive_sbuf,4096,getEncoding())); + +addWarning(pg_stream.ReceiveString(receive_sbuf,4096,encoding)); break; case 'P': // Portal Name - String pname = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); + String pname = +pg_stream.ReceiveString(receive_sbuf,8192,encoding); break; case 'T': // MetaData Field Description if (fields != null) @@ -595,7 +503,7 @@ for (i = 0 ; i < nf ; ++i) { - String typname = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); + String typname = pg_stream.ReceiveString(receive_sbuf,8192,encoding); int typid = pg_stream.ReceiveIntegerR(4); int typlen = pg_stream.ReceiveIntegerR(2); int typmod = pg_stream.ReceiveIntegerR(4); @@ -665,7 +573,7 @@ * default encoding. */ public String getEncoding() throws SQLException { - return encoding; + return encoding.name(); } /** Index: src/interfaces/jdbc/org/postgresql/PG_Stream.java =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/PG_Stream.java,v retrieving revision 1.8 diff -u -r1.8 PG_Stream.java --- src/interfaces/jdbc/org/postgresql/PG_Stream.java 2001/07/06 18:01:22 1.8 +++ src/interfaces/jdbc/org/postgresql/PG_Stream.java 2001/07/10 12:19:08 @@ -10,7 +10,7 @@ import org.postgresql.util.*; /** - * @version 1.0 15-APR-1997 + * $Id$ * * This class is used by Connection & PGlobj for communicating with the * backend. @@ -211,7 +211,7 @@ public String ReceiveString(int maxsiz) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); - return ReceiveString(rst, maxsiz, null); + return ReceiveString(rst, maxsiz, Encoding.defaultEncoding()); } /** @@ -225,7 +225,7 @@ * @return string from back end * @exception SQLException if an I/O error occurs */ - public String ReceiveString(int maxsiz, String encoding) throws SQLException + public String ReceiveString(int maxsiz, Encoding encoding) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); return ReceiveString(rst, maxsiz, encoding); @@ -243,9 +243,12 @@ * @return string from back end * @exception SQLException if an I/O error occurs */ - public String ReceiveString(byte rst[], int maxsiz, String encoding) + public String ReceiveString(byte rst[], int maxsiz, Encoding encoding) throws SQLException { + if (encoding == null) + encoding = Encoding.defaultEncoding(); + int s = 0; try @@ -265,18 +268,8 @@ throw new PSQLException("postgresql.stream.toomuch"); } catch (IOException e) { throw new PSQLException("postgresql.stream.ioerror",e); - } - String v = null; - if (encoding == null) - v = new String(rst, 0, s); - else { - try { - v = new String(rst, 0, s, encoding); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.stream.encoding", unse); - } } - return v; + return encoding.decode(rst, 0, s); } /** Index: src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java,v retrieving revision 1.3 diff -u -r1.3 JDBC2Tests.java --- src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java 2001/02/14 17:45:17 1.3 +++ src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java 2001/07/10 12:19:08 @@ -195,6 +195,7 @@ suite.addTestSuite(DriverTest.class); suite.addTestSuite(ConnectionTest.class); suite.addTestSuite(DatabaseMetaDataTest.class); + suite.addTestSuite(EncodingTest.class); // Connectivity/Protocols
---------------------------(end of broadcast)--------------------------- TIP 2: you can get off all lists at once with the unregister command (send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])