Hello,
With this patch I've done an attempt to make the handling of character
encoding in the JDBC driver a little clearer.
* Cleans up the logic to select a JVM encoding for a backend encoding.
* Makes the connection setup code easier to read.
* Gathers character encoding and decoding in a single place.
* Adds unit tests for encoding.
* Introduces a new class, org.postgresql.core.Encoding, and the
corresponding unit test class, org.postgresql.test.jdbc2.EncodingTest.
It shouldn't change the external behavior of the driver.
/Anders
_____________________________________________________________________
A n d e r s B e n g t s s o n [EMAIL PROTECTED]
Stockholm, Sweden
package org.postgresql.test.jdbc2;
import junit.framework.*;
import org.postgresql.core.Encoding;
/**
* Tests for the Encoding class.
*
* $Id$
*/
public class EncodingTest extends TestCase {
public EncodingTest(String name) {
super(name);
}
public void testCreation() throws Exception {
Encoding encoding;
encoding = Encoding.forDatabaseEncoding("UNICODE");
assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase());
encoding = Encoding.forDatabaseEncoding("SQL_ASCII");
assert(encoding.name().toUpperCase().indexOf("ASCII") != -1);
encoding = Encoding.forDatabaseEncoding("X-UNIT-TESTS");
assertEquals("ascii", encoding.name());
}
public void testTransformations() throws Exception {
Encoding encoding = Encoding.forDatabaseEncoding("UNICODE");
assertEquals("ab", encoding.decode(new byte[] { 97, 98 }));
assertEquals(2, encoding.encode("ab").length);
assertEquals(97, encoding.encode("a")[0]);
assertEquals(98, encoding.encode("b")[0]);
encoding = Encoding.defaultEncoding();
assertEquals("a".getBytes()[0], encoding.encode("a")[0]);
assertEquals(new String(new byte[] { 97 }),
encoding.decode(new byte[] { 97 }));
}
}
package org.postgresql.core;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.sql.SQLException;
import org.postgresql.util.*;
/**
* Converts to and from the character encoding used by the backend.
*
* $Id$
*/
public class Encoding {
private static final Encoding DEFAULT_ENCODING = new Encoding(null);
/**
* Preferred JVM encodings for backend encodings.
*/
private static final Hashtable encodings = new Hashtable();
static {
encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" });
encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" });
encodings.put("LATIN1", new String[] { "ISO8859_1" });
encodings.put("LATIN2", new String[] { "ISO8859_2" });
encodings.put("LATIN3", new String[] { "ISO8859_3" });
encodings.put("LATIN4", new String[] { "ISO8859_4" });
encodings.put("LATIN5", new String[] { "ISO8859_5" });
encodings.put("LATIN6", new String[] { "ISO8859_6" });
encodings.put("LATIN7", new String[] { "ISO8859_7" });
encodings.put("LATIN8", new String[] { "ISO8859_8" });
encodings.put("LATIN9", new String[] { "ISO8859_9" });
encodings.put("EUC_JP", new String[] { "EUC_JP" });
encodings.put("EUC_CN", new String[] { "EUC_CN" });
encodings.put("EUC_KR", new String[] { "EUC_KR" });
encodings.put("EUC_TW", new String[] { "EUC_TW" });
encodings.put("WIN", new String[] { "Cp1252" });
// We prefer KOI8-U, since it is a superset of KOI8-R
encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" });
// Database is not encoding-aware, so we'll settle with the default encoding
encodings.put("UNKNOWN", new String[0]);
// Data for unit tests
encodings.put("X-UNIT-TESTS", new String[] { "dummy1", "dummy2", "ascii" });
}
private final String encoding;
public Encoding(String encoding) {
this.encoding = encoding;
}
/**
* Get an Encoding matching the given database encoding.
*/
public static Encoding forDatabaseEncoding(String databaseEncoding) {
// If the backend encoding is known and there is a suitable
// encoding in the JVM we use that. Otherwise we fall back
// to the default encoding of the JVM.
if (encodings.containsKey(databaseEncoding)) {
String[] candidates = (String[]) encodings.get(databaseEncoding);
for (int i = 0; i < candidates.length; i++) {
if (isAvailable(candidates[i])) {
return new Encoding(candidates[i]);
}
}
}
return defaultEncoding();
}
/**
* Name of the (JVM) encoding used.
*/
public String name() {
return encoding;
}
/**
* Encode a string to an array of bytes.
*/
public byte[] encode(String s) throws SQLException {
try {
if (encoding == null) {
return s.getBytes();
} else {
return s.getBytes(encoding);
}
} catch (UnsupportedEncodingException e) {
throw new PSQLException("postgresql.stream.encoding", e);
}
}
/**
* Decode an array of bytes into a string.
*/
public String decode(byte[] encodedString, int offset, int length) throws
SQLException {
try {
if (encoding == null) {
return new String(encodedString, offset, length);
} else {
return new String(encodedString, offset, length, encoding);
}
} catch (UnsupportedEncodingException e) {
throw new PSQLException("postgresql.stream.encoding", e);
}
}
/**
* Decode an array of bytes into a string.
*/
public String decode(byte[] encodedString) throws SQLException {
return decode(encodedString, 0, encodedString.length);
}
/**
* Get an Encoding using the default encoding for the JVM.
*/
public static Encoding defaultEncoding() {
return DEFAULT_ENCODING;
}
/**
* Test if an encoding is available in the JVM.
*/
public static boolean isAvailable(String encodingName) {
try {
"DUMMY".getBytes(encodingName);
return true;
} catch (UnsupportedEncodingException e) {
return false;
}
}
}
Index: src/interfaces/jdbc/Implementation
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/Implementation,v
retrieving revision 1.2
diff -u -r1.2 Implementation
--- src/interfaces/jdbc/Implementation 2001/01/18 14:50:14 1.2
+++ src/interfaces/jdbc/Implementation 2001/07/08 15:00:57
@@ -151,6 +151,7 @@
MemoryPool Interface for managing MemoryPools. Not used (yet).
ObjectPool Interface for an Object Pool
SimpleObjectPool Class that implements ObjectPool and used by BytePoolDim#
+Encoding Character encoding logic, mainly for Connection and PG_Stream.
Package org.postgresql.fastpath
---------------------------
Index: src/interfaces/jdbc/org/postgresql/Connection.java
===================================================================
RCS file:
/home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/Connection.java,v
retrieving revision 1.17
diff -u -r1.17 Connection.java
--- src/interfaces/jdbc/org/postgresql/Connection.java 2001/06/07 00:09:32 1.17
+++ src/interfaces/jdbc/org/postgresql/Connection.java 2001/07/08 15:06:26
@@ -8,6 +8,7 @@
import org.postgresql.fastpath.*;
import org.postgresql.largeobject.*;
import org.postgresql.util.*;
+import org.postgresql.core.Encoding;
/**
* $Id: Connection.java,v 1.17 2001/06/07 00:09:32 momjian Exp $
@@ -33,11 +34,8 @@
/**
* The encoding to use for this connection.
- * If <b>null</b>, the encoding has not been specified by the
- * user, and the default encoding for the platform should be
- * used.
*/
- private String encoding;
+ private Encoding encoding = Encoding.defaultEncoding();
public boolean CONNECTION_OK = true;
public boolean CONNECTION_BAD = false;
@@ -168,7 +166,7 @@
// "User authentication failed"
//
throw new SQLException(pg_stream.ReceiveString
- (receive_sbuf, 4096, getEncoding()));
+ (receive_sbuf, 4096, encoding));
case 'R':
// Get the type of request
@@ -239,7 +237,7 @@
case 'E':
case 'N':
throw new SQLException(pg_stream.ReceiveString
- (receive_sbuf, 4096, getEncoding()));
+ (receive_sbuf, 4096, encoding));
default:
throw new PSQLException("postgresql.con.setup");
}
@@ -251,110 +249,28 @@
break;
case 'E':
case 'N':
- throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096,
getEncoding()));
+ throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096,
+encoding));
default:
throw new PSQLException("postgresql.con.setup");
}
- // Originally we issued a SHOW DATESTYLE statement to find the databases
default
- // datestyle. However, this caused some problems with timestamps, so in 6.5, we
- // went the way of ODBC, and set the connection to ISO.
- //
- // This may cause some clients to break when they assume anything other than
ISO,
- // but then - they should be using the proper methods ;-)
- //
- // We also ask the DB for certain properties (i.e. DatabaseEncoding at this
time)
- //
firstWarning = null;
- java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " +
- "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else
getdatabaseencoding() end");
+ ExecSQL("set datestyle to 'ISO'");
- String dbEncoding = null;
- //retrieve DB properties
- if(initrset.next()) {
-
- //handle DatabaseEncoding
- dbEncoding = initrset.getString(1);
- //convert from the PostgreSQL name to the Java name
- if (dbEncoding.equals("SQL_ASCII")) {
- dbEncoding = "ASCII";
- } else if (dbEncoding.equals("UNICODE")) {
- dbEncoding = "UTF8";
- } else if (dbEncoding.equals("LATIN1")) {
- dbEncoding = "ISO8859_1";
- } else if (dbEncoding.equals("LATIN2")) {
- dbEncoding = "ISO8859_2";
- } else if (dbEncoding.equals("LATIN3")) {
- dbEncoding = "ISO8859_3";
- } else if (dbEncoding.equals("LATIN4")) {
- dbEncoding = "ISO8859_4";
- } else if (dbEncoding.equals("LATIN5")) {
- dbEncoding = "ISO8859_5";
- } else if (dbEncoding.equals("LATIN6")) {
- dbEncoding = "ISO8859_6";
- } else if (dbEncoding.equals("LATIN7")) {
- dbEncoding = "ISO8859_7";
- } else if (dbEncoding.equals("LATIN8")) {
- dbEncoding = "ISO8859_8";
- } else if (dbEncoding.equals("LATIN9")) {
- dbEncoding = "ISO8859_9";
- } else if (dbEncoding.equals("EUC_JP")) {
- dbEncoding = "EUC_JP";
- } else if (dbEncoding.equals("EUC_CN")) {
- dbEncoding = "EUC_CN";
- } else if (dbEncoding.equals("EUC_KR")) {
- dbEncoding = "EUC_KR";
- } else if (dbEncoding.equals("EUC_TW")) {
- dbEncoding = "EUC_TW";
- } else if (dbEncoding.equals("KOI8")) {
- // try first if KOI8_U is present, it's a superset of KOI8_R
- try {
- dbEncoding = "KOI8_U";
- "test".getBytes(dbEncoding);
- }
- catch(UnsupportedEncodingException uee) {
- // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :(
- dbEncoding = "KOI8_R";
- }
-
- } else if (dbEncoding.equals("WIN")) {
- dbEncoding = "Cp1252";
- } else if (dbEncoding.equals("UNKNOWN")) {
- //This isn't a multibyte database so we don't have an encoding to use
- //We leave dbEncoding null which will cause the default encoding for the
- //JVM to be used
- dbEncoding = null;
- } else {
- dbEncoding = null;
- }
- }
-
-
- //Set the encoding for this connection
- //Since the encoding could be specified or obtained from the DB we use the
- //following order:
- // 1. passed as a property
- // 2. value from DB if supported by current JVM
- // 3. default for JVM (leave encoding null)
- String passedEncoding = info.getProperty("charSet"); // could be null
-
+ String passedEncoding = info.getProperty("charSet");
if (passedEncoding != null) {
- encoding = passedEncoding;
+ if (Encoding.isAvailable(passedEncoding)) {
+ encoding = new Encoding(passedEncoding);
+ } else {
+ encoding = Encoding.defaultEncoding();
+ }
} else {
- if (dbEncoding != null) {
- //test DB encoding
- try {
- "TEST".getBytes(dbEncoding);
- //no error the encoding is supported by the current JVM
- encoding = dbEncoding;
- } catch (UnsupportedEncodingException uee) {
- //dbEncoding is not supported by the current JVM
- encoding = null;
- }
- } else {
- encoding = null;
- }
+ java.sql.ResultSet resultSet = ExecSQL("select case when
+pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end");
+ if (resultSet.next()) {
+ String dbEncoding = resultSet.getString(1);
+ encoding = Encoding.forDatabaseEncoding(dbEncoding);
+ }
}
// Initialise object handling
@@ -454,23 +370,8 @@
int update_count = 1;
int insert_oid = 0;
SQLException final_error = null;
-
- // Commented out as the backend can now handle queries
- // larger than 8K. Peter June 6 2000
- //if (sql.length() > 8192)
- //throw new PSQLException("postgresql.con.toolong",sql);
-
- if (getEncoding() == null)
- buf = sql.getBytes();
- else {
- try {
- buf = sql.getBytes(getEncoding());
- } catch (UnsupportedEncodingException unse) {
- throw new PSQLException("postgresql.con.encoding",
- unse);
- }
- }
+ buf = encoding.encode(sql);
try
{
pg_stream.SendChar('Q');
@@ -491,7 +392,7 @@
{
case 'A': // Asynchronous Notify
pid = pg_stream.ReceiveInteger(4);
- msg =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
+ msg = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
break;
case 'B': // Binary Data Transfer
if (fields == null)
@@ -502,7 +403,7 @@
tuples.addElement(tup);
break;
case 'C': // Command Status
- recv_status =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
+ recv_status =
+pg_stream.ReceiveString(receive_sbuf,8192,encoding);
// Now handle the update count correctly.
if(recv_status.startsWith("INSERT") ||
recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") ||
recv_status.startsWith("MOVE")) {
@@ -544,7 +445,7 @@
tuples.addElement(tup);
break;
case 'E': // Error Message
- msg =
pg_stream.ReceiveString(receive_sbuf,4096,getEncoding());
+ msg = pg_stream.ReceiveString(receive_sbuf,4096,encoding);
final_error = new SQLException(msg);
hfr = true;
break;
@@ -559,10 +460,10 @@
hfr = true;
break;
case 'N': // Error Notification
-
addWarning(pg_stream.ReceiveString(receive_sbuf,4096,getEncoding()));
+
+addWarning(pg_stream.ReceiveString(receive_sbuf,4096,encoding));
break;
case 'P': // Portal Name
- String pname =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
+ String pname =
+pg_stream.ReceiveString(receive_sbuf,8192,encoding);
break;
case 'T': // MetaData Field Description
if (fields != null)
@@ -595,7 +496,7 @@
for (i = 0 ; i < nf ; ++i)
{
- String typname =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
+ String typname = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
int typid = pg_stream.ReceiveIntegerR(4);
int typlen = pg_stream.ReceiveIntegerR(2);
int typmod = pg_stream.ReceiveIntegerR(4);
@@ -665,7 +566,7 @@
* default encoding.
*/
public String getEncoding() throws SQLException {
- return encoding;
+ return encoding.name();
}
/**
Index: src/interfaces/jdbc/org/postgresql/PG_Stream.java
===================================================================
RCS file:
/home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/PG_Stream.java,v
retrieving revision 1.8
diff -u -r1.8 PG_Stream.java
--- src/interfaces/jdbc/org/postgresql/PG_Stream.java 2001/07/06 18:01:22 1.8
+++ src/interfaces/jdbc/org/postgresql/PG_Stream.java 2001/07/08 15:06:30
@@ -10,7 +10,7 @@
import org.postgresql.util.*;
/**
- * @version 1.0 15-APR-1997
+ * $Id$
*
* This class is used by Connection & PGlobj for communicating with the
* backend.
@@ -211,7 +211,7 @@
public String ReceiveString(int maxsiz) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
- return ReceiveString(rst, maxsiz, null);
+ return ReceiveString(rst, maxsiz, Encoding.defaultEncoding());
}
/**
@@ -225,7 +225,7 @@
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
- public String ReceiveString(int maxsiz, String encoding) throws SQLException
+ public String ReceiveString(int maxsiz, Encoding encoding) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
return ReceiveString(rst, maxsiz, encoding);
@@ -243,9 +243,12 @@
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
- public String ReceiveString(byte rst[], int maxsiz, String encoding)
+ public String ReceiveString(byte rst[], int maxsiz, Encoding encoding)
throws SQLException
{
+ if (encoding == null)
+ encoding = Encoding.defaultEncoding();
+
int s = 0;
try
@@ -265,18 +268,8 @@
throw new PSQLException("postgresql.stream.toomuch");
} catch (IOException e) {
throw new PSQLException("postgresql.stream.ioerror",e);
- }
- String v = null;
- if (encoding == null)
- v = new String(rst, 0, s);
- else {
- try {
- v = new String(rst, 0, s, encoding);
- } catch (UnsupportedEncodingException unse) {
- throw new PSQLException("postgresql.stream.encoding", unse);
- }
}
- return v;
+ return encoding.decode(rst, 0, s);
}
/**
Index: src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java
===================================================================
RCS file:
/home/projects/pgsql/cvsroot/pgsql/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java,v
retrieving revision 1.3
diff -u -r1.3 JDBC2Tests.java
--- src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java 2001/02/14 17:45:17
1.3
+++ src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java 2001/07/08 15:06:31
@@ -195,6 +195,7 @@
suite.addTestSuite(DriverTest.class);
suite.addTestSuite(ConnectionTest.class);
suite.addTestSuite(DatabaseMetaDataTest.class);
+ suite.addTestSuite(EncodingTest.class);
// Connectivity/Protocols
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster