Author: cito
Date: Sat Nov 21 23:00:14 2015
New Revision: 585
Log:
Make non-ASCII query results work with Python 3
Modified:
trunk/module/TEST_PyGreSQL_classic_connection.py
trunk/module/pgmodule.c
Modified: trunk/module/TEST_PyGreSQL_classic_connection.py
==============================================================================
--- trunk/module/TEST_PyGreSQL_classic_connection.py Sat Nov 21 20:13:22 2015 (r584)
+++ trunk/module/TEST_PyGreSQL_classic_connection.py Sat Nov 21 23:00:14 2015 (r585)
@@ -31,6 +31,8 @@
except NameError: # Python >= 3.0
long = int
+unicode_strings = str is not bytes
+
# We need a database to test against. If LOCAL_PyGreSQL.py exists we will
# get our information from that. Otherwise we use the defaults.
dbname = 'unittest'
@@ -399,7 +401,7 @@
def testBigGetresult(self):
num_cols = 100
num_rows = 100
- q = "select " + ','.join(map(str, xrange(num_cols)))
+ q = "select " + ','.join(map(str, range(num_cols)))
q = ' union all '.join((q,) * num_rows)
r = self.c.query(q).getresult()
result = [tuple(range(num_cols))] * num_rows
@@ -568,7 +570,7 @@
).getresult(), [(3,)])
self.assertEqual(query("select $1::integer+$2", [1, 2]
).getresult(), [(3,)])
- self.assertEqual(query("select 0+$1+$2+$3+$4+$5+$6", range(6)
+ self.assertEqual(query("select 0+$1+$2+$3+$4+$5+$6", list(range(6))
).getresult(), [(15,)])
def testQueryWithStrParams(self):
@@ -594,29 +596,37 @@
query = self.c.query
query('set client_encoding = utf8')
self.assertEqual(query("select $1||', '||$2||'!'",
- ('Hello', u'w\xf6rld')).getresult(), [('Hello, w\xc3\xb6rld!',)])
+ ('Hello', u'wörld')).getresult(), [('Hello, wörld!',)])
self.assertEqual(query("select $1||', '||$2||'!'",
- ('Hello', u'\u043c\u0438\u0440')).getresult(),
- [('Hello, \xd0\xbc\xd0\xb8\xd1\x80!',)])
+ ('Hello', u'мир')).getresult(),
+ [('Hello, мир!',)])
query('set client_encoding = latin1')
- self.assertEqual(query("select $1||', '||$2||'!'",
- ('Hello', u'w\xf6rld')).getresult(), [('Hello, w\xf6rld!',)])
+ r = query("select $1||', '||$2||'!'", ('Hello', u'wörld')).getresult()
+ if unicode_strings:
+ self.assertEqual(r, [('Hello, wörld!',)])
+ else:
+ self.assertEqual(r, [(u'Hello, wörld!'.encode('latin1'),)])
self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
- ('Hello', u'\u043c\u0438\u0440'))
+ ('Hello', u'мир'))
query('set client_encoding = iso_8859_1')
- self.assertEqual(query("select $1||', '||$2||'!'",
- ('Hello', u'w\xf6rld')).getresult(), [('Hello, w\xf6rld!',)])
+ r = query("select $1||', '||$2||'!'", ('Hello', u'wörld')).getresult()
+ if unicode_strings:
+ self.assertEqual(r, [('Hello, wörld!',)])
+ else:
+ self.assertEqual(r, [(u'Hello, wörld!'.encode('latin1'),)])
self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
- ('Hello', u'\u043c\u0438\u0440'))
+ ('Hello', u'мир'))
query('set client_encoding = iso_8859_5')
self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
- ('Hello', u'w\xf6rld'))
- self.assertEqual(query("select $1||', '||$2||'!'",
- ('Hello', u'\u043c\u0438\u0440')).getresult(),
- [('Hello, \xdc\xd8\xe0!',)])
+ ('Hello', u'wörld'))
+ r = query("select $1||', '||$2||'!'", ('Hello', u'мир')).getresult()
+ if unicode_strings:
+ self.assertEqual(r, [('Hello, мир!',)])
+ else:
+ self.assertEqual(r, [(u'Hello, мир!'.encode('cyrillic'),)])
query('set client_encoding = sql_ascii')
self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
- ('Hello', u'w\xf6rld'))
+ ('Hello', u'wörld'))
def testQueryWithMixedParams(self):
self.assertEqual(self.c.query("select $1+2,$2||', world!'",
@@ -642,7 +652,11 @@
def testUnicodeQuery(self):
query = self.c.query
self.assertEqual(query(u"select 1+1").getresult(), [(2,)])
- self.assertRaises(TypeError, query, u"select 'Hello, w\xf6rld!'")
+ if unicode_strings:
+ self.assertEqual(query("select 'Hello, wörld!'").getresult(),
+ [('Hello, wörld!',)])
+ else:
+ self.assertRaises(TypeError, query, u"select 'Hello, wörld!'")
class TestInserttable(unittest.TestCase):
Modified: trunk/module/pgmodule.c
==============================================================================
--- trunk/module/pgmodule.c Sat Nov 21 20:13:22 2015 (r584)
+++ trunk/module/pgmodule.c Sat Nov 21 23:00:14 2015 (r585)
@@ -110,6 +110,10 @@
*namedresult = NULL; /* function for getting
named results */
static char *decimal_point = "."; /* decimal point used in money values */
+static int pg_encoding_utf8 = 0;
+static int pg_encoding_latin1 = 0;
+static int pg_encoding_ascii = 0;
+
/*
OBJECTS
=======
@@ -180,9 +184,7 @@
{
PyObject_HEAD
PGresult *result; /* result content */
- int result_type; /* type of previous result */
- long current_pos; /* current position in last result */
- long num_rows; /* number of (affected) rows */
+ int encoding; /* client encoding */
} queryObject;
#define is_queryObject(v) (PyType(v) == &queryType)
@@ -1077,7 +1079,9 @@
PyObject *oargs = NULL;
PGresult *result;
queryObject *npgobj;
- int status,
+ const char* encoding_name=NULL;
+ int encoding,
+ status,
nparms = 0;
if (!self->cnx)
@@ -1108,12 +1112,18 @@
nparms = (int)PySequence_Size(oargs);
}
+ encoding = PQclientEncoding(self->cnx);
+ if (encoding != pg_encoding_utf8 && encoding != pg_encoding_latin1
+ && encoding != pg_encoding_ascii)
+ /* should be translated to Python here */
+ encoding_name = pg_encoding_to_char(encoding);
+
/* gets result */
if (nparms)
{
/* prepare arguments */
PyObject **str, **s, *obj = PySequence_GetItem(oargs, 0);
- char **parms, **p, *enc=NULL;
+ char **parms, **p;
int *lparms, *l;
register int i;
@@ -1149,17 +1159,15 @@
}
else if (PyUnicode_Check(obj))
{
- if (!enc)
- enc = (char *)pg_encoding_to_char(
- PQclientEncoding(self->cnx));
- if (!strcmp(enc, "UTF8"))
+ if (encoding == pg_encoding_utf8)
*s = PyUnicode_AsUTF8String(obj);
- else if (!strcmp(enc, "LATIN1"))
+ else if (encoding == pg_encoding_latin1)
*s = PyUnicode_AsLatin1String(obj);
- else if (!strcmp(enc, "SQL_ASCII"))
+ else if (encoding == pg_encoding_ascii)
*s = PyUnicode_AsASCIIString(obj);
else
- *s = PyUnicode_AsEncodedString(obj, enc, "strict");
+ *s = PyUnicode_AsEncodedString(obj,
+ encoding_name, "strict");
if (*s == NULL)
{
free(lparms); free(parms); free(str);
@@ -1284,6 +1292,7 @@
/* stores result and returns object */
npgobj->result = result;
+ npgobj->encoding = encoding;
return (PyObject *) npgobj;
}
@@ -1419,7 +1428,8 @@
char *table,
*buffer,
*bufpt;
- char *enc=NULL;
+ const char *encoding_name=NULL;
+ int encoding;
size_t bufsiz;
PyObject *list,
*sublist,
@@ -1486,6 +1496,12 @@
return NULL;
}
+ encoding = PQclientEncoding(self->cnx);
+ if (encoding != pg_encoding_utf8 && encoding != pg_encoding_latin1
+ && encoding != pg_encoding_ascii)
+ /* should be translated to Python here */
+ encoding_name = pg_encoding_to_char(encoding);
+
PQclear(result);
n = 0; /* not strictly necessary but avoids warning */
@@ -1565,17 +1581,15 @@
else if (PyUnicode_Check(item))
{
PyObject *s;
- if (!enc)
- enc = (char *)pg_encoding_to_char(
- PQclientEncoding(self->cnx));
- if (!strcmp(enc, "UTF8"))
+ if (encoding == pg_encoding_utf8)
s = PyUnicode_AsUTF8String(item);
- else if (!strcmp(enc, "LATIN1"))
+ else if (encoding == pg_encoding_latin1)
s = PyUnicode_AsLatin1String(item);
- else if (!strcmp(enc, "SQL_ASCII"))
+ else if (encoding == pg_encoding_ascii)
s = PyUnicode_AsASCIIString(item);
else
- s = PyUnicode_AsEncodedString(item, enc, "strict");
+ s = PyUnicode_AsEncodedString(item,
+ encoding_name, "strict");
const char* t = PyBytes_AsString(s);
while (*t && bufsiz)
{
@@ -3205,6 +3219,10 @@
m,
n,
*typ;
+#if IS_PY3
+ int encoding;
+ const char *encoding_name=NULL;
+#endif
/* checks args (args == NULL for an internal call) */
if (args && !PyArg_ParseTuple(args, ""))
@@ -3214,6 +3232,14 @@
return NULL;
}
+#if IS_PY3
+ encoding = self->encoding;
+ if (encoding != pg_encoding_utf8 && encoding != pg_encoding_latin1
+ && encoding != pg_encoding_ascii)
+ /* should be translated to Python here */
+ encoding_name = pg_encoding_to_char(encoding);
+#endif
+
/* stores result in tuple */
m = PQntuples(self->result);
n = PQnfields(self->result);
@@ -3254,7 +3280,7 @@
break;
case 3: /* float/double */
- tmp_obj = PyBytes_FromString(s);
+ tmp_obj = PyStr_FromString(s);
#if IS_PY3
val = PyFloat_FromString(tmp_obj);
#else
@@ -3287,7 +3313,7 @@
}
else
{
- tmp_obj = PyBytes_FromString(s);
+ tmp_obj = PyStr_FromString(s);
#if IS_PY3
val = PyFloat_FromString(tmp_obj);
#else
@@ -3298,7 +3324,21 @@
break;
default:
- val = PyStr_FromString(s);
+#if IS_PY3
+ if (encoding == pg_encoding_utf8)
+ val = PyUnicode_DecodeUTF8(s, strlen(s), "strict");
+ else if (encoding == pg_encoding_latin1)
+ val = PyUnicode_DecodeLatin1(s, strlen(s), "strict");
+ else if (encoding == pg_encoding_ascii)
+ val = PyUnicode_DecodeASCII(s, strlen(s), "strict");
+ else
+ val = PyUnicode_Decode(s, strlen(s),
+ encoding_name, "strict");
+ if (!val)
+ val = PyBytes_FromString(s);
+#else
+ val = PyBytes_FromString(s);
+#endif
break;
}
@@ -4359,6 +4399,12 @@
pg_default_passwd = Py_None;
#endif /* DEFAULT_VARS */
+ /* store common pg encoding ids */
+
+ pg_encoding_utf8 = pg_char_to_encoding("UTF8");
+ pg_encoding_latin1 = pg_char_to_encoding("LATIN1");
+ pg_encoding_ascii = pg_char_to_encoding("SQL_ASCII");
+
/* Check for errors */
if (PyErr_Occurred())
return NULL;
_______________________________________________
PyGreSQL mailing list
[email protected]
https://mail.vex.net/mailman/listinfo.cgi/pygresql