Committed by Greg Sabino Mullane <[email protected]>
Another way of handling the UTF8 mess, per discussions on The Channel.
---
Pg.pm | 19 +++++----
dbdimp.c | 134 +++++++++++++++++++++++++++++++++++++-------------------------
dbdimp.h | 3 +-
3 files changed, 92 insertions(+), 64 deletions(-)
diff --git a/Pg.pm b/Pg.pm
index 989a245..9459f63 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1625,7 +1625,7 @@ use 5.006001;
pg_bool_tf => undef,
pg_db => undef,
pg_default_port => undef,
- pg_unicode => undef,
+ pg_utf8_flag => undef,
pg_enable_utf8 => undef,
pg_errorlevel => undef,
pg_expand_array => undef,
@@ -3122,19 +3122,20 @@ DBD::Pg specific attribute. Defaults to false. When
true, question marks inside
are not treated as L<placeholders|/Placeholders>. Useful for statements that
contain unquoted question
marks, such as geometric operators.
-=head3 B<pg_unicode> (boolean)
+=head3 B<pg_utf8_flag> (integer)
DBD::Pg specific attribute. In normal use, this should not be needed, as it
will be set
-automatically according to the server encoding. SQL_ASCII will set this to
false, while
-everything else will set it to true. If you force it off, then everything will
be returned
-as byte soup, even data from UTF-8 databases, which is very likely not what
you want. If
-you force it on for SQL_ASCII databases, the results will be unpredictable. It
is recommended
-that you only use this attribute as a last resort and with a full
understanding of what
-it does.
+automatically according to the client encoding. If the client_encoding is 'UTF8', this
+attribute will be turned on, which will cause strings coming back from the database to
+be marked with Perl's internal utf8 flag. If you explicitly set this attribute to 0 or 1,
+the client_encoding will no longer be checked. Do not use this attribute unless you really
+know what you are doing, and understand how Perl's utf8 differs from UTF8. Setting to 1 will
+always cause the utf8 flag to be set. Setting to 0 will prevent the utf8 flag from ever being
+set. Setting to -1 will switch back to the default behavior of checking the client_encoding.
=head3 B<pg_enable_utf8> (boolean)
-Deprecated, please use pg_unicode instead.
+Deprecated.
=head3 B<pg_errorlevel> (integer)
diff --git a/dbdimp.c b/dbdimp.c
index 4a151b7..eb54f11 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -78,6 +78,7 @@ typedef enum
static void pg_error(pTHX_ SV *h, int error_num, const char *error_msg);
static void pg_warn (void * arg, const char * message);
+static void check_client_encoding(pTHX_ imp_dbh_t *imp_dbh);
static ExecStatusType _result(pTHX_ imp_dbh_t *imp_dbh, const char *sql);
static ExecStatusType _sqlstate(pTHX_ imp_dbh_t *imp_dbh, PGresult *result);
static int pg_db_rollback_commit (pTHX_ SV *dbh, imp_dbh_t *imp_dbh, int
action);
@@ -108,9 +109,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
bool inquote = DBDPG_FALSE;
STRLEN connect_string_size;
ConnStatusType connstatus;
- int unicode;
- const char * server_encoding;
- const char * client_encoding;
if (TSTART) TRC(DBILOGFP, "%sBegin dbd_db_login\n", THEADER);
@@ -213,33 +211,22 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
TRACE_PQPROTOCOLVERSION;
imp_dbh->pg_protocol = PQprotocolVersion(imp_dbh->conn);
- /* Check the value of the pg_unicode attribute. Default to not set (-1)
*/
- unicode = -1;
- DBD_ATTRIB_GET_IV(attr, "pg_unicode", 10, svp, unicode);
-
- /*
- We need to see if we are treating things with utf8 respect, or as
byte soup
- The rules are:
- - An explicit pg_unicode setting trumps everything else
- - A server_encoding of SQL_ASCII is always byte soup
- - If the client_encoding matches the server_encoding, set unicode on
- - Otherwise, we leave things alone
- */
- client_encoding = PQparameterStatus(imp_dbh->conn, "client_encoding");
+	/* Check the value of the pg_utf8_flag attribute: -1 means "not set by the user" */
+	imp_dbh->pg_utf8_flag = -1;
+	DBD_ATTRIB_GET_IV(attr, "pg_utf8_flag", 12, svp, imp_dbh->pg_utf8_flag);
+	if (imp_dbh->pg_utf8_flag == -1) { /* Has not been explicitly set by the user */
+		/*
+		  Check the client_encoding. If UTF8, set the flag on, else off.
+		  strncmp returns 0 on a match, so a match must map to 1 (on);
+		  this agrees with what check_client_encoding does after each query.
+		*/
+		imp_dbh->utf8_flag = (0 == strncmp(PQparameterStatus(imp_dbh->conn, "client_encoding"), "UTF8", 4))
+			? 1 : 0;
+	}
+ else {
+ /* We allow -1 and 0 direct, and force everything else to 1 */
+ if (imp_dbh->pg_utf8_flag < -1 || imp_dbh->pg_utf8_flag > 1)
+ imp_dbh->pg_utf8_flag = imp_dbh->pg_utf8_flag ? 1 : 0;
- if (unicode > 1) { /* Force it on, no matter what */
- imp_dbh->unicode = DBDPG_TRUE;
- }
- else {
- if (unicode == 0) { /* Force it off, no matter what */
- imp_dbh->unicode = DBDPG_FALSE;
- }
- else { /* Neither is set, so check the encodings */
- server_encoding = PQparameterStatus(imp_dbh->conn,
"server_encoding");
- /* If they match, set unicode to true, otherwise, false
*/
- imp_dbh->unicode = (0==strcmp(server_encoding,
client_encoding))
- ? DBDPG_TRUE : DBDPG_FALSE;
- }
+ imp_dbh->utf8_flag = imp_dbh->pg_utf8_flag;
}
/* Figure out this particular backend's version */
@@ -286,12 +273,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
/* Tell DBI that we should call disconnect when the handle dies */
DBIc_ACTIVE_on(imp_dbh);
- /* If needed, set the client_encoding to UTF-8 */
- if (imp_dbh->unicode &&
- (0 != strncmp(client_encoding, "UTF-8", 5))) {
- PQexec(imp_dbh->conn, "SET client_encoding = 'UTF-8'");
- }
-
if (TEND) TRC(DBILOGFP, "%sEnd dbd_db_login\n", THEADER);
return 1;
@@ -323,7 +304,7 @@ static void pg_error (pTHX_ SV * h, int error_num, const
char * error_msg)
sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
/* Set as utf-8 */
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(DBIc_ERRSTR(imp_xxh));
if (TEND) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER);
@@ -387,7 +368,7 @@ static ExecStatusType _result(pTHX_ imp_dbh_t * imp_dbh,
const char * sql)
if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
/* Upgrade to a true UTF-8 string in place as needed */
- if (imp_dbh->unicode) {
+ if (imp_dbh->utf8_flag) {
// upgrade_utf8 magic on 'sql'
}
@@ -396,6 +377,8 @@ static ExecStatusType _result(pTHX_ imp_dbh_t * imp_dbh,
const char * sql)
status = _sqlstate(aTHX_ imp_dbh, result);
+ check_client_encoding(aTHX_ imp_dbh);
+
TRACE_PQCLEAR;
PQclear(result);
@@ -749,7 +732,7 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV
* keysv)
}
break;
- case 10: /* AutoCommit pg_bool_tf pg_pid_number pg_options
pg_unicode */
+ case 10: /* AutoCommit pg_bool_tf pg_pid_number pg_options */
if (strEQ("AutoCommit", key))
retsv = boolSV(DBIc_has(imp_dbh, DBIcf_AutoCommit));
@@ -761,8 +744,6 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV
* keysv)
TRACE_PQOPTIONS;
retsv = newSVpv(PQoptions(imp_dbh->conn),0);
}
- else if (strEQ("pg_unicode", key))
- retsv = newSViv((IV)imp_dbh->unicode);
break;
case 11: /* pg_INV_READ pg_protocol */
@@ -773,10 +754,12 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv)
retsv = newSViv((IV)imp_dbh->pg_protocol);
break;
- case 12: /* pg_INV_WRITE */
+ case 12: /* pg_INV_WRITE pg_utf8_flag */
if (strEQ("pg_INV_WRITE", key))
retsv = newSViv((IV) INV_WRITE );
+ else if (strEQ("pg_utf8_flag", key))
+ retsv = newSViv((IV)imp_dbh->utf8_flag);
break;
case 13: /* pg_errorlevel */
@@ -870,7 +853,7 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV
* keysv, SV * valuesv
}
break;
- case 10: /* AutoCommit pg_bool_tf pg_unicode*/
+ case 10: /* AutoCommit pg_bool_tf */
if (strEQ("AutoCommit", key)) {
if (newval != DBIc_has(imp_dbh, DBIcf_AutoCommit)) {
@@ -885,16 +868,28 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv, SV * valuesv
else if (strEQ("pg_bool_tf", key)) {
imp_dbh->pg_bool_tf = newval!=0 ? DBDPG_TRUE :
DBDPG_FALSE;
+ /* Only a few valid values */
+ if (imp_dbh->pg_utf8_flag == -1) {
+ /* Do nothing: same as if it is not set */
+ }
+ else if (imp_dbh->pg_utf8_flag == 0) {
+ imp_dbh->utf8_flag = 0;
+ }
+ else { /* Everything else is 'true' */
+ imp_dbh->utf8_flag = 1;
+ imp_dbh->pg_utf8_flag = 1;
+ }
retval = 1;
}
- else if (strEQ("pg_unicode", key)) {
- imp_dbh->unicode = newval!=0 ? DBDPG_TRUE : DBDPG_FALSE;
- retval = 1;
- }
+ break;
+ case 12: /* pg_utf8_flag */
- break;
+ if (strEQ("pg_utf8_flag", key)) {
+ imp_dbh->pg_utf8_flag = (unsigned)SvIV(valuesv);
+ retval = 1;
+ }
case 13: /* pg_errorlevel */
@@ -1139,7 +1134,7 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * imp_sth,
SV * keysv)
TRACE_PQFNAME;
fieldname = PQfname(imp_sth->result, fields);
sv_fieldname = newSVpv(fieldname,0);
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(sv_fieldname);
(void)av_store(av, fields, sv_fieldname);
}
@@ -2713,7 +2708,7 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t
*imp_dbh, unsigned char * input
av_push(currentav, newSViv('t' ==
*string ? 1 : 0));
else {
SV *sv = newSVpvn(string, section_size);
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(sv);
av_push(currentav, sv);
}
@@ -2842,14 +2837,17 @@ int pg_quickexec (SV * dbh, const char * sql, const int
asyncflag)
if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
/* Upgrade to a true UTF-8 string in place as needed */
- if (imp_dbh->unicode) {
+ if (imp_dbh->utf8_flag) {
// upgrade_utf8 magic on 'sql'
}
TRACE_PQEXEC;
result = PQexec(imp_dbh->conn, sql);
+
status = _sqlstate(aTHX_ imp_dbh, result);
+ check_client_encoding(aTHX_ imp_dbh);
+
imp_dbh->copystate = 0; /* Assume not in copy mode until told otherwise
*/
if (TRACE4) TRC(DBILOGFP, "%sGot a status of %d\n", THEADER, status);
@@ -3313,6 +3311,8 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
status = _sqlstate(aTHX_ imp_dbh, imp_sth->result);
+ check_client_encoding(aTHX_ imp_dbh);
+
imp_dbh->copystate = 0; /* Assume not in copy mode until told otherwise
*/
if (PGRES_TUPLES_OK == status) {
TRACE_PQNFIELDS;
@@ -3396,6 +3396,31 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
} /* end of dbd_st_execute */
+/*
+  Re-sync imp_dbh->utf8_flag with the connection's current client_encoding.
+
+  Called after a statement has been run, since the client_encoding may have
+  been changed by it. Does nothing when the user has set pg_utf8_flag
+  themselves (any value other than -1), or when libpq cannot report the
+  client_encoding. Otherwise the flag is turned on when the encoding starts
+  with "UTF8" and off when it does not; a change is traced at level 4.
+*/
+static void check_client_encoding(pTHX_ imp_dbh_t * imp_dbh)
+{
+	const char * client_encoding;
+	int is_utf8;
+
+	/* Only check if they have not set it themselves */
+	if (imp_dbh->pg_utf8_flag != -1)
+		return;
+
+	/* PQparameterStatus returns NULL for an unknown parameter: never hand that to strncmp */
+	client_encoding = PQparameterStatus(imp_dbh->conn, "client_encoding");
+	if (NULL == client_encoding)
+		return;
+
+	/* See if the client_encoding has changed */
+	is_utf8 = (0 == strncmp(client_encoding, "UTF8", 4)) ? 1 : 0;
+	if (is_utf8 == (imp_dbh->utf8_flag ? 1 : 0))
+		return;
+
+	imp_dbh->utf8_flag = is_utf8;
+	if (TRACE4)
+		TRC(DBILOGFP, "%sclient_encoding change caused utf8 flag to change from %s\n",
+			THEADER, is_utf8 ? "off to on" : "on to off");
+}
+
+
/* ================================================================== */
AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
{
@@ -3495,7 +3520,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
break;
default:
sv_setpvn(sv, (char *)value,
value_len);
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(sv);
}
}
@@ -3503,7 +3528,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
value_len = strlen((char *)value);
sv_setpvn(sv, (char *)value, value_len);
/* Check for specific types here? */
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(sv);
}
@@ -3533,7 +3558,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
*/
const char * const s = SvPV(AvARRAY(av)[i],len);
sv_setpvn(currph->inout, s, len);
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(currph->inout);
}
}
@@ -3879,7 +3904,7 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int async)
if (copystatus > 0) {
sv_setpv(dataline, tempbuf);
- if (imp_dbh->unicode)
+ if (imp_dbh->utf8_flag)
SvUTF8_on(dataline);
TRACE_PQFREEMEM;
PQfreemem(tempbuf);
@@ -4688,6 +4713,7 @@ int pg_db_result (SV *h, imp_dbh_t *imp_dbh)
while ((result = PQgetResult(imp_dbh->conn)) != NULL) {
/* TODO: Better multiple result-set handling */
status = _sqlstate(aTHX_ imp_dbh, result);
+ check_client_encoding(aTHX_ imp_dbh);
switch (status) {
case PGRES_TUPLES_OK:
TRACE_PQNTUPLES;
diff --git a/dbdimp.h b/dbdimp.h
index b30ceaf..a5176d2 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -24,14 +24,15 @@ struct imp_dbh_st {
int pg_errorlevel; /* PQsetErrorVerbosity. Set by user,
defaults to 1 */
int server_prepare; /* do we want to use PQexecPrepared? 0=no
1=yes 2=smart. Can be changed by user */
int async_status; /* 0=no async 1=async started -1=async has
been cancelled */
+ int pg_utf8_flag; /* what the user has set pg_utf8_flag to. -1
means not set */
imp_sth_t *async_sth; /* current async statement handle */
AV *savepoints; /* list of savepoints */
PGconn *conn; /* connection structure */
char *sqlstate; /* from the last result */
+ bool utf8_flag; /* are we setting the internal Perl utf8 flag
on for incoming data? */
bool pg_bool_tf; /* do bools return 't'/'f'? Set by user,
default is 0 */
- bool unicode; /* do we force client_encoding to UTF-8 and set
the Perl utf8 string on returned data? */
bool pg_enable_utf8; /* (DEPRECATED) should we attempt to make
utf8 strings? Set by user, default is 0 */
bool prepare_now; /* force immediate prepares, even with
placeholders. Set by user, default is 0 */
bool done_begin; /* have we done a begin? (e.g. are we in a
transaction?) */
--
1.7.0.5