Committed by Greg Sabino Mullane <[email protected]>

Another way of handling the UTF8 mess, per discussions on The Channel.

---
 Pg.pm    |   19 +++++----
 dbdimp.c |  134 +++++++++++++++++++++++++++++++++++++-------------------------
 dbdimp.h |    3 +-
 3 files changed, 92 insertions(+), 64 deletions(-)

diff --git a/Pg.pm b/Pg.pm
index 989a245..9459f63 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1625,7 +1625,7 @@ use 5.006001;
                                pg_bool_tf                     => undef,
                                pg_db                          => undef,
                                pg_default_port                => undef,
-                               pg_unicode                     => undef,
+                               pg_utf8_flag                   => undef,
                                pg_enable_utf8                 => undef,
                                pg_errorlevel                  => undef,
                                pg_expand_array                => undef,
@@ -3122,19 +3122,20 @@ DBD::Pg specific attribute. Defaults to false. When 
true, question marks inside
 are not treated as L<placeholders|/Placeholders>. Useful for statements that 
contain unquoted question 
 marks, such as geometric operators.
 
-=head3 B<pg_unicode> (boolean)
+=head3 B<pg_utf8_flag> (integer)
 
 DBD::Pg specific attribute. In normal use, this should not be needed, as it 
will be set 
-automatically according to the server encoding. SQL_ASCII will set this to 
false, while 
-everything else will set it to true. If you force it off, then everything will 
be returned 
-as byte soup, even data from UTF-8 databases, which is very likely not what 
you want. If 
-you force it on for SQL_ASCII databases, the results will be unpredictable. It 
is recommended 
-that you only use this attribute as a last resort and with a full 
understanding of what 
-it does.
+automatically according to the client encoding. If the client_encoding is 'UTF8', this
+attribute will be turned on, which will cause strings coming back from the database to
+be marked with Perl's internal utf8 flag. If you set this attribute yourself to 0 or 1,
+then no checking of client_encoding will ever be done. Do not set this attribute unless
+you really know what you are doing, and understand how utf8 differs from UTF8. Setting it
+to 1 will always cause the flag to be set. Setting it to 0 will prevent the flag from ever
+being set. Setting it to -1 will switch back to the default behavior of checking the client_encoding.
 
 =head3 B<pg_enable_utf8> (boolean)
 
-Deprecated, please use pg_unicode instead.
+Deprecated.
 
 =head3 B<pg_errorlevel> (integer)
 
diff --git a/dbdimp.c b/dbdimp.c
index 4a151b7..eb54f11 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -78,6 +78,7 @@ typedef enum
 
 static void pg_error(pTHX_ SV *h, int error_num, const char *error_msg);
 static void pg_warn (void * arg, const char * message);
+static void check_client_encoding(pTHX_ imp_dbh_t *imp_dbh);
 static ExecStatusType _result(pTHX_ imp_dbh_t *imp_dbh, const char *sql);
 static ExecStatusType _sqlstate(pTHX_ imp_dbh_t *imp_dbh, PGresult *result);
 static int pg_db_rollback_commit (pTHX_ SV *dbh, imp_dbh_t *imp_dbh, int 
action);
@@ -108,9 +109,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        bool           inquote = DBDPG_FALSE;
        STRLEN         connect_string_size;
        ConnStatusType connstatus;
-       int            unicode;
-       const char *   server_encoding;
-       const char *   client_encoding;
 
        if (TSTART) TRC(DBILOGFP, "%sBegin dbd_db_login\n", THEADER);
   
@@ -213,33 +211,22 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        TRACE_PQPROTOCOLVERSION;
        imp_dbh->pg_protocol = PQprotocolVersion(imp_dbh->conn);
 
-       /* Check the value of the pg_unicode attribute. Default to not set (-1) 
*/
-       unicode = -1;
-       DBD_ATTRIB_GET_IV(attr, "pg_unicode", 10, svp, unicode);
-
-       /*
-         We need to see if we are treating things with utf8 respect, or as 
byte soup
-         The rules are:
-         - An explicit pg_unicode setting trumps everything else
-         - A server_encoding of SQL_ASCII is always byte soup
-      - If the client_encoding matches the server_encoding, set unicode on
-         - Otherwise, we leave things alone
-       */
-       client_encoding = PQparameterStatus(imp_dbh->conn, "client_encoding");
+       /* Check the value of the pg_utf8_flag attribute */
+       imp_dbh->pg_utf8_flag = -1;
+       DBD_ATTRIB_GET_IV(attr, "pg_utf8_flag", 12, svp, imp_dbh->pg_utf8_flag);
+       if (imp_dbh->pg_utf8_flag == -1) { /* Has not been explicitly set by 
the user */
+               /*
+                 Check the client_encoding. If UTF-8, set the flag on, else off
+               */
+               imp_dbh->utf8_flag = (0 == 
strncmp(PQparameterStatus(imp_dbh->conn, "client_encoding"), "UTF8", 4))
+                       ? 0 : 1;
+       }
+       else {
+               /* We allow -1 and 0 direct, and force everything else to 1 */
+               if (imp_dbh->pg_utf8_flag < -1 || imp_dbh->pg_utf8_flag > 1)
+                       imp_dbh->pg_utf8_flag = imp_dbh->pg_utf8_flag ? 1 : 0;
 
-       if (unicode > 1) { /* Force it on, no matter what */
-         imp_dbh->unicode = DBDPG_TRUE;
-    }
-    else {
-               if (unicode == 0) { /* Force it off, no matter what */
-                       imp_dbh->unicode = DBDPG_FALSE;
-               }
-               else { /* Neither is set, so check the encodings */
-                       server_encoding = PQparameterStatus(imp_dbh->conn, 
"server_encoding");
-                       /* If they match, set unicode to true, otherwise, false 
*/
-                       imp_dbh->unicode = (0==strcmp(server_encoding, 
client_encoding))
-                                          ? DBDPG_TRUE : DBDPG_FALSE;
-               }
+               imp_dbh->utf8_flag = imp_dbh->pg_utf8_flag;
        }
 
        /* Figure out this particular backend's version */
@@ -286,12 +273,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        /* Tell DBI that we should call disconnect when the handle dies */
        DBIc_ACTIVE_on(imp_dbh);
 
-       /* If needed, set the client_encoding to UTF-8 */
-       if (imp_dbh->unicode &&
-               (0 != strncmp(client_encoding, "UTF-8", 5))) {
-               PQexec(imp_dbh->conn, "SET client_encoding = 'UTF-8'");
-       }
-
        if (TEND) TRC(DBILOGFP, "%sEnd dbd_db_login\n", THEADER);
 
        return 1;
@@ -323,7 +304,7 @@ static void pg_error (pTHX_ SV * h, int error_num, const 
char * error_msg)
        sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
 
        /* Set as utf-8 */
-       if (imp_dbh->unicode)
+       if (imp_dbh->utf8_flag)
                SvUTF8_on(DBIc_ERRSTR(imp_xxh));
 
        if (TEND) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER);
@@ -387,7 +368,7 @@ static ExecStatusType _result(pTHX_ imp_dbh_t * imp_dbh, 
const char * sql)
        if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
 
        /* Upgrade to a true UTF-8 string in place as needed */
-       if (imp_dbh->unicode) {
+       if (imp_dbh->utf8_flag) {
                // upgrade_utf8 magic on 'sql'
        }
 
@@ -396,6 +377,8 @@ static ExecStatusType _result(pTHX_ imp_dbh_t * imp_dbh, 
const char * sql)
 
        status = _sqlstate(aTHX_ imp_dbh, result);
 
+       check_client_encoding(aTHX_ imp_dbh);
+
        TRACE_PQCLEAR;
        PQclear(result);
 
@@ -749,7 +732,7 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                }
                break;
 
-       case 10: /* AutoCommit  pg_bool_tf  pg_pid_number  pg_options  
pg_unicode */
+       case 10: /* AutoCommit  pg_bool_tf  pg_pid_number  pg_options  */
 
                if (strEQ("AutoCommit", key))
                        retsv = boolSV(DBIc_has(imp_dbh, DBIcf_AutoCommit));
@@ -761,8 +744,6 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                        TRACE_PQOPTIONS;
                        retsv = newSVpv(PQoptions(imp_dbh->conn),0);
                }
-               else if (strEQ("pg_unicode", key))
-                       retsv = newSViv((IV)imp_dbh->unicode);
                break;
 
        case 11: /* pg_INV_READ  pg_protocol */
@@ -773,10 +754,12 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv)
                        retsv = newSViv((IV)imp_dbh->pg_protocol);
                break;
 
-       case 12: /* pg_INV_WRITE */
+       case 12: /* pg_INV_WRITE pg_utf8_flag */
 
                if (strEQ("pg_INV_WRITE", key))
                        retsv = newSViv((IV) INV_WRITE );
+               else if (strEQ("pg_utf8_flag", key))
+                       retsv = newSViv((IV)imp_dbh->utf8_flag);
                break;
 
        case 13: /* pg_errorlevel */
@@ -870,7 +853,7 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv, SV * valuesv
                }
                break;
 
-       case 10: /* AutoCommit  pg_bool_tf  pg_unicode*/
+       case 10: /* AutoCommit  pg_bool_tf */
 
                if (strEQ("AutoCommit", key)) {
                        if (newval != DBIc_has(imp_dbh, DBIcf_AutoCommit)) {
@@ -885,16 +868,28 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv, SV * valuesv
 
                else if (strEQ("pg_bool_tf", key)) {
                        imp_dbh->pg_bool_tf = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
+                       /* Only a few valid values */
+                       if (imp_dbh->pg_utf8_flag == -1) {
+                               /* Do nothing: same as if it is not set */
+                       }
+                       else if (imp_dbh->pg_utf8_flag == 0) {
+                               imp_dbh->utf8_flag = 0;
+                       }
+                       else { /* Everything else is 'true' */
+                               imp_dbh->utf8_flag = 1;
+                               imp_dbh->pg_utf8_flag = 1;
+                       }
                        retval = 1;
                }
 
-               else if (strEQ("pg_unicode", key)) {
-                       imp_dbh->unicode = newval!=0 ? DBDPG_TRUE : DBDPG_FALSE;
-                       retval = 1;
-               }
+               break;
 
+       case 12: /* pg_utf8_flag */
 
-               break;
+               if (strEQ("pg_utf8_flag", key)) {
+                       imp_dbh->pg_utf8_flag = (unsigned)SvIV(valuesv);
+                       retval = 1;
+               }
 
        case 13: /* pg_errorlevel */
 
@@ -1139,7 +1134,7 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * imp_sth, 
SV * keysv)
                                TRACE_PQFNAME;
                                fieldname = PQfname(imp_sth->result, fields);
                                sv_fieldname = newSVpv(fieldname,0);
-                               if (imp_dbh->unicode)
+                               if (imp_dbh->utf8_flag)
                                        SvUTF8_on(sv_fieldname);
                                (void)av_store(av, fields, sv_fieldname);
                        }
@@ -2713,7 +2708,7 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t 
*imp_dbh, unsigned char * input
                                        av_push(currentav, newSViv('t' == 
*string ? 1 : 0));
                                else {
                                        SV *sv = newSVpvn(string, section_size);
-                                       if (imp_dbh->unicode)
+                                       if (imp_dbh->utf8_flag)
                                                SvUTF8_on(sv);
                                        av_push(currentav, sv);
                                }
@@ -2842,14 +2837,17 @@ int pg_quickexec (SV * dbh, const char * sql, const int 
asyncflag)
        if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
 
        /* Upgrade to a true UTF-8 string in place as needed */
-       if (imp_dbh->unicode) {
+       if (imp_dbh->utf8_flag) {
                // upgrade_utf8 magic on 'sql'
        }
 
        TRACE_PQEXEC;
        result = PQexec(imp_dbh->conn, sql);
+
        status = _sqlstate(aTHX_ imp_dbh, result);
 
+       check_client_encoding(aTHX_ imp_dbh);
+
        imp_dbh->copystate = 0; /* Assume not in copy mode until told otherwise 
*/
 
        if (TRACE4) TRC(DBILOGFP, "%sGot a status of %d\n", THEADER, status);
@@ -3313,6 +3311,8 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
 
        status = _sqlstate(aTHX_ imp_dbh, imp_sth->result);
 
+       check_client_encoding(aTHX_ imp_dbh);
+
        imp_dbh->copystate = 0; /* Assume not in copy mode until told otherwise 
*/
        if (PGRES_TUPLES_OK == status) {
                TRACE_PQNFIELDS;
@@ -3396,6 +3396,31 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
 } /* end of dbd_st_execute */
 
 
+/* Keep utf8_flag in step with the current client_encoding; a no-op when the
+   user has set pg_utf8_flag explicitly (i.e. to anything other than -1) */
+static void check_client_encoding(pTHX_ imp_dbh_t * imp_dbh)
+{
+       const char * encoding;
+       int          is_utf8;
+
+       if (imp_dbh->pg_utf8_flag != -1) /* Only check if they have not set it themselves */
+               return;
+
+       /* PQparameterStatus returns NULL for an unknown parameter: keep the flag as is */
+       encoding = PQparameterStatus(imp_dbh->conn, "client_encoding");
+       if (NULL == encoding)
+               return;
+
+       is_utf8 = (0 == strncmp(encoding, "UTF8", 4)) ? 1 : 0;
+       if (is_utf8 != (imp_dbh->utf8_flag ? 1 : 0)) { /* The client_encoding has changed */
+               imp_dbh->utf8_flag = is_utf8 ? DBDPG_TRUE : DBDPG_FALSE;
+               if (TRACE4)
+                       TRC(DBILOGFP, "%sclient_encoding change caused utf8 flag to change from %s\n",
+                               THEADER, is_utf8 ? "off to on" : "on to off");
+       }
+}
+
+
 /* ================================================================== */
 AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
 {
@@ -3495,7 +3520,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                                break;
                                        default:
                                                sv_setpvn(sv, (char *)value, 
value_len);
-                                               if (imp_dbh->unicode)
+                                               if (imp_dbh->utf8_flag)
                                                        SvUTF8_on(sv);
                                        }
                                }
@@ -3503,7 +3528,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                        value_len = strlen((char *)value);
                                        sv_setpvn(sv, (char *)value, value_len);
                                        /* Check for specific types here? */
-                                       if (imp_dbh->unicode)
+                                       if (imp_dbh->utf8_flag)
                                                SvUTF8_on(sv);
                                }
                        
@@ -3533,7 +3558,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                */
                                const char * const s = SvPV(AvARRAY(av)[i],len);
                                sv_setpvn(currph->inout, s, len);
-                               if (imp_dbh->unicode)
+                               if (imp_dbh->utf8_flag)
                                        SvUTF8_on(currph->inout);
                        }
                }
@@ -3879,7 +3904,7 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int async)
 
        if (copystatus > 0) {
                sv_setpv(dataline, tempbuf);
-               if (imp_dbh->unicode)
+               if (imp_dbh->utf8_flag)
                        SvUTF8_on(dataline);
                TRACE_PQFREEMEM;
                PQfreemem(tempbuf);
@@ -4688,6 +4713,7 @@ int pg_db_result (SV *h, imp_dbh_t *imp_dbh)
        while ((result = PQgetResult(imp_dbh->conn)) != NULL) {
                /* TODO: Better multiple result-set handling */
                status = _sqlstate(aTHX_ imp_dbh, result);
+               check_client_encoding(aTHX_ imp_dbh);
                switch (status) {
                case PGRES_TUPLES_OK:
                        TRACE_PQNTUPLES;
diff --git a/dbdimp.h b/dbdimp.h
index b30ceaf..a5176d2 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -24,14 +24,15 @@ struct imp_dbh_st {
        int     pg_errorlevel;     /* PQsetErrorVerbosity. Set by user, 
defaults to 1 */
        int     server_prepare;    /* do we want to use PQexecPrepared? 0=no 
1=yes 2=smart. Can be changed by user */
        int     async_status;      /* 0=no async 1=async started -1=async has 
been cancelled */
+    int     pg_utf8_flag;      /* what the user has set pg_utf8_flag to. -1 
means not set */
 
     imp_sth_t *async_sth;      /* current async statement handle */
        AV      *savepoints;       /* list of savepoints */
        PGconn  *conn;             /* connection structure */
        char    *sqlstate;         /* from the last result */
 
+    bool    utf8_flag;         /* are we setting the internal Perl utf8 flag 
on for incoming data? */
        bool    pg_bool_tf;        /* do bools return 't'/'f'? Set by user, 
default is 0 */
-    bool    unicode;           /* do we force client_encoding to UTF-8 and set 
the Perl utf8 string on returned data? */
        bool    pg_enable_utf8;    /* (DEPRECATED) should we attempt to make 
utf8 strings? Set by user, default is 0 */
        bool    prepare_now;       /* force immediate prepares, even with 
placeholders. Set by user, default is 0 */
        bool    done_begin;        /* have we done a begin? (e.g. are we in a 
transaction?) */
-- 
1.7.0.5

Reply via email to