Committed by Greg Sabino Mullane <[email protected]>

Quick partial implementation of the new UTF-8 plan.

---
 dbdimp.c      |   95 ++++++++++++++++++++++++++++++++++++--------------------
 dbdimp.h      |    2 +-
 testme.tmp.pl |    6 +++
 3 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/dbdimp.c b/dbdimp.c
index 30567d0..076bcbc 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -108,7 +108,9 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        bool           inquote = DBDPG_FALSE;
        STRLEN         connect_string_size;
        ConnStatusType connstatus;
-       int            utf8int;
+       int            unicode;
+       const char *   server_encoding;
+       const char *   client_encoding;
 
        if (TSTART) TRC(DBILOGFP, "%sBegin dbd_db_login\n", THEADER);
   
@@ -211,28 +213,32 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        TRACE_PQPROTOCOLVERSION;
        imp_dbh->pg_protocol = PQprotocolVersion(imp_dbh->conn);
 
-       /* Check the value of the pg_utf8_strings attribute. Default to not set 
(-1) */
-       utf8int = -1;
-       DBD_ATTRIB_GET_IV(attr, "pg_utf8_strings", 15, svp, utf8int);
+       /* Check the value of the pg_unicode attribute. Default to not set (-1) 
*/
+       unicode = -1;
+       DBD_ATTRIB_GET_IV(attr, "pg_unicode", 10, svp, unicode);
 
        /*
          We need to see if we are treating things with utf8 respect, or as 
byte soup
          The rules are:
-         - pg_utf8_strings trumps everything else
-         - SQL_ASCII is always byte soup
-         - Everything else is not
+         - An explicit pg_unicode setting trumps everything else
+         - A server_encoding of SQL_ASCII is always byte soup
+      - If the client_encoding matches the server_encoding, set unicode on
+         - Otherwise, we leave things alone
        */
-       if (utf8int > 1) { /* Force it on, no matter what */
-         imp_dbh->utf8_strings = DBDPG_TRUE;
+       client_encoding = PQparameterStatus(imp_dbh->conn, "client_encoding");
+
+       if (unicode > 1) { /* Force it on, no matter what */
+         imp_dbh->unicode = DBDPG_TRUE;
     }
     else {
-               if (utf8int == 0) { /* Force it off, no matter what */
-                       imp_dbh->utf8_strings = DBDPG_FALSE;
+               if (unicode == 0) { /* Force it off, no matter what */
+                       imp_dbh->unicode = DBDPG_FALSE;
                }
-               else { /* Neither is set, so use the server_encoding */
-                       imp_dbh->utf8_strings = 
-                               (0 == strncmp(PQparameterStatus(imp_dbh->conn, 
"server_encoding"), "SQL_ASCII", 9))
-                               ? DBDPG_FALSE : DBDPG_TRUE;
+               else { /* Neither is set, so check the encodings */
+                       server_encoding = PQparameterStatus(imp_dbh->conn, 
"server_encoding");
+                       /* If they match, set unicode to true, otherwise, false 
*/
+                       imp_dbh->unicode = (0==strcmp(server_encoding, 
client_encoding))
+                                          ? DBDPG_TRUE : DBDPG_FALSE;
                }
        }
 
@@ -271,7 +277,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        /* Deprecated: */
        imp_dbh->pg_enable_utf8  = DBDPG_FALSE;
 
-
        /* If using server version 7.4, switch to "smart" */
        imp_dbh->server_prepare = PGLIBVERSION >= 80000 ? 1 : 2;
 
@@ -281,6 +286,12 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        /* Tell DBI that we should call disconnect when the handle dies */
        DBIc_ACTIVE_on(imp_dbh);
 
+       /* If needed, set the client_encoding to UTF-8 */
+       if (imp_dbh->unicode &&
+               (0 == strncmp(client_encoding, "UTF-8", 5))) {
+               PQexec(imp_dbh->conn, "SET client_encoding = 'UTF-8'");
+       }
+
        if (TEND) TRC(DBILOGFP, "%sEnd dbd_db_login\n", THEADER);
 
        return 1;
@@ -312,7 +323,7 @@ static void pg_error (pTHX_ SV * h, int error_num, const 
char * error_msg)
        sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
 
        /* Set as utf-8 */
-       if (imp_dbh->utf8_strings)
+       if (imp_dbh->unicode)
                SvUTF8_on(DBIc_ERRSTR(imp_xxh));
 
        if (TEND) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER);
@@ -375,6 +386,11 @@ static ExecStatusType _result(pTHX_ imp_dbh_t * imp_dbh, 
const char * sql)
 
        if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
 
+       /* Upgrade to a true UTF-8 string in place as needed */
+       if (imp_dbh->unicode) {
+               // upgrade_utf8 magic on 'sql'
+       }
+
        TRACE_PQEXEC;
        result = PQexec(imp_dbh->conn, sql);
 
@@ -733,7 +749,7 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                }
                break;
 
-       case 10: /* AutoCommit  pg_bool_tf  pg_pid_number  pg_options */
+       case 10: /* AutoCommit  pg_bool_tf  pg_pid_number  pg_options  
pg_unicode */
 
                if (strEQ("AutoCommit", key))
                        retsv = boolSV(DBIc_has(imp_dbh, DBIcf_AutoCommit));
@@ -745,6 +761,8 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                        TRACE_PQOPTIONS;
                        retsv = newSVpv(PQoptions(imp_dbh->conn),0);
                }
+               else if (strEQ("pg_unicode", key))
+                       retsv = newSViv((IV)imp_dbh->unicode);
                break;
 
        case 11: /* pg_INV_READ  pg_protocol */
@@ -777,7 +795,7 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                        retsv = newSViv((IV)imp_dbh->pg_enable_utf8);
                break;
 
-       case 15: /* pg_default_port pg_async_status pg_expand_array 
pg_utf8_strings */
+       case 15: /* pg_default_port pg_async_status pg_expand_array */
 
                if (strEQ("pg_default_port", key))
                        retsv = newSViv((IV) PGDEFPORT );
@@ -785,8 +803,6 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                        retsv = newSViv((IV)imp_dbh->async_status);
                else if (strEQ("pg_expand_array", key))
                        retsv = newSViv((IV)imp_dbh->expand_array);
-               else if (strEQ("pg_utf8_strings", key))
-                       retsv = newSViv((IV)imp_dbh->utf8_strings);
                break;
 
        case 17: /* pg_server_prepare  pg_server_version */
@@ -854,7 +870,7 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv, SV * valuesv
                }
                break;
 
-       case 10: /* AutoCommit  pg_bool_tf */
+       case 10: /* AutoCommit  pg_bool_tf  pg_unicode*/
 
                if (strEQ("AutoCommit", key)) {
                        if (newval != DBIc_has(imp_dbh, DBIcf_AutoCommit)) {
@@ -866,10 +882,18 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv, SV * valuesv
                        }
                        retval = 1;
                }
+
                else if (strEQ("pg_bool_tf", key)) {
                        imp_dbh->pg_bool_tf = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
                        retval = 1;
                }
+
+               else if (strEQ("pg_unicode", key)) {
+                       imp_dbh->unicode = newval!=0 ? DBDPG_TRUE : DBDPG_FALSE;
+                       retval = 1;
+               }
+
+
                break;
 
        case 13: /* pg_errorlevel */
@@ -902,18 +926,13 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv, SV * valuesv
 
                break;
 
-       case 15: /* pg_expand_array pg_utf8_strings */
+       case 15: /* pg_expand_array */
 
                if (strEQ("pg_expand_array", key)) {
                        imp_dbh->expand_array = newval ? DBDPG_TRUE : 
DBDPG_FALSE;
                        retval = 1;
                }
 
-               else if (strEQ("pg_utf8_strings", key)) {
-                       imp_dbh->utf8_strings = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
-                       retval = 1;
-               }
-
                break;
 
        case 17: /* pg_server_prepare */
@@ -1120,7 +1139,7 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * imp_sth, 
SV * keysv)
                                TRACE_PQFNAME;
                                fieldname = PQfname(imp_sth->result, fields);
                                sv_fieldname = newSVpv(fieldname,0);
-                               if (imp_dbh->utf8_strings)
+                               if (imp_dbh->unicode)
                                        SvUTF8_on(sv_fieldname);
                                (void)av_store(av, fields, sv_fieldname);
                        }
@@ -2694,7 +2713,7 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t 
*imp_dbh, unsigned char * input
                                        av_push(currentav, newSViv('t' == 
*string ? 1 : 0));
                                else {
                                        SV *sv = newSVpvn(string, section_size);
-                                       if (imp_dbh->utf8_strings)
+                                       if (imp_dbh->unicode)
                                                SvUTF8_on(sv);
                                        av_push(currentav, sv);
                                }
@@ -2822,6 +2841,11 @@ int pg_quickexec (SV * dbh, const char * sql, const int 
asyncflag)
 
        if (TSQL) TRC(DBILOGFP, "%s;\n\n", sql);
 
+       /* Upgrade to a true UTF-8 string in place as needed */
+       if (imp_dbh->unicode) {
+               // upgrade_utf8 magic on 'sql'
+       }
+
        TRACE_PQEXEC;
        result = PQexec(imp_dbh->conn, sql);
        status = _sqlstate(aTHX_ imp_dbh, result);
@@ -3145,6 +3169,7 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
                }
                else {
                        TRACE_PQEXECPREPARED;
+                       // upgrade utf8 magic: prepare_name, PQvals
                        imp_sth->result = PQexecPrepared
                                (imp_dbh->conn, imp_sth->prepare_name, 
imp_sth->numphs, imp_sth->PQvals, imp_sth->PQlens, imp_sth->PQfmts, 0);
                }
@@ -3224,6 +3249,7 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
                        }
                        else {
                                TRACE_PQEXECPARAMS;
+                               // upgrade utf8 magic: statement, PQvals
                                imp_sth->result = PQexecParams
                                        (imp_dbh->conn, statement, 
imp_sth->numphs, imp_sth->PQoids, imp_sth->PQvals, imp_sth->PQlens, 
imp_sth->PQfmts, 0);
                        }
@@ -3263,6 +3289,7 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
                        }
                        else {
                                TRACE_PQEXEC;
+                               // upgrade utf8 magic: statement
                                imp_sth->result = PQexec(imp_dbh->conn, 
statement);
                        }
 
@@ -3468,7 +3495,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                                break;
                                        default:
                                                sv_setpvn(sv, (char *)value, 
value_len);
-                                               if (imp_dbh->utf8_strings)
+                                               if (imp_dbh->unicode)
                                                        SvUTF8_on(sv);
                                        }
                                }
@@ -3476,7 +3503,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                        value_len = strlen((char *)value);
                                        sv_setpvn(sv, (char *)value, value_len);
                                        /* Check for specific types here? */
-                                       if (imp_dbh->utf8_strings)
+                                       if (imp_dbh->unicode)
                                                SvUTF8_on(sv);
                                }
                        
@@ -3506,7 +3533,7 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                */
                                const char * const s = SvPV(AvARRAY(av)[i],len);
                                sv_setpvn(currph->inout, s, len);
-                               if (imp_dbh->utf8_strings)
+                               if (imp_dbh->unicode)
                                        SvUTF8_on(currph->inout);
                        }
                }
@@ -3852,7 +3879,7 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int async)
 
        if (copystatus > 0) {
                sv_setpv(dataline, tempbuf);
-               if (imp_dbh->utf8_strings)
+               if (imp_dbh->unicode)
                        SvUTF8_on(dataline);
                TRACE_PQFREEMEM;
                PQfreemem(tempbuf);
diff --git a/dbdimp.h b/dbdimp.h
index 6310514..b30ceaf 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -31,7 +31,7 @@ struct imp_dbh_st {
        char    *sqlstate;         /* from the last result */
 
        bool    pg_bool_tf;        /* do bools return 't'/'f'? Set by user, 
default is 0 */
-    bool    utf8_strings;      /* so we set the utf8 flag on data from the 
database? */
+    bool    unicode;           /* do we force client_encoding to UTF-8 and set 
the Perl utf8 string on returned data? */
        bool    pg_enable_utf8;    /* (DEPRECATED) should we attempt to make 
utf8 strings? Set by user, default is 0 */
        bool    prepare_now;       /* force immediate prepares, even with 
placeholders. Set by user, default is 0 */
        bool    done_begin;        /* have we done a begin? (e.g. are we in a 
transaction?) */
diff --git a/testme.tmp.pl b/testme.tmp.pl
index 713f5f8..d540714 100755
--- a/testme.tmp.pl
+++ b/testme.tmp.pl
@@ -19,6 +19,7 @@ my $tracelevel = shift || 0;
 $ENV{DBI_TRACE} = $tracelevel;
 
 my $dbname = 'latin';
+$dbname = 'greg';
 my $DSN = "DBI:Pg:dbname=$dbname";
 
 my $dbh = DBI->connect($DSN, '', '', 
{AutoCommit=>0,RaiseError=>1,PrintError=>0})
@@ -27,6 +28,11 @@ my $dbh = DBI->connect($DSN, '', '', 
{AutoCommit=>0,RaiseError=>1,PrintError=>0}
 my $me = $dbh->{Driver}{Name};
 print "DBI is version $DBI::VERSION, I am $me, version of DBD::Pg is 
$DBD::Pg::VERSION\n";
 
+my $SQL = 'SHOW client_encoding';
+my $enc = $dbh->selectall_arrayref($SQL)->[0][0];
+print "Client encoding: $enc\n";
+$dbh->commit();
+
 exit;
 
 sub memory_leak_test_bug_65734 {
-- 
1.7.0.5

Reply via email to