Committed by Greg Sabino Mullane <[email protected]>

Another stab at the UTF-8 system, this time simplified as much as possible.
See the pod for pg_enable_utf8 for an explanation.
Note that this commit will probably be picked out later, as we
want to release a new minor version before releasing such a big
change.

---
 Pg.pm    |   25 ++++++++++++++---
 dbdimp.c |   88 +++++++++++++++++++++++++++++++++++--------------------------
 dbdimp.h |    6 +++-
 3 files changed, 75 insertions(+), 44 deletions(-)

diff --git a/Pg.pm b/Pg.pm
index f04b1b9..7faf481 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1633,6 +1633,7 @@ use 5.006001;
                                pg_db                          => undef,
                                pg_default_port                => undef,
                                pg_enable_utf8                 => undef,
+                               pg_utf8_flag                   => undef,
                                pg_errorlevel                  => undef,
                                pg_expand_array                => undef,
                                pg_host                        => undef,
@@ -3128,12 +3129,26 @@ DBD::Pg specific attribute. Defaults to false. When 
true, question marks inside
 are not treated as L<placeholders|/Placeholders>. Useful for statements that 
contain unquoted question 
 marks, such as geometric operators.
 
-=head3 B<pg_enable_utf8> (boolean)
+=head3 B<pg_enable_utf8> (integer)
 
-DBD::Pg specific attribute. If true, then the C<utf8> flag will be turned on
-for returned character data (if the data is valid UTF-8). For details about
-the C<utf8> flag, see the C<Encode> module. This attribute is only relevant 
under
-perl 5.8 and later.
+DBD::Pg specific attribute. The behavior of DBD::Pg with regard to this flag has 
+changed as of version xxx. The default value for this attribute, -1, indicates 
+that the internal C<utf8> flag will be turned on for all strings coming back 
+from the database if the client_encoding is set to 'UTF8'. Use of this default 
+is highly encouraged, and you should not need to use this attribute except 
+for the following two conditions:
+
+If this attribute is set to 0, then the internal C<utf8> flag will *never* be 
+turned on for returned data, regardless of the current client_encoding.
+
+If this attribute is set to 1, then the internal C<utf8> flag will *always* 
+be turned on for returned data, regardless of the current client_encoding 
+(with the exception of bytea data).
+
+Note that the value of client_encoding is only checked at connection time. If 
+you change the client_encoding to/from 'UTF8' after connecting, you can set 
+pg_enable_utf8 to -1 to force DBD::Pg to read in the new client_encoding and 
+act accordingly.
 
 =head3 B<pg_errorlevel> (integer)
 
diff --git a/dbdimp.c b/dbdimp.c
index 494033f..1f293e6 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -224,8 +224,15 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
                }
        }
 
-       imp_dbh->pg_bool_tf      = DBDPG_FALSE;
-       imp_dbh->pg_enable_utf8  = DBDPG_FALSE;
+       imp_dbh->client_encoding_utf8 =
+               (0 == strncmp(PQparameterStatus(imp_dbh->conn, 
"client_encoding"), "UTF8", 4))
+               ? DBDPG_TRUE : DBDPG_FALSE;
+
+       /* If the client_encoding is UTF8, flip the utf8 flag until convinced 
otherwise */
+       imp_dbh->pg_utf8_flag = imp_dbh->client_encoding_utf8;
+
+       imp_dbh->pg_enable_utf8  = -1;
+
        imp_dbh->prepare_now     = DBDPG_FALSE;
        imp_dbh->done_begin      = DBDPG_FALSE;
        imp_dbh->dollaronly      = DBDPG_FALSE;
@@ -278,10 +285,8 @@ static void pg_error (pTHX_ SV * h, int error_num, const 
char * error_msg)
        sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
 
        /* Set as utf-8 */
-#ifdef is_utf8_string
-       if (imp_dbh->pg_enable_utf8)
+       if (imp_dbh->pg_utf8_flag)
                SvUTF8_on(DBIc_ERRSTR(imp_xxh));
-#endif
 
        if (TEND_slow) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER_slow);
 
@@ -725,10 +730,12 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv)
                        retsv = newSViv((IV)imp_dbh->pg_protocol);
                break;
 
-       case 12: /* pg_INV_WRITE */
+       case 12: /* pg_INV_WRITE pg_utf8_flag */
 
                if (strEQ("pg_INV_WRITE", key))
                        retsv = newSViv((IV) INV_WRITE );
+               else if (strEQ("pg_utf8_flag", key))
+                       retsv = newSViv((IV)imp_dbh->pg_utf8_flag);
                break;
 
        case 13: /* pg_errorlevel */
@@ -743,10 +750,8 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv)
                        retsv = newSViv((IV) PGLIBVERSION );
                else if (strEQ("pg_prepare_now", key))
                        retsv = newSViv((IV)imp_dbh->prepare_now);
-#ifdef is_utf8_string
                else if (strEQ("pg_enable_utf8", key))
                        retsv = newSViv((IV)imp_dbh->pg_enable_utf8);
-#endif
                break;
 
        case 15: /* pg_default_port pg_async_status pg_expand_array */
@@ -865,12 +870,33 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv, SV * valuesv
                        retval = 1;
                }
 
-#ifdef is_utf8_string
+               /* 
+                  We don't want to check the client_encoding every single time 
we talk to the database,
+                  so we only do it here, which allows people to signal DBD::Pg 
that something 
+                  may have changed, so could you please rescan client_encoding?
+               */
                else if (strEQ("pg_enable_utf8", key)) {
-                       imp_dbh->pg_enable_utf8 = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
+                       /* Technically, we only allow -1, 0, and 1 */
+                       imp_dbh->pg_enable_utf8 = newval;
+
+                       /* Never use the utf8 flag, no matter what */
+                       if (0 == imp_dbh->pg_enable_utf8) {
+                               imp_dbh->pg_utf8_flag = DBDPG_FALSE;
+                       }
+                       /* Always use the flag, no matter what */
+                       else if (1 == imp_dbh->pg_enable_utf8) {
+                               imp_dbh->pg_utf8_flag = DBDPG_TRUE;
+                       }
+                       /* Do The Right Thing */
+                       else {
+                               imp_dbh->client_encoding_utf8 =
+                                       (0 == 
strncmp(PQparameterStatus(imp_dbh->conn, "client_encoding"), "UTF8", 4))
+                                       ? DBDPG_TRUE : DBDPG_FALSE;
+                               imp_dbh->pg_enable_utf8 = -1;
+                               imp_dbh->pg_utf8_flag = 
imp_dbh->client_encoding_utf8;
+                       }
                        retval = 1;
                }
-#endif
                break;
 
        case 15: /* pg_expand_array */
@@ -1084,10 +1110,8 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * imp_sth, 
SV * keysv)
                                TRACE_PQFNAME;
                                fieldname = PQfname(imp_sth->result, fields);
                                sv_fieldname = newSVpv(fieldname,0);
-#ifdef is_utf8_string
                                if (is_high_bit_set(aTHX_ (unsigned char 
*)fieldname, strlen(fieldname)) && is_utf8_string((unsigned char *)fieldname, 
strlen(fieldname)))
                                        SvUTF8_on(sv_fieldname);
-#endif
                                (void)av_store(av, fields, sv_fieldname);
                        }
                }
@@ -2677,14 +2701,9 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t 
*imp_dbh, unsigned char * input
                                        av_push(currentav, newSViv('t' == 
*string ? 1 : 0));
                                else {
                                        SV *sv = newSVpvn(string, section_size);
-#ifdef is_utf8_string
-                                       if (imp_dbh->pg_enable_utf8) {
-                                               SvUTF8_off(sv);
-                                               if (is_high_bit_set(aTHX_ 
(unsigned char *)string, section_size) && is_utf8_string((unsigned 
char*)string, section_size)) {
-                                                       SvUTF8_on(sv);
-                                               }
+                                       if (imp_dbh->pg_utf8_flag) {
+                                               SvUTF8_on(sv);
                                        }
-#endif
                                        av_push(currentav, sv);
 
                                }
@@ -3490,23 +3509,18 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                        }
                                }
                        }
-#ifdef is_utf8_string
-                       if (imp_dbh->pg_enable_utf8 && type_info) {
-                               SvUTF8_off(sv);
-                               switch (type_info->type_id) {
-                               case PG_CHAR:
-                               case PG_TEXT:
-                               case PG_BPCHAR:
-                               case PG_VARCHAR:
-                                       if (is_high_bit_set(aTHX_ value, 
value_len) && is_utf8_string((unsigned char*)value, value_len)) {
-                                               SvUTF8_on(sv);
-                                       }
-                                       break;
-                               default:
-                                       break;
+                       if (imp_dbh->pg_utf8_flag) {
+                               /*
+                                 The only exception to our rule about setting 
utf8 if the client_encoding
+                                 is set to UTF8 is bytea.
+                               */
+                               if (type_info && PG_BYTEA == 
type_info->type_id) {
+                                       SvUTF8_off(sv);
+                               }
+                               else {
+                                       SvUTF8_on(sv);
                                }
                        }
-#endif
                }
        }
        
@@ -3870,10 +3884,8 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int 
async)
 
        if (copystatus > 0) {
                sv_setpv(dataline, tempbuf);
-#ifdef is_utf8_string
-               if (imp_dbh->pg_enable_utf8)
+               if (imp_dbh->pg_utf8_flag)
                        SvUTF8_on(dataline);
-#endif
                TRACE_PQFREEMEM;
                PQfreemem(tempbuf);
        }
diff --git a/dbdimp.h b/dbdimp.h
index 1310e16..6058ff1 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -30,13 +30,17 @@ struct imp_dbh_st {
        PGconn  *conn;             /* connection structure */
        char    *sqlstate;         /* from the last result */
 
+
        bool    pg_bool_tf;        /* do bools return 't'/'f'? Set by user, 
default is 0 */
-       bool    pg_enable_utf8;    /* should we attempt to make utf8 strings? 
Set by user, default is 0 */
        bool    prepare_now;       /* force immediate prepares, even with 
placeholders. Set by user, default is 0 */
        bool    done_begin;        /* have we done a begin? (e.g. are we in a 
transaction?) */
        bool    dollaronly;        /* only consider $1, $2 ... as valid 
placeholders */
        bool    expand_array;      /* transform arrays from the db into Perl 
arrays? Default is 1 */
        bool    txn_read_only;     /* are we in read-only mode? Set with 
$dbh->{ReadOnly} */
+
+       int     pg_enable_utf8;    /* legacy utf8 flag: force utf8 flag on or 
off, regardless of client_encoding */
+       bool    pg_utf8_flag;      /* are we currently flipping the utf8 flag 
on? */
+    bool    client_encoding_utf8; /* is the client_encoding utf8 last we 
checked? */
 };
 
 
-- 
1.7.1

Reply via email to