Committed by Greg Sabino Mullane <[email protected]>

First pass at new UTF-8 support.
Deprecate pg_enable_utf8 completely.
Use the new pg_utf8_strings.
It defaults to on in all cases except when the server_encoding is SQL_ASCII.

---
 Pg.pm         |   16 ++++++--
 TODO          |    2 -
 dbdimp.c      |  109 ++++++++++++++++++++------------------------------------
 dbdimp.h      |    5 +--
 t/02attribs.t |    8 ++--
 t/09arrays.t  |    5 +--
 testme.tmp.pl |    6 ++--
 7 files changed, 62 insertions(+), 89 deletions(-)

diff --git a/Pg.pm b/Pg.pm
index ac4d9ba..f6afbeb 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1626,6 +1626,7 @@ use 5.006001;
                                pg_bool_tf                     => undef,
                                pg_db                          => undef,
                                pg_default_port                => undef,
+                               pg_utf8_strings                => undef,
                                pg_enable_utf8                 => undef,
                                pg_errorlevel                  => undef,
                                pg_expand_array                => undef,
@@ -3122,12 +3123,19 @@ DBD::Pg specific attribute. Defaults to false. When 
true, question marks inside
 are not treated as L<placeholders|/Placeholders>. Useful for statements that 
contain unquoted question 
 marks, such as geometric operators.
 
+=head3 B<pg_utf8_strings> (boolean)
+
+DBD::Pg specific attribute. In normal use, this should not be needed, as it 
will be set 
+automatically according to the server encoding. SQL_ASCII will set this to 
false, while 
+everything else will set it to true. If you force it off, then everything will 
be returned 
+as byte soup, even data from UTF-8 databases, which is very likely not what 
you want. If 
+you force it on for SQL_ASCII databases, the results will be unpredictable. It 
is recommended 
+that you only use this attribute as a last resort and with a full 
understanding of what 
+it does.
+
 =head3 B<pg_enable_utf8> (boolean)
 
-DBD::Pg specific attribute. If true, then the C<utf8> flag will be turned on
-for returned character data (if the data is valid UTF-8). For details about
-the C<utf8> flag, see the C<Encode> module. This attribute is only relevant 
under
-perl 5.8 and later.
+Deprecated, please use pg_utf8_strings instead.
 
 =head3 B<pg_errorlevel> (integer)
 
diff --git a/TODO b/TODO
index 1afec40..7ace7e1 100644
--- a/TODO
+++ b/TODO
@@ -19,8 +19,6 @@ http://rt.cpan.org/Public/Dist/Display.html?Name=DBD-Pg
    supported as a server encoding (e.g. BIG5)
 - Support passing hashrefs in and out for custom types.
 - Support a flag for behind-the-scenes CURSOR to emulate partial fetches.
-- Handle unicode conversion better and perhaps eliminate the need for 
-  the pg_enable_utf8 attribute.
 - Fix this: http://nntp.x.perl.org/group/perl.cpan.testers/2698430
 - Composite type support: 
http://www.postgresql.org/docs/current/interactive/rowtypes.html
 - Full support for execute_array, e.g. the return values
diff --git a/dbdimp.c b/dbdimp.c
index 09e7b8d..81a697e 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -85,7 +85,6 @@ static ExecStatusType _sqlstate(pTHX_ imp_dbh_t *imp_dbh, 
PGresult *result);
 static int pg_db_rollback_commit (pTHX_ SV *dbh, imp_dbh_t *imp_dbh, int 
action);
 static void pg_st_split_statement (pTHX_ imp_sth_t *imp_sth, int version, char 
*statement);
 static int pg_st_prepare_statement (pTHX_ SV *sth, imp_sth_t *imp_sth);
-static int is_high_bit_set(pTHX_ const unsigned char *val, STRLEN size);
 static int pg_st_deallocate_statement(pTHX_ SV *sth, imp_sth_t *imp_sth);
 static PGTransactionStatusType pg_db_txn_status (pTHX_ imp_dbh_t *imp_dbh);
 static int pg_db_start_txn (pTHX_ SV *dbh, imp_dbh_t *imp_dbh);
@@ -214,31 +213,28 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        TRACE_PQPROTOCOLVERSION;
        imp_dbh->pg_protocol = PQprotocolVersion(imp_dbh->conn);
 
-       /* Grab the server encoding so we can set out utf8 flags intelligently 
*/
-       imp_dbh->server_encoding = PQparameterStatus(imp_dbh->conn, 
"server_encoding");
-
-       /* Check the value of the pg_enable_utf8 attribute. Default to not 
there or -1 */
+       /* Check the value of the pg_utf8_strings attribute. Default to not set 
(-1) */
        utf8int = -1;
-       DBD_ATTRIB_GET_IV(attr, "pg_enable_utf8", 14, svp, utf8int);
+       DBD_ATTRIB_GET_IV(attr, "pg_utf8_strings", 15, svp, utf8int);
 
        /*
          We need to see if we are treating things with utf8 respect, or as 
byte soup
          The rules are:
-         pg_enable_utf8 trumps everything else
-         SQL_ASCII is always byte soup
-         Everything else is not
+         - pg_utf8_strings trumps everything else
+         - SQL_ASCII is always byte soup
+         - Everything else is not
        */
        if (utf8int > 1) { /* Force it on, no matter what */
-         imp_dbh->utf8 = 1;
+         imp_dbh->utf8_strings = DBDPG_TRUE;
     }
     else {
                if (utf8int == 0) { /* Force it off, no matter what */
-                       imp_dbh->utf8 = 0;
+                       imp_dbh->utf8_strings = DBDPG_FALSE;
                }
-               else {
-                       imp_dbh->utf8 = 
-                               (0 == strncmp(imp_dbh->server_encoding, 
"SQL_ASCII", 9))
-                               ? 0 : 1;
+               else { /* Neither is set, so use the server_encoding */
+                       imp_dbh->utf8_strings = 
+                               (0 == strncmp(PQparameterStatus(imp_dbh->conn, 
"server_encoding"), "SQL_ASCII", 9))
+                               ? DBDPG_FALSE : DBDPG_TRUE;
                }
        }
 
@@ -262,7 +258,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
 
        /* Set all the defaults for this database handle */
        imp_dbh->pg_bool_tf      = DBDPG_FALSE;
-       imp_dbh->pg_enable_utf8  = DBDPG_FALSE;
        imp_dbh->prepare_now     = DBDPG_FALSE;
        imp_dbh->done_begin      = DBDPG_FALSE;
        imp_dbh->dollaronly      = DBDPG_FALSE;
@@ -275,6 +270,10 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char * 
dbname, char * uid, cha
        imp_dbh->async_status    = 0;
        imp_dbh->async_sth       = NULL;
 
+       /* Deprecated: */
+       imp_dbh->pg_enable_utf8  = DBDPG_FALSE;
+
+
        /* If using server version 7.4, switch to "smart" */
        imp_dbh->server_prepare = PGLIBVERSION >= 80000 ? 1 : 2;
 
@@ -315,10 +314,8 @@ static void pg_error (pTHX_ SV * h, int error_num, const 
char * error_msg)
        sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
 
        /* Set as utf-8 */
-#ifdef is_utf8_string
-       if (imp_dbh->pg_enable_utf8)
+       if (imp_dbh->utf8_strings)
                SvUTF8_on(DBIc_ERRSTR(imp_xxh));
-#endif
 
        if (TEND) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER);
 
@@ -777,13 +774,11 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv)
                        retsv = newSViv((IV) PGLIBVERSION );
                else if (strEQ("pg_prepare_now", key))
                        retsv = newSViv((IV)imp_dbh->prepare_now);
-#ifdef is_utf8_string
                else if (strEQ("pg_enable_utf8", key))
                        retsv = newSViv((IV)imp_dbh->pg_enable_utf8);
-#endif
                break;
 
-       case 15: /* pg_default_port pg_async_status pg_expand_array */
+       case 15: /* pg_default_port pg_async_status pg_expand_array 
pg_utf8_strings */
 
                if (strEQ("pg_default_port", key))
                        retsv = newSViv((IV) PGDEFPORT );
@@ -791,6 +786,8 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV 
* keysv)
                        retsv = newSViv((IV)imp_dbh->async_status);
                else if (strEQ("pg_expand_array", key))
                        retsv = newSViv((IV)imp_dbh->expand_array);
+               else if (strEQ("pg_utf8_strings", key))
+                       retsv = newSViv((IV)imp_dbh->utf8_strings);
                break;
 
        case 17: /* pg_server_prepare  pg_server_version */
@@ -896,20 +893,25 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh, 
SV * keysv, SV * valuesv
                        retval = 1;
                }
 
-#ifdef is_utf8_string
                else if (strEQ("pg_enable_utf8", key)) {
                        imp_dbh->pg_enable_utf8 = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
                        retval = 1;
                }
-#endif
+
                break;
 
-       case 15: /* pg_expand_array */
+       case 15: /* pg_expand_array pg_utf8_strings */
 
                if (strEQ("pg_expand_array", key)) {
                        imp_dbh->expand_array = newval ? DBDPG_TRUE : 
DBDPG_FALSE;
                        retval = 1;
                }
+
+               else if (strEQ("pg_utf8_strings", key)) {
+                       imp_dbh->utf8_strings = newval!=0 ? DBDPG_TRUE : 
DBDPG_FALSE;
+                       retval = 1;
+               }
+
                break;
 
        case 17: /* pg_server_prepare */
@@ -1107,20 +1109,20 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * 
imp_sth, SV * keysv)
                        AV *av = newAV();
                        char *fieldname;
                        SV * sv_fieldname;
+                       D_imp_dbh_from_sth;
                        retsv = newRV_inc(sv_2mortal((SV*)av));
                        while(--fields >= 0) {
                                TRACE_PQFNAME;
                                fieldname = PQfname(imp_sth->result, fields);
                                sv_fieldname = newSVpv(fieldname,0);
-#ifdef is_utf8_string
-                               if (is_high_bit_set(aTHX_ (unsigned char 
*)fieldname, strlen(fieldname)) && is_utf8_string((unsigned char *)fieldname, 
strlen(fieldname)))
+                               if (imp_dbh->utf8_strings)
                                        SvUTF8_on(sv_fieldname);
-#endif
                                (void)av_store(av, fields, sv_fieldname);
                        }
                }
                else if (strEQ("TYPE", key)) {
                        /* Need to convert the Pg type to ANSI/SQL type. */
+                       /* None of this should ever be non-ASCII, so don't 
worry about utf8 here */
                        sql_type_info_t * type_info;
                        AV *av = newAV();
                        retsv = newRV_inc(sv_2mortal((SV*)av));
@@ -2680,16 +2682,9 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t 
*imp_dbh, unsigned char * input
                                        av_push(currentav, newSViv('t' == 
*string ? 1 : 0));
                                else {
                                        SV *sv = newSVpvn(string, section_size);
-#ifdef is_utf8_string
-                                       if (imp_dbh->pg_enable_utf8) {
-                                               SvUTF8_off(sv);
-                                               if (is_high_bit_set(aTHX_ 
(unsigned char *)string, section_size) && is_utf8_string((unsigned 
char*)string, section_size)) {
-                                                       SvUTF8_on(sv);
-                                               }
-                                       }
-#endif
+                                       if (imp_dbh->utf8_strings)
+                                               SvUTF8_on(sv);
                                        av_push(currentav, sv);
-
                                }
                        }
                        section_size = 0;
@@ -3362,16 +3357,6 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
 
 
 /* ================================================================== */
-static int is_high_bit_set(pTHX_ const unsigned char * val, STRLEN size)
-{
-       if (TSTART) TRC(DBILOGFP, "%sBegin is_high_bit_set\n", THEADER);
-       while (*val && size--)
-               if (*val++ & 0x80) return 1;
-       return 0;
-}
-
-
-/* ================================================================== */
 AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
 {
        dTHX;
@@ -3470,11 +3455,16 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                                break;
                                        default:
                                                sv_setpvn(sv, (char *)value, 
value_len);
+                                               if (imp_dbh->utf8_strings)
+                                                       SvUTF8_on(sv);
                                        }
                                }
                                else {
                                        value_len = strlen((char *)value);
                                        sv_setpvn(sv, (char *)value, value_len);
+                                       /* Check for specific types here? */
+                                       if (imp_dbh->utf8_strings)
+                                               SvUTF8_on(sv);
                                }
                        
                                if (type_info && (PG_BPCHAR == 
type_info->type_id) && chopblanks) {
@@ -3488,23 +3478,6 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                        }
                                }
                        }
-#ifdef is_utf8_string
-                       if (imp_dbh->pg_enable_utf8 && type_info) {
-                               SvUTF8_off(sv);
-                               switch (type_info->type_id) {
-                               case PG_CHAR:
-                               case PG_TEXT:
-                               case PG_BPCHAR:
-                               case PG_VARCHAR:
-                                       if (is_high_bit_set(aTHX_ value, 
value_len) && is_utf8_string((unsigned char*)value, value_len)) {
-                                               SvUTF8_on(sv);
-                                       }
-                                       break;
-                               default:
-                                       break;
-                               }
-                       }
-#endif
                }
        }
        
@@ -3520,10 +3493,8 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                */
                                const char * const s = SvPV(AvARRAY(av)[i],len);
                                sv_setpvn(currph->inout, s, len);
-                               if (SvUTF8(AvARRAY(av)[i]))
+                               if (imp_dbh->utf8_strings)
                                        SvUTF8_on(currph->inout);
-                               else
-                                       SvUTF8_off(currph->inout);
                        }
                }
        }
@@ -3868,10 +3839,8 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int 
async)
 
        if (copystatus > 0) {
                sv_setpv(dataline, tempbuf);
-#ifdef is_utf8_string
-               if (imp_dbh->pg_enable_utf8)
+               if (imp_dbh->utf8_strings)
                        SvUTF8_on(dataline);
-#endif
                TRACE_PQFREEMEM;
                PQfreemem(tempbuf);
        }
diff --git a/dbdimp.h b/dbdimp.h
index 0dd02fe..d665239 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -31,11 +31,10 @@ struct imp_dbh_st {
        AV      *savepoints;       /* list of savepoints */
        PGconn  *conn;             /* connection structure */
        char    *sqlstate;         /* from the last result */
-       const char  *server_encoding;  /* encoding detected at login */
-       int utf8;
 
        bool    pg_bool_tf;        /* do bools return 't'/'f'? Set by user, 
default is 0 */
-       bool    pg_enable_utf8;    /* should we attempt to make utf8 strings? 
Set by user, default is 0 */
+    bool    utf8_strings;      /* do we set the utf8 flag on data from the 
database? */
+       bool    pg_enable_utf8;    /* (DEPRECATED) should we attempt to make 
utf8 strings? Set by user, default is 0 */
        bool    prepare_now;       /* force immediate prepares, even with 
placeholders. Set by user, default is 0 */
        bool    done_begin;        /* have we done a begin? (e.g. are we in a 
transaction?) */
        bool    dollaronly;        /* only consider $1, $2 ... as valid 
placeholders */
diff --git a/t/02attribs.t b/t/02attribs.t
index e98add9..5445ad0 100644
--- a/t/02attribs.t
+++ b/t/02attribs.t
@@ -55,7 +55,7 @@ d pg_options
 d pg_socket
 d pg_pid
 d pg_standard_conforming strings
-d pg_enable_utf8
+d pg_utf8_strings
 d Warn
 
 d pg_prepare_now - tested in 03smethod.t
@@ -420,7 +420,7 @@ SKIP: {
        $SQL = 'SELECT id, pname FROM dbd_pg_test WHERE id = ?';
        $sth = $dbh->prepare($SQL);
        $sth->execute(1);
-       local $dbh->{pg_enable_utf8} = 1;
+       ## local $dbh->{pg_utf8_strings} = 1;
 
        $t='Quote method returns correct utf-8 characters';
        my $utf8_str = chr(0x100).'dam'; # LATIN CAPITAL LETTER A WITH MACRON
@@ -438,11 +438,11 @@ SKIP: {
        $t='Unicode (utf8) data returned from database is not corrupted';
        is (length($name), 4, $t);
 
-       $t='ASCII text returned from database does have utf8 bit set';
+       $t='ASCII text returned from database *does* have utf8 bit set';
        $sth->finish();
        $sth->execute(1);
        my ($id2, $name2) = $sth->fetchrow_array();
-       ok (!Encode::is_utf8($name2), $t);
+       ok (Encode::is_utf8($name2), $t);
        $sth->finish();
 }
 
diff --git a/t/09arrays.t b/t/09arrays.t
index d004c34..03a9e1a 100644
--- a/t/09arrays.t
+++ b/t/09arrays.t
@@ -569,7 +569,6 @@ SKIP: {
                if $server_encoding ne 'UTF8';
 
        $t='String should be UTF-8';
-       local $dbh->{pg_enable_utf8} = 1;
        my $utf8_str = chr(0x100).'dam'; # LATIN CAPITAL LETTER A WITH MACRON
     ok (Encode::is_utf8( $utf8_str ), $t);
 
@@ -634,8 +633,8 @@ SKIP: {
        $expected = [1,['Bob',$utf8_str],'one'];
        is_deeply ($result, $expected, $t);
 
-       $t='Selected ASCII string should not be UTF-8';
-    ok (!Encode::is_utf8( $result->[1][0] ), $t);
+       $t='Selected ASCII string should be UTF-8';
+    ok (Encode::is_utf8( $result->[1][0] ), $t);
 
        $t='Selected string should be UTF-8';
     ok (Encode::is_utf8( $result->[1][1] ), $t);
diff --git a/testme.tmp.pl b/testme.tmp.pl
index 15f8e5d..713f5f8 100755
--- a/testme.tmp.pl
+++ b/testme.tmp.pl
@@ -18,15 +18,15 @@ use vars qw/$sth $info $count $SQL/;
 my $tracelevel = shift || 0;
 $ENV{DBI_TRACE} = $tracelevel;
 
-my $DSN = 'DBI:Pg:dbname=postgres';
+my $dbname = 'latin';
+my $DSN = "DBI:Pg:dbname=$dbname";
+
 my $dbh = DBI->connect($DSN, '', '', 
{AutoCommit=>0,RaiseError=>1,PrintError=>0})
   or die "Connection failed!\n";
 
 my $me = $dbh->{Driver}{Name};
 print "DBI is version $DBI::VERSION, I am $me, version of DBD::Pg is 
$DBD::Pg::VERSION\n";
 
-memory_leak_test_bug_65734();
-
 exit;
 
 sub memory_leak_test_bug_65734 {
-- 
1.7.0.5

Reply via email to