Committed by Dagfinn Ilmari Mannsåker <[email protected]>

Subject: [DBD::Pg 1/8] Fix UTF-8 support in placeholders and return values

---
 Pg.xs         |  2 +-
 dbdimp.c      | 53 +++++++++++++++++++++++++++++++++++++++++++++-----
 dbdimp.h      |  4 +++-
 t/02attribs.t |  2 +-
 t/30unicode.t | 62 ++++++++++++++++++++++++++++++++++-------------------------
 5 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/Pg.xs b/Pg.xs
index edec91e..60f843b 100644
--- a/Pg.xs
+++ b/Pg.xs
@@ -224,7 +224,7 @@ quote(dbh, to_quote_sv, type_sv=Nullsv)
                if (SvROK(to_quote_sv) && !SvAMAGIC(to_quote_sv)) {
                        if (SvTYPE(SvRV(to_quote_sv)) != SVt_PVAV)
                                croak("Cannot quote a reference");
-                       to_quote_sv = pg_stringify_array(to_quote_sv, ",", 
imp_dbh->pg_server_version);
+                       to_quote_sv = pg_stringify_array(to_quote_sv, ",", 
imp_dbh->pg_server_version, imp_dbh->client_encoding_utf8);
                }
 
                /* Null is always returned as "NULL", so we can ignore any type 
given */
diff --git a/dbdimp.c b/dbdimp.c
index 476eae7..fed72bd 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -2446,7 +2446,7 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV * 
ph_name, SV * newvalue, IV
                }
                else if (SvTYPE(SvRV(newvalue)) == SVt_PVAV) {
                        SV * quotedval;
-                       quotedval = 
pg_stringify_array(newvalue,",",imp_dbh->pg_server_version);
+                       quotedval = 
pg_stringify_array(newvalue,",",imp_dbh->pg_server_version,imp_dbh->client_encoding_utf8);
                        currph->valuelen = sv_len(quotedval);
                        Renew(currph->value, currph->valuelen+1, char); /* 
freed in dbd_st_destroy */
                        Copy(SvUTF8(quotedval) ? SvPVutf8_nolen(quotedval) : 
SvPV_nolen(quotedval),
@@ -2544,6 +2544,8 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV * 
ph_name, SV * newvalue, IV
        (void)SvUPGRADE(newvalue, SVt_PV);
 
        if (SvOK(newvalue)) {
+               /* get the right encoding, without modifying the caller's copy 
*/
+               newvalue = pg_rightgraded_sv(aTHX_ newvalue, 
imp_dbh->client_encoding_utf8 && PG_BYTEA!=currph->bind_type->type_id);
                value_string = SvPV(newvalue, currph->valuelen);
                Renew(currph->value, currph->valuelen+1, char); /* freed in 
dbd_st_destroy */
                Copy(value_string, currph->value, currph->valuelen, char);
@@ -2582,7 +2584,7 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV * 
ph_name, SV * newvalue, IV
 
 
 /* ================================================================== */
-SV * pg_stringify_array(SV *input, const char * array_delim, int 
server_version) {
+SV * pg_stringify_array(SV *input, const char * array_delim, int 
server_version, bool utf8) {
 
        dTHX;
        AV * toparr;
@@ -2602,6 +2604,8 @@ SV * pg_stringify_array(SV *input, const char * 
array_delim, int server_version)
 
        toparr = (AV *) SvRV(input);
        value = newSVpv("{", 1);
+       if (utf8)
+           SvUTF8_on(value);
 
        /* Empty arrays are easy */
        if (av_len(toparr) < 0) {
@@ -2685,8 +2689,8 @@ SV * pg_stringify_array(SV *input, const char * 
array_delim, int server_version)
                                }
                                else {
                                        sv_catpv(value, "\"");
-                                       if (SvUTF8(svitem))
-                                               SvUTF8_on(value);
+                                       /* avoid up- or down-grading the 
caller's value */
+                                       svitem = pg_rightgraded_sv(aTHX_ 
svitem, utf8);
                                        string = SvPV(svitem, stringlength);
                                        while (stringlength--) {
                                                /* Escape backslashes and 
double-quotes. */
@@ -2872,6 +2876,41 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t 
*imp_dbh, unsigned char * input
 
 } /* end of pg_destringify_array */
 
+SV * pg_upgraded_sv(pTHX_ SV *input) { /* Return input, or a mortal UTF-8-upgraded copy when input needs upgrading. */
+       U8 *p, *end;
+       STRLEN len;
+       /* Call SvPV() before SvUTF8(): SvPV() can change the value of SvUTF8() (for overloaded values and tied values). */
+       p = (U8*)SvPV(input, len);
+       if(SvUTF8(input)) return input; /* already flagged as UTF-8: nothing to do */
+       for(end = p + len; p != end; p++) {
+               if(*p & 0x80) { /* a byte with the high bit set: the upgraded form differs */
+                       SV *output = sv_mortalcopy(input); /* copy, so the caller's SV is not modified */
+                       sv_utf8_upgrade(output);
+                       return output;
+               }
+       }
+       return input; /* only 7-bit bytes were found: return the original SV unchanged */
+}
+
+SV * pg_downgraded_sv(pTHX_ SV *input) { /* Return input, or a mortal downgraded copy when input needs downgrading. */
+       U8 *p, *end;
+       STRLEN len;
+       /* Call SvPV() before SvUTF8(): SvPV() can change the value of SvUTF8() (for overloaded values and tied values). */
+       p = (U8*)SvPV(input, len);
+       if(!SvUTF8(input)) return input; /* not flagged as UTF-8: nothing to do */
+       for(end = p + len; p != end; p++) {
+               if(*p & 0x80) { /* a byte with the high bit set: the downgraded form differs */
+                       SV *output = sv_mortalcopy(input); /* copy, so the caller's SV is not modified */
+                       sv_utf8_downgrade(output, DBDPG_FALSE); /* NOTE(review): presumably fail_ok is false here, so un-downgradable text croaks -- confirm DBDPG_FALSE */
+                       return output;
+               }
+       }
+       return input; /* only 7-bit bytes were found: return the original SV unchanged */
+}
+
+SV * pg_rightgraded_sv(pTHX_ SV *input, bool utf8) { /* Dispatch: upgrade when utf8 is true, downgrade otherwise. */
+       return utf8 ? pg_upgraded_sv(aTHX_ input) : pg_downgraded_sv(aTHX_ 
input);
+}
 
 /* ================================================================== */
 int pg_quickexec (SV * dbh, const char * sql, const int asyncflag)
@@ -3652,7 +3691,11 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
                                if (type_info && PG_BYTEA == 
type_info->type_id) {
                                        SvUTF8_off(sv);
                                }
-                               else {
+                               /*
+                                 Don't try to upgrade references (e.g. arrays).
+                                 pg_destringify_array() upgrades the items as 
appropriate.
+                               */
+                               else if (!SvROK(sv)) {
                                        SvUTF8_on(sv);
                                }
                        }
diff --git a/dbdimp.h b/dbdimp.h
index 350f1a3..abc082a 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -197,7 +197,9 @@ int pg_db_getfd (imp_dbh_t * imp_dbh);
 
 SV * pg_db_pg_notifies (SV *dbh, imp_dbh_t *imp_dbh);
 
-SV * pg_stringify_array(SV * input, const char * array_delim, int 
server_version);
+SV * pg_rightgraded_sv(pTHX_ SV *input, bool utf8);
+
+SV * pg_stringify_array(SV * input, const char * array_delim, int 
server_version, bool utf8);
 
 int pg_quickexec (SV *dbh, const char *sql, const int asyncflag);
 
diff --git a/t/02attribs.t b/t/02attribs.t
index e671fd5..5f43432 100644
--- a/t/02attribs.t
+++ b/t/02attribs.t
@@ -438,7 +438,7 @@ SKIP: {
        ok (Encode::is_utf8($name), $t);
 
        $t='Unicode (utf8) data returned from database is not corrupted';
-       is (length($name), 4, $t);
+       is ($name, $utf8_str, $t);
 
        $t='ASCII text returned from database does have utf8 bit set';
        $sth->finish();
diff --git a/t/30unicode.t b/t/30unicode.t
index 7c4da06..ad45fc5 100644
--- a/t/30unicode.t
+++ b/t/30unicode.t
@@ -23,41 +23,51 @@ if (! $dbh) {
 
 isnt ($dbh, undef, 'Connect to database for unicode testing');
 
-my $pgversion = $dbh->{pg_server_version};
-
 my $t;
 
-my $name = "\N{LATIN CAPITAL LETTER E WITH ACUTE}milie du Ch\N{LATIN SMALL 
LETTER A WITH CIRCUMFLEX}telet";
-
-my $SQL = 'SELECT ?::text';
-my $sth = $dbh->prepare($SQL);
-$sth->execute($name);
-my $result = $sth->fetchall_arrayref->[0][0];
-$t = 'Fetching UTF-8 string from the database returns proper string';
-is ($result, $name, $t);
-$t = 'Fetching UTF-8 string from the database returns string with UTF-8 flag 
on';
-ok (utf8::is_utf8($result), $t);
-
-$dbh->{pg_enable_utf8} = 0;
-$sth->execute($name);
-$result = $sth->fetchall_arrayref->[0][0];
-$t = 'Fetching UTF-8 string from the database returns proper string 
(pg_enable_utf8=0)';
-my $noutfname  = $name;
-Encode::_utf8_off($noutfname);
-is ($result, $noutfname, $t);
-$t = 'Fetching UTF-8 string from the database returns string with UTF-8 flag 
off (pg_enable_utf8=0)';
-ok (!utf8::is_utf8($result), $t);
+my $name_d = my $name_u = "\N{LATIN CAPITAL LETTER E WITH ACUTE}milie du 
Ch\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}telet";
+utf8::downgrade($name_d);
+utf8::upgrade($name_u);
+
+foreach ( # each case: [flag state of the input, SQL type, value to round-trip]
+    [upgraded => text => $name_u],
+    [downgraded => text => $name_d],
+    [upgraded => 'text[]' => [$name_u]],
+    [downgraded => 'text[]' => [$name_d]],
+    [mixed => 'text[]' => [$name_d,$name_u]],
+) {
+    my ($state, $type, $value) = @$_;
+    $dbh->{pg_enable_utf8} = 1;
+
+    my $SQL = "SELECT ?::$type";
+    my $sth = $dbh->prepare($SQL);
+    $sth->execute($value);
+    my $result = $sth->fetchall_arrayref->[0][0];
+    $t = "Fetching $state UTF-8 $type from the database returns proper value";
+    is_deeply ($result, $value, $t);
+    $t = "Fetching $state UTF-8 $type from the database returns string with UTF-8 flag on";
+    ok (utf8::is_utf8($_), $t) for (ref $result ? @{$result} : $result);
+
+    $dbh->{pg_enable_utf8} = 0;
+    $sth->execute($value);
+    $result = $sth->fetchall_arrayref->[0][0];
+    $t = "Fetching $state UTF-8 $type from the database returns proper string (pg_enable_utf8=0)";
+    utf8::encode($_) for (ref $value ? @{$value} : $value); # expected value becomes raw UTF-8 octets
+    is_deeply ($result, $value, $t);
+    $t = "Fetching $state UTF-8 $type from the database returns string with UTF-8 flag off (pg_enable_utf8=0)";
+    ok (!utf8::is_utf8($_), $t) for (ref $result ? @{$result} : $result); # check each string; an array ref is never flagged
+}
 
 $t = 'Generated string is not utf8';
-$name = 'Ada Lovelace';
+my $name = 'Ada Lovelace';
 utf8::encode($name);
 ok (!utf8::is_utf8($name), $t);
 
 $dbh->{pg_enable_utf8} = -1;
-$SQL = 'SELECT ?::text';
-$sth = $dbh->prepare($SQL);
+my $SQL = 'SELECT ?::text';
+my $sth = $dbh->prepare($SQL);
 $sth->execute($name);
-$result = $sth->fetchall_arrayref->[0][0];
+my $result = $sth->fetchall_arrayref->[0][0];
 $t = 'Fetching ASCII string from the database returns proper string';
 is ($result, $name, $t);
 $t = 'Fetching ASCII string from the database returns string with UTF-8 flag 
on';
-- 
1.8.4

Reply via email to