Committed by Dagfinn Ilmari Mannsåker <[email protected]>
Subject: [DBD::Pg 1/8] Fix UTF-8 support in placeholders and return values
---
Pg.xs | 2 +-
dbdimp.c | 53 +++++++++++++++++++++++++++++++++++++++++++++-----
dbdimp.h | 4 +++-
t/02attribs.t | 2 +-
t/30unicode.t | 62 ++++++++++++++++++++++++++++++++++-------------------------
5 files changed, 89 insertions(+), 34 deletions(-)
diff --git a/Pg.xs b/Pg.xs
index edec91e..60f843b 100644
--- a/Pg.xs
+++ b/Pg.xs
@@ -224,7 +224,7 @@ quote(dbh, to_quote_sv, type_sv=Nullsv)
if (SvROK(to_quote_sv) && !SvAMAGIC(to_quote_sv)) {
if (SvTYPE(SvRV(to_quote_sv)) != SVt_PVAV)
croak("Cannot quote a reference");
- to_quote_sv = pg_stringify_array(to_quote_sv, ",",
imp_dbh->pg_server_version);
+ to_quote_sv = pg_stringify_array(to_quote_sv, ",",
imp_dbh->pg_server_version, imp_dbh->client_encoding_utf8);
}
/* Null is always returned as "NULL", so we can ignore any type
given */
diff --git a/dbdimp.c b/dbdimp.c
index 476eae7..fed72bd 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -2446,7 +2446,7 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV *
ph_name, SV * newvalue, IV
}
else if (SvTYPE(SvRV(newvalue)) == SVt_PVAV) {
SV * quotedval;
- quotedval =
pg_stringify_array(newvalue,",",imp_dbh->pg_server_version);
+ quotedval =
pg_stringify_array(newvalue,",",imp_dbh->pg_server_version,imp_dbh->client_encoding_utf8);
currph->valuelen = sv_len(quotedval);
Renew(currph->value, currph->valuelen+1, char); /*
freed in dbd_st_destroy */
Copy(SvUTF8(quotedval) ? SvPVutf8_nolen(quotedval) :
SvPV_nolen(quotedval),
@@ -2544,6 +2544,8 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV *
ph_name, SV * newvalue, IV
(void)SvUPGRADE(newvalue, SVt_PV);
if (SvOK(newvalue)) {
+ /* get the right encoding, without modifying the caller's copy
*/
+ newvalue = pg_rightgraded_sv(aTHX_ newvalue,
imp_dbh->client_encoding_utf8 && PG_BYTEA!=currph->bind_type->type_id);
value_string = SvPV(newvalue, currph->valuelen);
Renew(currph->value, currph->valuelen+1, char); /* freed in
dbd_st_destroy */
Copy(value_string, currph->value, currph->valuelen, char);
@@ -2582,7 +2584,7 @@ int dbd_bind_ph (SV * sth, imp_sth_t * imp_sth, SV *
ph_name, SV * newvalue, IV
/* ================================================================== */
-SV * pg_stringify_array(SV *input, const char * array_delim, int
server_version) {
+SV * pg_stringify_array(SV *input, const char * array_delim, int
server_version, bool utf8) {
dTHX;
AV * toparr;
@@ -2602,6 +2604,8 @@ SV * pg_stringify_array(SV *input, const char *
array_delim, int server_version)
toparr = (AV *) SvRV(input);
value = newSVpv("{", 1);
+ if (utf8)
+ SvUTF8_on(value);
/* Empty arrays are easy */
if (av_len(toparr) < 0) {
@@ -2685,8 +2689,8 @@ SV * pg_stringify_array(SV *input, const char *
array_delim, int server_version)
}
else {
sv_catpv(value, "\"");
- if (SvUTF8(svitem))
- SvUTF8_on(value);
+ /* avoid up- or down-grading the
caller's value */
+ svitem = pg_rightgraded_sv(aTHX_
svitem, utf8);
string = SvPV(svitem, stringlength);
while (stringlength--) {
/* Escape backslashes and
double-quotes. */
@@ -2872,6 +2876,41 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t
*imp_dbh, unsigned char * input
} /* end of pg_destringify_array */
+SV * pg_upgraded_sv(pTHX_ SV *input) { /* Return input itself, or a mortal UTF-8-upgraded copy when it holds high-bit bytes without the UTF-8 flag. */
+ U8 *p, *end;
+ STRLEN len;
+ /* SvPV() can change the value of SvUTF8() (for overloaded values and tied
values), so the string is fetched before the flag is tested. */
+ p = (U8*)SvPV(input, len);
+ if(SvUTF8(input)) return input; /* already flagged as UTF-8: return the original SV */
+ for(end = p + len; p != end; p++) {
+ if(*p & 0x80) { /* first byte with the high bit set: an upgrade is needed */
+ SV *output = sv_mortalcopy(input); /* work on a copy so the caller's SV is not modified */
+ sv_utf8_upgrade(output);
+ return output;
+ }
+ }
+ return input; /* no high-bit bytes found: return the original SV unchanged */
+}
+
+SV * pg_downgraded_sv(pTHX_ SV *input) { /* Return input itself, or a mortal downgraded copy when it is UTF-8-flagged and holds high-bit bytes. */
+ U8 *p, *end;
+ STRLEN len;
+ /* SvPV() can change the value of SvUTF8() (for overloaded values and tied
values), so the string is fetched before the flag is tested. */
+ p = (U8*)SvPV(input, len);
+ if(!SvUTF8(input)) return input; /* not flagged as UTF-8: return the original SV */
+ for(end = p + len; p != end; p++) {
+ if(*p & 0x80) { /* first byte with the high bit set: a downgrade is needed */
+ SV *output = sv_mortalcopy(input); /* work on a copy so the caller's SV is not modified */
+ sv_utf8_downgrade(output, DBDPG_FALSE); /* NOTE(review): second argument is Perl's fail_ok; DBDPG_FALSE presumably makes an impossible downgrade fatal - confirm */
+ return output;
+ }
+ }
+ return input; /* no high-bit bytes found: return the original SV unchanged */
+}
+
+SV * pg_rightgraded_sv(pTHX_ SV *input, bool utf8) { /* Dispatch on utf8: true -> pg_upgraded_sv(), false -> pg_downgraded_sv(). */
+ return utf8 ? pg_upgraded_sv(aTHX_ input) : pg_downgraded_sv(aTHX_
input);
+}
/* ================================================================== */
int pg_quickexec (SV * dbh, const char * sql, const int asyncflag)
@@ -3652,7 +3691,11 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
if (type_info && PG_BYTEA ==
type_info->type_id) {
SvUTF8_off(sv);
}
- else {
+ /*
+ Don't try to upgrade references (e.g. arrays).
+ pg_destringify_array() upgrades the items as
appropriate.
+ */
+ else if (!SvROK(sv)) {
SvUTF8_on(sv);
}
}
diff --git a/dbdimp.h b/dbdimp.h
index 350f1a3..abc082a 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -197,7 +197,9 @@ int pg_db_getfd (imp_dbh_t * imp_dbh);
SV * pg_db_pg_notifies (SV *dbh, imp_dbh_t *imp_dbh);
-SV * pg_stringify_array(SV * input, const char * array_delim, int
server_version);
+SV * pg_rightgraded_sv(pTHX_ SV *input, bool utf8);
+
+SV * pg_stringify_array(SV * input, const char * array_delim, int
server_version, bool utf8);
int pg_quickexec (SV *dbh, const char *sql, const int asyncflag);
diff --git a/t/02attribs.t b/t/02attribs.t
index e671fd5..5f43432 100644
--- a/t/02attribs.t
+++ b/t/02attribs.t
@@ -438,7 +438,7 @@ SKIP: {
ok (Encode::is_utf8($name), $t);
$t='Unicode (utf8) data returned from database is not corrupted';
- is (length($name), 4, $t);
+ is ($name, $utf8_str, $t);
$t='ASCII text returned from database does have utf8 bit set';
$sth->finish();
diff --git a/t/30unicode.t b/t/30unicode.t
index 7c4da06..ad45fc5 100644
--- a/t/30unicode.t
+++ b/t/30unicode.t
@@ -23,41 +23,51 @@ if (! $dbh) {
isnt ($dbh, undef, 'Connect to database for unicode testing');
-my $pgversion = $dbh->{pg_server_version};
-
my $t;
-my $name = "\N{LATIN CAPITAL LETTER E WITH ACUTE}milie du Ch\N{LATIN SMALL
LETTER A WITH CIRCUMFLEX}telet";
-
-my $SQL = 'SELECT ?::text';
-my $sth = $dbh->prepare($SQL);
-$sth->execute($name);
-my $result = $sth->fetchall_arrayref->[0][0];
-$t = 'Fetching UTF-8 string from the database returns proper string';
-is ($result, $name, $t);
-$t = 'Fetching UTF-8 string from the database returns string with UTF-8 flag
on';
-ok (utf8::is_utf8($result), $t);
-
-$dbh->{pg_enable_utf8} = 0;
-$sth->execute($name);
-$result = $sth->fetchall_arrayref->[0][0];
-$t = 'Fetching UTF-8 string from the database returns proper string
(pg_enable_utf8=0)';
-my $noutfname = $name;
-Encode::_utf8_off($noutfname);
-is ($result, $noutfname, $t);
-$t = 'Fetching UTF-8 string from the database returns string with UTF-8 flag
off (pg_enable_utf8=0)';
-ok (!utf8::is_utf8($result), $t);
+my $name_d = my $name_u = "\N{LATIN CAPITAL LETTER E WITH ACUTE}milie du
Ch\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}telet";
+utf8::downgrade($name_d);
+utf8::upgrade($name_u);
+
+foreach ( # each case: [ flag state of the input, SQL type, value to bind ]
+ [upgraded => text => $name_u],
+ [downgraded => text => $name_d],
+ [upgraded => 'text[]' => [$name_u]],
+ [downgraded => 'text[]' => [$name_d]],
+ [mixed => 'text[]' => [$name_d,$name_u]],
+) {
+ my ($state, $type, $value) = @$_;
+ $dbh->{pg_enable_utf8} = 1; # first pass: results are expected to come back with the UTF-8 flag on
+
+ my $SQL = "SELECT ?::$type";
+ my $sth = $dbh->prepare($SQL);
+ $sth->execute($value);
+ my $result = $sth->fetchall_arrayref->[0][0];
+ $t = "Fetching $state UTF-8 $type from the database returns proper value";
+ is_deeply ($result, $value, $t);
+ $t = "Fetching $state UTF-8 $type from the database returns string with
UTF-8 flag on";
+ ok (utf8::is_utf8($_), $t) for (ref $result ? @{$result} : $result);
+
+ $dbh->{pg_enable_utf8} = 0; # second pass: results are expected to come back as undecoded bytes
+ $sth->execute($value);
+ $result = $sth->fetchall_arrayref->[0][0];
+ $t = "Fetching $state UTF-8 $type from the database returns proper string
(pg_enable_utf8=0)";
+ utf8::encode($_) for (ref $value ? @{$value} : $value); # expected value becomes the UTF-8 encoded bytes
+ is_deeply ($result, $value, $t);
+ $t = "Fetching $state UTF-8 $type from the database returns string with
UTF-8 flag off (pg_enable_utf8=0)";
+ ok (!utf8::is_utf8($_), $t) for (ref $result ? @{$result} : $result); # test each element ($_), not the array reference itself
+}
$t = 'Generated string is not utf8';
-$name = 'Ada Lovelace';
+my $name = 'Ada Lovelace';
utf8::encode($name);
ok (!utf8::is_utf8($name), $t);
$dbh->{pg_enable_utf8} = -1;
-$SQL = 'SELECT ?::text';
-$sth = $dbh->prepare($SQL);
+my $SQL = 'SELECT ?::text';
+my $sth = $dbh->prepare($SQL);
$sth->execute($name);
-$result = $sth->fetchall_arrayref->[0][0];
+my $result = $sth->fetchall_arrayref->[0][0];
$t = 'Fetching ASCII string from the database returns proper string';
is ($result, $name, $t);
$t = 'Fetching ASCII string from the database returns string with UTF-8 flag
on';
--
1.8.4