Committed by Greg Sabino Mullane <[email protected]>
First pass at new UTF-8 support.
Deprecate pg_enable_utf8 completely.
Use the new pg_utf8_strings.
It defaults to on in all cases except when the server_encoding is SQL_ASCII.
---
Pg.pm | 16 ++++++--
TODO | 2 -
dbdimp.c | 109 ++++++++++++++++++++------------------------------------
dbdimp.h | 5 +--
t/02attribs.t | 8 ++--
t/09arrays.t | 5 +--
testme.tmp.pl | 6 ++--
7 files changed, 62 insertions(+), 89 deletions(-)
diff --git a/Pg.pm b/Pg.pm
index ac4d9ba..f6afbeb 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1626,6 +1626,7 @@ use 5.006001;
pg_bool_tf => undef,
pg_db => undef,
pg_default_port => undef,
+ pg_utf8_strings => undef,
pg_enable_utf8 => undef,
pg_errorlevel => undef,
pg_expand_array => undef,
@@ -3122,12 +3123,19 @@ DBD::Pg specific attribute. Defaults to false. When
true, question marks inside
are not treated as L<placeholders|/Placeholders>. Useful for statements that
contain unquoted question
marks, such as geometric operators.
+=head3 B<pg_utf8_strings> (boolean)
+
+DBD::Pg specific attribute. In normal use, this should not be needed, as it
will be set
+automatically according to the server encoding. SQL_ASCII will set this to
false, while
+everything else will set it to true. If you force it off, then everything will
be returned
+as byte soup, even data from UTF-8 databases, which is very likely not what
you want. If
+you force it on for SQL_ASCII databases, the results will be unpredictable. It
is recommended
+that you only use this attribute as a last resort and with a full
understanding of what
+it does.
+
=head3 B<pg_enable_utf8> (boolean)
-DBD::Pg specific attribute. If true, then the C<utf8> flag will be turned on
-for returned character data (if the data is valid UTF-8). For details about
-the C<utf8> flag, see the C<Encode> module. This attribute is only relevant
under
-perl 5.8 and later.
+Deprecated, please use pg_utf8_strings instead.
=head3 B<pg_errorlevel> (integer)
diff --git a/TODO b/TODO
index 1afec40..7ace7e1 100644
--- a/TODO
+++ b/TODO
@@ -19,8 +19,6 @@ http://rt.cpan.org/Public/Dist/Display.html?Name=DBD-Pg
supported as a server encoding (e.g. BIG5)
- Support passing hashrefs in and out for custom types.
- Support a flag for behind-the-scenes CURSOR to emulate partial fetches.
-- Handle unicode conversion better and perhaps eliminate the need for
- the pg_enable_utf8 attribute.
- Fix this: http://nntp.x.perl.org/group/perl.cpan.testers/2698430
- Composite type support:
http://www.postgresql.org/docs/current/interactive/rowtypes.html
- Full support for execute_array, e.g. the return values
diff --git a/dbdimp.c b/dbdimp.c
index 09e7b8d..81a697e 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -85,7 +85,6 @@ static ExecStatusType _sqlstate(pTHX_ imp_dbh_t *imp_dbh,
PGresult *result);
static int pg_db_rollback_commit (pTHX_ SV *dbh, imp_dbh_t *imp_dbh, int
action);
static void pg_st_split_statement (pTHX_ imp_sth_t *imp_sth, int version, char
*statement);
static int pg_st_prepare_statement (pTHX_ SV *sth, imp_sth_t *imp_sth);
-static int is_high_bit_set(pTHX_ const unsigned char *val, STRLEN size);
static int pg_st_deallocate_statement(pTHX_ SV *sth, imp_sth_t *imp_sth);
static PGTransactionStatusType pg_db_txn_status (pTHX_ imp_dbh_t *imp_dbh);
static int pg_db_start_txn (pTHX_ SV *dbh, imp_dbh_t *imp_dbh);
@@ -214,31 +213,28 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
TRACE_PQPROTOCOLVERSION;
imp_dbh->pg_protocol = PQprotocolVersion(imp_dbh->conn);
- /* Grab the server encoding so we can set out utf8 flags intelligently
*/
- imp_dbh->server_encoding = PQparameterStatus(imp_dbh->conn,
"server_encoding");
-
- /* Check the value of the pg_enable_utf8 attribute. Default to not
there or -1 */
+ /* Check the value of the pg_utf8_strings attribute. Default to not set
(-1) */
utf8int = -1;
- DBD_ATTRIB_GET_IV(attr, "pg_enable_utf8", 14, svp, utf8int);
+ DBD_ATTRIB_GET_IV(attr, "pg_utf8_strings", 15, svp, utf8int);
/*
We need to see if we are treating things with utf8 respect, or as
byte soup
The rules are:
- pg_enable_utf8 trumps everything else
- SQL_ASCII is always byte soup
- Everything else is not
+ - pg_utf8_strings trumps everything else
+ - SQL_ASCII is always byte soup
+ - Everything else is not
*/
if (utf8int > 1) { /* Force it on, no matter what */
- imp_dbh->utf8 = 1;
+ imp_dbh->utf8_strings = DBDPG_TRUE;
}
else {
if (utf8int == 0) { /* Force it off, no matter what */
- imp_dbh->utf8 = 0;
+ imp_dbh->utf8_strings = DBDPG_FALSE;
}
- else {
- imp_dbh->utf8 =
- (0 == strncmp(imp_dbh->server_encoding,
"SQL_ASCII", 9))
- ? 0 : 1;
+ else { /* Neither is set, so use the server_encoding */
+ imp_dbh->utf8_strings =
+ (0 == strncmp(PQparameterStatus(imp_dbh->conn,
"server_encoding"), "SQL_ASCII", 9))
+ ? DBDPG_FALSE : DBDPG_TRUE;
}
}
@@ -262,7 +258,6 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
/* Set all the defaults for this database handle */
imp_dbh->pg_bool_tf = DBDPG_FALSE;
- imp_dbh->pg_enable_utf8 = DBDPG_FALSE;
imp_dbh->prepare_now = DBDPG_FALSE;
imp_dbh->done_begin = DBDPG_FALSE;
imp_dbh->dollaronly = DBDPG_FALSE;
@@ -275,6 +270,10 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
imp_dbh->async_status = 0;
imp_dbh->async_sth = NULL;
+ /* Deprecated: */
+ imp_dbh->pg_enable_utf8 = DBDPG_FALSE;
+
+
/* If using server version 7.4, switch to "smart" */
imp_dbh->server_prepare = PGLIBVERSION >= 80000 ? 1 : 2;
@@ -315,10 +314,8 @@ static void pg_error (pTHX_ SV * h, int error_num, const
char * error_msg)
sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
/* Set as utf-8 */
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8)
+ if (imp_dbh->utf8_strings)
SvUTF8_on(DBIc_ERRSTR(imp_xxh));
-#endif
if (TEND) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER);
@@ -777,13 +774,11 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv)
retsv = newSViv((IV) PGLIBVERSION );
else if (strEQ("pg_prepare_now", key))
retsv = newSViv((IV)imp_dbh->prepare_now);
-#ifdef is_utf8_string
else if (strEQ("pg_enable_utf8", key))
retsv = newSViv((IV)imp_dbh->pg_enable_utf8);
-#endif
break;
- case 15: /* pg_default_port pg_async_status pg_expand_array */
+ case 15: /* pg_default_port pg_async_status pg_expand_array
pg_utf8_strings */
if (strEQ("pg_default_port", key))
retsv = newSViv((IV) PGDEFPORT );
@@ -791,6 +786,8 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh, SV
* keysv)
retsv = newSViv((IV)imp_dbh->async_status);
else if (strEQ("pg_expand_array", key))
retsv = newSViv((IV)imp_dbh->expand_array);
+ else if (strEQ("pg_utf8_strings", key))
+ retsv = newSViv((IV)imp_dbh->utf8_strings);
break;
case 17: /* pg_server_prepare pg_server_version */
@@ -896,20 +893,25 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv, SV * valuesv
retval = 1;
}
-#ifdef is_utf8_string
else if (strEQ("pg_enable_utf8", key)) {
imp_dbh->pg_enable_utf8 = newval!=0 ? DBDPG_TRUE :
DBDPG_FALSE;
retval = 1;
}
-#endif
+
break;
- case 15: /* pg_expand_array */
+ case 15: /* pg_expand_array pg_utf8_strings */
if (strEQ("pg_expand_array", key)) {
imp_dbh->expand_array = newval ? DBDPG_TRUE :
DBDPG_FALSE;
retval = 1;
}
+
+ else if (strEQ("pg_utf8_strings", key)) {
+ imp_dbh->utf8_strings = newval!=0 ? DBDPG_TRUE :
DBDPG_FALSE;
+ retval = 1;
+ }
+
break;
case 17: /* pg_server_prepare */
@@ -1107,20 +1109,20 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t *
imp_sth, SV * keysv)
AV *av = newAV();
char *fieldname;
SV * sv_fieldname;
+ D_imp_dbh_from_sth;
retsv = newRV_inc(sv_2mortal((SV*)av));
while(--fields >= 0) {
TRACE_PQFNAME;
fieldname = PQfname(imp_sth->result, fields);
sv_fieldname = newSVpv(fieldname,0);
-#ifdef is_utf8_string
- if (is_high_bit_set(aTHX_ (unsigned char
*)fieldname, strlen(fieldname)) && is_utf8_string((unsigned char *)fieldname,
strlen(fieldname)))
+ if (imp_dbh->utf8_strings)
SvUTF8_on(sv_fieldname);
-#endif
(void)av_store(av, fields, sv_fieldname);
}
}
else if (strEQ("TYPE", key)) {
/* Need to convert the Pg type to ANSI/SQL type. */
+ /* None of this should ever be non-ASCII, so don't
worry about utf8 here */
sql_type_info_t * type_info;
AV *av = newAV();
retsv = newRV_inc(sv_2mortal((SV*)av));
@@ -2680,16 +2682,9 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t
*imp_dbh, unsigned char * input
av_push(currentav, newSViv('t' ==
*string ? 1 : 0));
else {
SV *sv = newSVpvn(string, section_size);
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8) {
- SvUTF8_off(sv);
- if (is_high_bit_set(aTHX_
(unsigned char *)string, section_size) && is_utf8_string((unsigned
char*)string, section_size)) {
- SvUTF8_on(sv);
- }
- }
-#endif
+ if (imp_dbh->utf8_strings)
+ SvUTF8_on(sv);
av_push(currentav, sv);
-
}
}
section_size = 0;
@@ -3362,16 +3357,6 @@ int dbd_st_execute (SV * sth, imp_sth_t * imp_sth)
/* ================================================================== */
-static int is_high_bit_set(pTHX_ const unsigned char * val, STRLEN size)
-{
- if (TSTART) TRC(DBILOGFP, "%sBegin is_high_bit_set\n", THEADER);
- while (*val && size--)
- if (*val++ & 0x80) return 1;
- return 0;
-}
-
-
-/* ================================================================== */
AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
{
dTHX;
@@ -3470,11 +3455,16 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
break;
default:
sv_setpvn(sv, (char *)value,
value_len);
+ if (imp_dbh->utf8_strings)
+ SvUTF8_on(sv);
}
}
else {
value_len = strlen((char *)value);
sv_setpvn(sv, (char *)value, value_len);
+ /* Check for specific types here? */
+ if (imp_dbh->utf8_strings)
+ SvUTF8_on(sv);
}
if (type_info && (PG_BPCHAR ==
type_info->type_id) && chopblanks) {
@@ -3488,23 +3478,6 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
}
}
}
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8 && type_info) {
- SvUTF8_off(sv);
- switch (type_info->type_id) {
- case PG_CHAR:
- case PG_TEXT:
- case PG_BPCHAR:
- case PG_VARCHAR:
- if (is_high_bit_set(aTHX_ value,
value_len) && is_utf8_string((unsigned char*)value, value_len)) {
- SvUTF8_on(sv);
- }
- break;
- default:
- break;
- }
- }
-#endif
}
}
@@ -3520,10 +3493,8 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
*/
const char * const s = SvPV(AvARRAY(av)[i],len);
sv_setpvn(currph->inout, s, len);
- if (SvUTF8(AvARRAY(av)[i]))
+ if (imp_dbh->utf8_strings)
SvUTF8_on(currph->inout);
- else
- SvUTF8_off(currph->inout);
}
}
}
@@ -3868,10 +3839,8 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int
async)
if (copystatus > 0) {
sv_setpv(dataline, tempbuf);
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8)
+ if (imp_dbh->utf8_strings)
SvUTF8_on(dataline);
-#endif
TRACE_PQFREEMEM;
PQfreemem(tempbuf);
}
diff --git a/dbdimp.h b/dbdimp.h
index 0dd02fe..d665239 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -31,11 +31,10 @@ struct imp_dbh_st {
AV *savepoints; /* list of savepoints */
PGconn *conn; /* connection structure */
char *sqlstate; /* from the last result */
- const char *server_encoding; /* encoding detected at login */
- int utf8;
bool pg_bool_tf; /* do bools return 't'/'f'? Set by user,
default is 0 */
- bool pg_enable_utf8; /* should we attempt to make utf8 strings?
Set by user, default is 0 */
+ bool utf8_strings; /* do we set the utf8 flag on data from the
database? */
+ bool pg_enable_utf8; /* (DEPRECATED) should we attempt to make
utf8 strings? Set by user, default is 0 */
bool prepare_now; /* force immediate prepares, even with
placeholders. Set by user, default is 0 */
bool done_begin; /* have we done a begin? (e.g. are we in a
transaction?) */
bool dollaronly; /* only consider $1, $2 ... as valid
placeholders */
diff --git a/t/02attribs.t b/t/02attribs.t
index e98add9..5445ad0 100644
--- a/t/02attribs.t
+++ b/t/02attribs.t
@@ -55,7 +55,7 @@ d pg_options
d pg_socket
d pg_pid
d pg_standard_conforming strings
-d pg_enable_utf8
+d pg_utf8_strings
d Warn
d pg_prepare_now - tested in 03smethod.t
@@ -420,7 +420,7 @@ SKIP: {
$SQL = 'SELECT id, pname FROM dbd_pg_test WHERE id = ?';
$sth = $dbh->prepare($SQL);
$sth->execute(1);
- local $dbh->{pg_enable_utf8} = 1;
+ ## local $dbh->{pg_utf8_strings} = 1;
$t='Quote method returns correct utf-8 characters';
my $utf8_str = chr(0x100).'dam'; # LATIN CAPITAL LETTER A WITH MACRON
@@ -438,11 +438,11 @@ SKIP: {
$t='Unicode (utf8) data returned from database is not corrupted';
is (length($name), 4, $t);
- $t='ASCII text returned from database does have utf8 bit set';
+ $t='ASCII text returned from database *does* have utf8 bit set';
$sth->finish();
$sth->execute(1);
my ($id2, $name2) = $sth->fetchrow_array();
- ok (!Encode::is_utf8($name2), $t);
+ ok (Encode::is_utf8($name2), $t);
$sth->finish();
}
diff --git a/t/09arrays.t b/t/09arrays.t
index d004c34..03a9e1a 100644
--- a/t/09arrays.t
+++ b/t/09arrays.t
@@ -569,7 +569,6 @@ SKIP: {
if $server_encoding ne 'UTF8';
$t='String should be UTF-8';
- local $dbh->{pg_enable_utf8} = 1;
my $utf8_str = chr(0x100).'dam'; # LATIN CAPITAL LETTER A WITH MACRON
ok (Encode::is_utf8( $utf8_str ), $t);
@@ -634,8 +633,8 @@ SKIP: {
$expected = [1,['Bob',$utf8_str],'one'];
is_deeply ($result, $expected, $t);
- $t='Selected ASCII string should not be UTF-8';
- ok (!Encode::is_utf8( $result->[1][0] ), $t);
+ $t='Selected ASCII string should be UTF-8';
+ ok (Encode::is_utf8( $result->[1][0] ), $t);
$t='Selected string should be UTF-8';
ok (Encode::is_utf8( $result->[1][1] ), $t);
diff --git a/testme.tmp.pl b/testme.tmp.pl
index 15f8e5d..713f5f8 100755
--- a/testme.tmp.pl
+++ b/testme.tmp.pl
@@ -18,15 +18,15 @@ use vars qw/$sth $info $count $SQL/;
my $tracelevel = shift || 0;
$ENV{DBI_TRACE} = $tracelevel;
-my $DSN = 'DBI:Pg:dbname=postgres';
+my $dbname = 'latin';
+my $DSN = "DBI:Pg:dbname=$dbname";
+
my $dbh = DBI->connect($DSN, '', '',
{AutoCommit=>0,RaiseError=>1,PrintError=>0})
or die "Connection failed!\n";
my $me = $dbh->{Driver}{Name};
print "DBI is version $DBI::VERSION, I am $me, version of DBD::Pg is
$DBD::Pg::VERSION\n";
-memory_leak_test_bug_65734();
-
exit;
sub memory_leak_test_bug_65734 {
--
1.7.0.5