Committed by Greg Sabino Mullane <[email protected]>
Another stab at the UTF-8 system, this time simplified as much as possible.
See the pod for pg_enable_utf8 for an explanation.
Note that this commit will probably be picked out later, as we
want to release a new minor version before releasing such a big
change
---
Pg.pm | 25 ++++++++++++++---
dbdimp.c | 88 +++++++++++++++++++++++++++++++++++--------------------------
dbdimp.h | 6 +++-
3 files changed, 75 insertions(+), 44 deletions(-)
diff --git a/Pg.pm b/Pg.pm
index f04b1b9..7faf481 100644
--- a/Pg.pm
+++ b/Pg.pm
@@ -1633,6 +1633,7 @@ use 5.006001;
pg_db => undef,
pg_default_port => undef,
pg_enable_utf8 => undef,
+ pg_utf8_flag => undef,
pg_errorlevel => undef,
pg_expand_array => undef,
pg_host => undef,
@@ -3128,12 +3129,26 @@ DBD::Pg specific attribute. Defaults to false. When
true, question marks inside
are not treated as L<placeholders|/Placeholders>. Useful for statements that
contain unquoted question
marks, such as geometric operators.
-=head3 B<pg_enable_utf8> (boolean)
+=head3 B<pg_enable_utf8> (integer)
-DBD::Pg specific attribute. If true, then the C<utf8> flag will be turned on
-for returned character data (if the data is valid UTF-8). For details about
-the C<utf8> flag, see the C<Encode> module. This attribute is only relevant
under
-perl 5.8 and later.
+DBD::Pg specific attribute. The behavior of DBD::Pg with regard to this flag
has
+changed as of version xxx. The default value for this attribute, -1, indicates
+that the internal C<utf8> flag will be turned on for all strings coming back
+from the database if the client_encoding is set to 'UTF8'. Use of this default
+is highly encouraged, and you should not need to use this attribute except
+for the following two conditions:
+
+If this attribute is set to 0, then the internal C<utf8> flag will *never* be
+turned on for returned data, regardless of the current client_encoding.
+
+If this attribute is set to 1, then the internal C<utf8> flag will *always*
+be turned on for returned data, regardless of the current client_encoding
+(with the exception of bytea data).
+
+Note that the value of client_encoding is only checked at connection time. If
+you change the client_encoding to/from 'UTF8' after connecting, you can set
+pg_enable_utf8 to -1 to force DBD::Pg to read in the new client_encoding and
+act accordingly.
=head3 B<pg_errorlevel> (integer)
diff --git a/dbdimp.c b/dbdimp.c
index 494033f..1f293e6 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -224,8 +224,15 @@ int dbd_db_login6 (SV * dbh, imp_dbh_t * imp_dbh, char *
dbname, char * uid, cha
}
}
- imp_dbh->pg_bool_tf = DBDPG_FALSE;
- imp_dbh->pg_enable_utf8 = DBDPG_FALSE;
+ imp_dbh->client_encoding_utf8 =
+ (0 == strncmp(PQparameterStatus(imp_dbh->conn,
"client_encoding"), "UTF8", 4))
+ ? DBDPG_TRUE : DBDPG_FALSE;
+
+ /* If the client_encoding is UTF8, flip the utf8 flag until convinced
otherwise */
+ imp_dbh->pg_utf8_flag = imp_dbh->client_encoding_utf8;
+
+ imp_dbh->pg_enable_utf8 = -1;
+
imp_dbh->prepare_now = DBDPG_FALSE;
imp_dbh->done_begin = DBDPG_FALSE;
imp_dbh->dollaronly = DBDPG_FALSE;
@@ -278,10 +285,8 @@ static void pg_error (pTHX_ SV * h, int error_num, const
char * error_msg)
sv_setpv(DBIc_STATE(imp_xxh), (char*)imp_dbh->sqlstate);
/* Set as utf-8 */
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8)
+ if (imp_dbh->pg_utf8_flag)
SvUTF8_on(DBIc_ERRSTR(imp_xxh));
-#endif
if (TEND_slow) TRC(DBILOGFP, "%sEnd pg_error\n", THEADER_slow);
@@ -725,10 +730,12 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv)
retsv = newSViv((IV)imp_dbh->pg_protocol);
break;
- case 12: /* pg_INV_WRITE */
+ case 12: /* pg_INV_WRITE pg_utf8_flag */
if (strEQ("pg_INV_WRITE", key))
retsv = newSViv((IV) INV_WRITE );
+ else if (strEQ("pg_utf8_flag", key))
+ retsv = newSViv((IV)imp_dbh->pg_utf8_flag);
break;
case 13: /* pg_errorlevel */
@@ -743,10 +750,8 @@ SV * dbd_db_FETCH_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv)
retsv = newSViv((IV) PGLIBVERSION );
else if (strEQ("pg_prepare_now", key))
retsv = newSViv((IV)imp_dbh->prepare_now);
-#ifdef is_utf8_string
else if (strEQ("pg_enable_utf8", key))
retsv = newSViv((IV)imp_dbh->pg_enable_utf8);
-#endif
break;
case 15: /* pg_default_port pg_async_status pg_expand_array */
@@ -865,12 +870,33 @@ int dbd_db_STORE_attrib (SV * dbh, imp_dbh_t * imp_dbh,
SV * keysv, SV * valuesv
retval = 1;
}
-#ifdef is_utf8_string
+ /*
+ We don't want to check the client_encoding every single time
we talk to the database,
+ so we only do it here, which allows people to signal DBD::Pg
that something
+ may have changed, so could you please rescan client_encoding?
+ */
else if (strEQ("pg_enable_utf8", key)) {
- imp_dbh->pg_enable_utf8 = newval!=0 ? DBDPG_TRUE :
DBDPG_FALSE;
+ /* Technically, we only allow -1, 0, and 1 */
+ imp_dbh->pg_enable_utf8 = newval;
+
+ /* Never use the utf8 flag, no matter what */
+ if (0 == imp_dbh->pg_enable_utf8) {
+ imp_dbh->pg_utf8_flag = DBDPG_FALSE;
+ }
+ /* Always use the flag, no matter what */
+ else if (1 == imp_dbh->pg_enable_utf8) {
+ imp_dbh->pg_utf8_flag = DBDPG_TRUE;
+ }
+ /* Do The Right Thing */
+ else {
+ imp_dbh->client_encoding_utf8 =
+ (0 ==
strncmp(PQparameterStatus(imp_dbh->conn, "client_encoding"), "UTF8", 4))
+ ? DBDPG_TRUE : DBDPG_FALSE;
+ imp_dbh->pg_enable_utf8 = -1;
+ imp_dbh->pg_utf8_flag =
imp_dbh->client_encoding_utf8;
+ }
retval = 1;
}
-#endif
break;
case 15: /* pg_expand_array */
@@ -1084,10 +1110,8 @@ SV * dbd_st_FETCH_attrib (SV * sth, imp_sth_t * imp_sth,
SV * keysv)
TRACE_PQFNAME;
fieldname = PQfname(imp_sth->result, fields);
sv_fieldname = newSVpv(fieldname,0);
-#ifdef is_utf8_string
if (is_high_bit_set(aTHX_ (unsigned char
*)fieldname, strlen(fieldname)) && is_utf8_string((unsigned char *)fieldname,
strlen(fieldname)))
SvUTF8_on(sv_fieldname);
-#endif
(void)av_store(av, fields, sv_fieldname);
}
}
@@ -2677,14 +2701,9 @@ static SV * pg_destringify_array(pTHX_ imp_dbh_t
*imp_dbh, unsigned char * input
av_push(currentav, newSViv('t' ==
*string ? 1 : 0));
else {
SV *sv = newSVpvn(string, section_size);
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8) {
- SvUTF8_off(sv);
- if (is_high_bit_set(aTHX_
(unsigned char *)string, section_size) && is_utf8_string((unsigned
char*)string, section_size)) {
- SvUTF8_on(sv);
- }
+ if (imp_dbh->pg_utf8_flag) {
+ SvUTF8_on(sv);
}
-#endif
av_push(currentav, sv);
}
@@ -3490,23 +3509,18 @@ AV * dbd_st_fetch (SV * sth, imp_sth_t * imp_sth)
}
}
}
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8 && type_info) {
- SvUTF8_off(sv);
- switch (type_info->type_id) {
- case PG_CHAR:
- case PG_TEXT:
- case PG_BPCHAR:
- case PG_VARCHAR:
- if (is_high_bit_set(aTHX_ value,
value_len) && is_utf8_string((unsigned char*)value, value_len)) {
- SvUTF8_on(sv);
- }
- break;
- default:
- break;
+ if (imp_dbh->pg_utf8_flag) {
+ /*
+ The only exception to our rule about setting
utf8 if the client_encoding
+ is set to UTF8 is bytea.
+ */
+ if (type_info && PG_BYTEA ==
type_info->type_id) {
+ SvUTF8_off(sv);
+ }
+ else {
+ SvUTF8_on(sv);
}
}
-#endif
}
}
@@ -3870,10 +3884,8 @@ int pg_db_getcopydata (SV * dbh, SV * dataline, int
async)
if (copystatus > 0) {
sv_setpv(dataline, tempbuf);
-#ifdef is_utf8_string
- if (imp_dbh->pg_enable_utf8)
+ if (imp_dbh->pg_utf8_flag)
SvUTF8_on(dataline);
-#endif
TRACE_PQFREEMEM;
PQfreemem(tempbuf);
}
diff --git a/dbdimp.h b/dbdimp.h
index 1310e16..6058ff1 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -30,13 +30,17 @@ struct imp_dbh_st {
PGconn *conn; /* connection structure */
char *sqlstate; /* from the last result */
+
bool pg_bool_tf; /* do bools return 't'/'f'? Set by user,
default is 0 */
- bool pg_enable_utf8; /* should we attempt to make utf8 strings?
Set by user, default is 0 */
bool prepare_now; /* force immediate prepares, even with
placeholders. Set by user, default is 0 */
bool done_begin; /* have we done a begin? (e.g. are we in a
transaction?) */
bool dollaronly; /* only consider $1, $2 ... as valid
placeholders */
bool expand_array; /* transform arrays from the db into Perl
arrays? Default is 1 */
bool txn_read_only; /* are we in read-only mode? Set with
$dbh->{ReadOnly} */
+
+ int pg_enable_utf8; /* legacy utf8 flag: force utf8 flag on or
off, regardless of client_encoding */
+ bool pg_utf8_flag; /* are we currently flipping the utf8 flag
on? */
+ bool client_encoding_utf8; /* is the client_encoding utf8 last we
checked? */
};
--
1.7.1