Hi,
The attached patch sets Perl's internal UTF-8 flag on character content,
based on the DB2CODEPAGE and LANG environment variables as well as on
the content itself (is_high_bit_set).
With the patch applied, DBD::DB2 behaves consistently with DBD drivers
such as DBD::Oracle and DBD::Pg when it comes to getting UTF-8 content
out of the database via the Perl DBI.
--michael
--
Michael Kröll
---------------------------------------
Three United Mobile Solutions ag
----------------------------------------
Hasnerstrasse 123
A-1160 Vienna (Austria)
mobile: +43 664 808 41 743
http://www.3united.com/
--- /tmp/dbdimp.c Sun Sep 19 16:07:07 2004
+++ dbdimp.c Wed Mar 15 15:11:12 2006
@@ -29,6 +29,12 @@
#define SQL_ATTR_QUERY_OPTIMIZATION_LEVEL 1293
#endif
+#ifdef WIN32 /* only when WIN32 is defined: provide the strcasecmp name used by this patch */
+ #define strcasecmp strcmpi /* NOTE(review): newer MSVC prefers _stricmp over strcmpi -- confirm */
+#endif
+
+static int is_high_bit_set(char *val); /* forward declaration; the definition is added further down */
+
DBISTATE_DECLARE;
void dbd_init( dbistate_t *dbistate )
@@ -2492,6 +2498,13 @@
return imp_sth->RowCount;
}
+/* Return 1 if any byte of the NUL-terminated string val has bit 0x80 set, else 0. */
+static int is_high_bit_set(char *val)
+{
+ while (*val)
+ if (*val++ & 0x80) return 1;
+ return 0;
+}
AV *dbd_st_fetch( SV *sth,
imp_sth_t *imp_sth )
@@ -2505,6 +2518,8 @@
imp_fbh_t *fbh;
SV *sv;
int arraylen;
+ int env_is_utf8 = 0;
+ char *e;
/* Check that execute() was executed sucessfuly. This also implies */
/* that dbd_describe() executed sucessfuly so the memory buffers */
@@ -2576,6 +2591,15 @@
" dbd_st_fetch %d fields\n", num_fields );
ChopBlanks = DBIc_has( imp_sth, DBIcf_ChopBlanks );
+
+ /* Check if the content might be UTF8 based on the environment */
+ e = getenv("DB2CODEPAGE");
+ if (e && strlen(e) >= 4 && !strcmp(e + strlen(e) - 4, "1208"))
+ env_is_utf8 = 1;
+ e = getenv("LANG");
+ if (e && strlen(e) >= 4 && !strcasecmp(e + strlen(e) - 4, "utf8"))
+ env_is_utf8 = 1;
+
for( i = 0; i < num_fields; ++i )
{
fbh = &imp_sth->fbh[i];
@@ -2606,8 +2630,20 @@
if( fbh->rlen > -1 && /* normal case - column is not null */
fbh->bufferSize > 0 )
{
+
int nullAdj = SQL_C_CHAR == fbh->ftype ? 1 : 0;
+#ifdef is_utf8_string
+ /* if ( fbh->dbtype == SQL_VARCHAR || fbh->dbtype == SQL_CLOB ) */
+ if ( fbh->ftype == SQL_C_CHAR )
+ {
+ if (env_is_utf8 && is_high_bit_set((char*)fbh->buffer) &&
is_utf8_string((unsigned char*)fbh->buffer, fbh->rlen)) {
+ /* warn("Setting UTF8 flag on"); */
+ SvUTF8_on(sv);
+ }
+ }
+#endif
+
if( fbh->rlen > ( fbh->bufferSize - nullAdj ) ) /* data has been
truncated */
{
int longTruncOk = DBIc_has( imp_sth, DBIcf_LongTruncOk );