Hi,

The attached patch sets the internal Perl UTF-8 flag on character
content, based on the DB2CODEPAGE and LANG environment variables as
well as on the content itself (is_high_bit_set).
With the patch applied, DBD::DB2 behaves consistently with DBD drivers
like DBD::Oracle or DBD::Pg when it comes to getting UTF-8 content out
of the database via the Perl DBI.

--michael

--
Michael Kröll
---------------------------------------
Three United Mobile Solutions ag
----------------------------------------
Hasnerstrasse 123
A-1160 Vienna (Austria)
mobile: +43 664 808 41 743
http://www.3united.com/

--- /tmp/dbdimp.c       Sun Sep 19 16:07:07 2004
+++ dbdimp.c    Wed Mar 15 15:11:12 2006
@@ -29,6 +29,12 @@
  #define SQL_ATTR_QUERY_OPTIMIZATION_LEVEL 1293
 #endif
 
+#ifdef WIN32
+ #define strcasecmp strcmpi
+#endif
+
+static int is_high_bit_set(char *val);
+
 DBISTATE_DECLARE;
 
 void dbd_init( dbistate_t *dbistate )
@@ -2492,6 +2498,13 @@
     return imp_sth->RowCount;
 }
 
+static int is_high_bit_set(val) /* returns 1 if any byte before the terminating NUL has bit 0x80 set, else 0 */
+                char *val;      /* NUL-terminated string to scan */
+{
+       while (*val)
+               if (*val++ & 0x80) return 1; /* stop at the first byte outside 7-bit ASCII */
+       return 0;
+}
 
 AV *dbd_st_fetch( SV *sth,
                   imp_sth_t *imp_sth )
@@ -2505,6 +2518,8 @@
     imp_fbh_t *fbh;
     SV *sv;
     int arraylen;
+    int env_is_utf8 = 0;
+    char *e;
 
     /* Check that execute() was executed sucessfuly. This also implies    */
     /* that dbd_describe() executed sucessfuly so the memory buffers    */
@@ -2576,6 +2591,15 @@
                    "    dbd_st_fetch %d fields\n", num_fields );
 
     ChopBlanks = DBIc_has( imp_sth, DBIcf_ChopBlanks );
+
+    /* Check if the content might be UTF8 based on the environment */
+    e = getenv("DB2CODEPAGE");
+    if (e && strlen(e) >= 4 && !strcmp(e + strlen(e) - 4, "1208"))
+        env_is_utf8 = 1;
+    e = getenv("LANG");
+    if (e && strlen(e) >= 4 && !strcasecmp(e + strlen(e) - 4, "utf8"))
+         env_is_utf8 = 1;
+
     for( i = 0; i < num_fields; ++i )
     {
       fbh = &imp_sth->fbh[i];
@@ -2606,8 +2630,20 @@
       if( fbh->rlen > -1 &&      /* normal case - column is not null */
           fbh->bufferSize > 0 )
       {
+
         int nullAdj = SQL_C_CHAR == fbh->ftype ? 1 : 0;
 
+#ifdef is_utf8_string
+        /* if ( fbh->dbtype == SQL_VARCHAR || fbh->dbtype == SQL_CLOB ) */
+        if ( fbh->ftype == SQL_C_CHAR )
+        {
+            if (env_is_utf8 && is_high_bit_set((char*)fbh->buffer) && is_utf8_string((unsigned char*)fbh->buffer, fbh->rlen)) {
+                /* warn("Setting UTF8 flag on"); */
+                SvUTF8_on(sv);
+            }
+        }
+#endif
+
         if( fbh->rlen > ( fbh->bufferSize - nullAdj ) ) /* data has been truncated */
         {
           int longTruncOk = DBIc_has( imp_sth, DBIcf_LongTruncOk );

Reply via email to