Author: byterock
Date: Thu Oct  2 11:51:29 2008
New Revision: 11917

Added:
   dbd-oracle/branches/utf8_ea/t/02utf8.t

Log:
more stuff

Added: dbd-oracle/branches/utf8_ea/t/02utf8.t
==============================================================================
--- (empty file)
+++ dbd-oracle/branches/utf8_ea/t/02utf8.t      Thu Oct  2 11:51:29 2008
@@ -0,0 +1,241 @@
+#!perl -w
+
+use Test::More tests => 1;
+
+#
+# this script shows a bug in DBD::Oracle v.1.22 (and others) execute_array 
+# handling with respect to 'perl unicode strings', i.e. strings with the 
+# internal utf8 flag set.
+#
+# according to the docs, these strings should automatically be handled 
+# correctly by the driver, but this only works for execute, not execute_array.
+# the values SHOULD be correctly stored on utf8 databases, or the correct 
+# replacement character used on other databases.
+#
+# on a database with charset US7ASCII, this can result in database corruption 
+# (i.e non ascii values in a varchar2 field), and on a UTF-8 database (UTF8 
+# or AL32UTF8) will result in corrupted data.
+#
+# the test only functions on US7ASCII or UTF8/AL32UTF8 databases.
+#
+# run as:
+#
+#    utf8test.pl <oracle connection string>
+#
+#    eg. utf8test.pl scott/tiger 
+#    or  utf8test.pl scott/tiger@//host/service
+#    etc.
+#
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Encode;
+
+#
+# this test does NOT work (although the bug still exists) if
+# non-ascii characters from the so-called native (latin-1 usually)
+# character set are used, OR if a unicode character is used that
+# has an ASCII replacement character other than '?', so be careful
+# to use 'wierd' unicode characters here.
+#
+# the unicode character below looks like a house or keyhole
+# it's a non-latin-1, non-ascii character without any ascii replacement 
+# character the utf8 encoding of unicode char 6e9 is (0xDB,0xA9) or (219,169)
+#
+my $utf8_string = "\x{6e9}"; 
+my $ascii_string = "A";
+
+sub get_connection_string {
+       my $connection_string;
+       my $r = GetOptions("server=s" => \$connection_string);
+       die "bad options" unless $r;
+       $connection_string = shift @ARGV if (!$connection_string && @ARGV);
+       die "must supply server connection string: $0 user/[EMAIL 
PROTECTED]|//host/service}]"
+               unless $connection_string;
+       return $connection_string;
+}
+
+sub determine_db_charset_class {
+       my ($dbh) = @_;
+       my $sql = q/
+--------------
+select value from nls_database_parameters where parameter = 'NLS_CHARACTERSET'
+       /;
+
+       my ($db_charset) = $dbh->selectrow_array($sql); 
+
+       my $types =  [
+               { type => 'ASCII', charsets => [ 'US7ASCII' ] }
+               , { type => 'UTF8', charsets => [ 'UTF8','AL32UTF8' ] }
+       ];
+
+       for my $t (@$types) {
+               if (grep {$db_charset eq $_ } @{$t->{charsets}}) {
+                       return ($t->{type}, $db_charset);
+               }
+       }
+
+#      die "db charset $db_charset not handled by this testcase";
+}
+
+sub create_test_table {
+       my ($dbh) = @_;
+       my $sql = q/
+----------------
+create table t__utf8_test 
+(
+       method varchar2(50)
+       , string_type varchar2(50)
+       , string varchar2(200) 
+)
+       /;
+
+       $dbh->do($sql);
+       print "test table created\n";
+}
+
+sub drop_test_table {
+       my ($dbh) = @_;
+       my $sql = q/
+------------------
+drop table t__utf8_test
+       /;
+
+       eval {
+               $dbh->do($sql);
+       };
+
+       if ($@) {
+               if ($@ =~ /ORA-00942/) {
+                       print "test table doesn't exist for drop (ok)\n";
+               } else {
+                       die "unexcepted error dropping table: $@";
+               }
+       }
+}
+
+sub create_statement {
+       my ($dbh) = @_;
+       return $dbh->prepare(q/
+--------------
+insert into t__utf8_test values (?,?,?)
+       /);
+}
+
+sub insert_using_execute {
+       my ($dbh) = @_;
+       my $sth = create_statement $dbh;
+       $sth->execute("execute", "utf8", $utf8_string);
+       $sth->execute("execute", "ascii", $ascii_string);
+       $dbh->commit;
+       print "rows created using execute\n";
+}
+
+sub insert_using_execute_array {
+       my ($dbh) = @_;
+       my $sth = create_statement $dbh;
+       my @tuple_status;
+       my @types = ("utf8", "ascii");
+       my $rows = $sth->execute_array( {
+               ArrayTupleStatus => [EMAIL PROTECTED]
+               , ArrayTupleFetch => sub {
+                       my $type = pop @types;
+                       return undef unless $type;
+                       my $str = $type eq "utf8"? $utf8_string : $ascii_string;
+                       return ['execute_array',$type, $str ];
+               }
+       });
+
+       unless (defined($rows)) {
+               die "error during execute_array...";
+               # actual error message available via @tuple_status...
+       }
+
+       $dbh->commit;
+       print "rows created using execute_array\n";
+}
+
+sub check_show_results {
+       my ($dbh, $dbclass) = @_;
+       my $sth = $dbh->prepare(q/
+--------------
+select method,string_type,dump(string) from t__utf8_test
+order by method,string_type
+       /);
+       $sth->execute;
+
+       print "results: \n";
+       printf "  %-15.15s %-6.6s %-40.40s %s\n", 
+               "method","type","dump","result";
+       printf "  %-15.15s %-6.6s %-40.40s %s\n", 
+                       
"=================","===========","===================================","======";
+
+       while (my ($method, $string_type, $dump) = $sth->fetchrow_array) {
+               my $orig_dump = $dump;
+               $dump=~s/^.*:\s+//g;
+               # extract as 'bytes' and use decode below for utf8 as necessary
+               my $str = pack("C*", split /,/, $dump);
+               my $result;
+               if ($string_type eq "ascii") {
+                       $result = ($str eq $ascii_string) ? 'PASS':'FAIL';
+               } else {
+                       if ($dbclass eq "ASCII") {
+                               # shoddy brute force replacement of non-ascii
+                               (my $tmp = $utf8_string) =~ s/[^[:ascii:]]/?/g;
+                               $result = $str eq $tmp ? 'PASS':'FAIL';
+                       } else {
+                               my $dec_str = Encode::decode_utf8($str);
+                               $result=$dec_str eq $utf8_string?'PASS':'FAIL';
+                       }
+               }
+
+               printf "  %-15.15s %-6.6s %-40.40s %s\n", 
+                       $method, $string_type, $orig_dump, $result;
+       
+       }
+       print "\n";
+}
+
+sub do_test {
+       my ($dbh) = @_;
+
+       my ($dbclass, $charset) = determine_db_charset_class $dbh;
+
+       print "\n======= test for database type $dbclass ($charset) 
============\n\n";
+
+       drop_test_table $dbh;
+       create_test_table $dbh;
+       insert_using_execute $dbh;
+       insert_using_execute_array $dbh; 
+       check_show_results $dbh, $dbclass;
+#      drop_test_table $dbh;
+
+       $dbh->disconnect;
+}
+
+sub main {
+       #
+       # ensure environment consistentcy by clearing NLS_LANG
+       #
+       #delete $ENV{NLS_LANG};
+
+       die "utf8_string doesn't have unicode flag set" 
+               unless Encode::is_utf8($utf8_string);
+
+       die "ascii_string does have unicode flag set" 
+               unless !Encode::is_utf8($ascii_string);
+
+       my $dbh = DBI->connect("dbi:Oracle:", '[EMAIL PROTECTED]','dunebuggy', 
+               {PrintError=>0, RaiseError=>1,AutoCommit=>0,dbd_verbose=>15});
+
+print $dbh->ora_can_unicode();
+exit;
+        $dbh->{'dbd_verbose'}=0;       
+       do_test $dbh;
+}
+
+main;
+exit 1;
+

Reply via email to