Author: byterock
Date: Thu Oct 2 11:51:29 2008
New Revision: 11917
Added:
dbd-oracle/branches/utf8_ea/t/02utf8.t
Log:
more stuff
Added: dbd-oracle/branches/utf8_ea/t/02utf8.t
==============================================================================
--- (empty file)
+++ dbd-oracle/branches/utf8_ea/t/02utf8.t Thu Oct 2 11:51:29 2008
@@ -0,0 +1,241 @@
+#!perl -w
+
+use Test::More tests => 1;
+
+#
+# this script shows a bug in DBD::Oracle v.1.22 (and others) execute_array
+# handling with respect to 'perl unicode strings', i.e. strings with the
+# internal utf8 flag set.
+#
+# according to the docs, these strings should automatically be handled
+# correctly by the driver, but this only works for execute, not execute_array.
+# the values SHOULD be correctly stored on utf8 databases, or the correct
+# replacement character used on other databases.
+#
+# on a database with charset US7ASCII, this can result in database corruption
+# (i.e non ascii values in a varchar2 field), and on a UTF-8 database (UTF8
+# or AL32UTF8) will result in corrupted data.
+#
+# the test only functions on US7ASCII or UTF8/AL32UTF8 databases.
+#
+# run as:
+#
+# utf8test.pl <oracle connection string>
+#
+# eg. utf8test.pl scott/tiger
+# or utf8test.pl scott/tiger@//host/service
+# etc.
+#
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Encode;
+
+#
+# this test does NOT work (although the bug still exists) if
+# non-ascii characters from the so-called native (latin-1 usually)
+# character set are used, OR if a unicode character is used that
+# has an ASCII replacement character other than '?', so be careful
+# to use 'wierd' unicode characters here.
+#
+# the unicode character below looks like a house or keyhole
+# it's a non-latin-1, non-ascii character without any ascii replacement
+# character the utf8 encoding of unicode char 6e9 is (0xDB,0xA9) or (219,169)
+#
+my $utf8_string = "\x{6e9}";
+my $ascii_string = "A";
+
+sub get_connection_string {
+ my $connection_string;
+ my $r = GetOptions("server=s" => \$connection_string);
+ die "bad options" unless $r;
+ $connection_string = shift @ARGV if (!$connection_string && @ARGV);
+ die "must supply server connection string: $0 user/[EMAIL
PROTECTED]|//host/service}]"
+ unless $connection_string;
+ return $connection_string;
+}
+
+sub determine_db_charset_class {
+ my ($dbh) = @_;
+ my $sql = q/
+--------------
+select value from nls_database_parameters where parameter = 'NLS_CHARACTERSET'
+ /;
+
+ my ($db_charset) = $dbh->selectrow_array($sql);
+
+ my $types = [
+ { type => 'ASCII', charsets => [ 'US7ASCII' ] }
+ , { type => 'UTF8', charsets => [ 'UTF8','AL32UTF8' ] }
+ ];
+
+ for my $t (@$types) {
+ if (grep {$db_charset eq $_ } @{$t->{charsets}}) {
+ return ($t->{type}, $db_charset);
+ }
+ }
+
+# die "db charset $db_charset not handled by this testcase";
+}
+
+sub create_test_table {
+ my ($dbh) = @_;
+ my $sql = q/
+----------------
+create table t__utf8_test
+(
+ method varchar2(50)
+ , string_type varchar2(50)
+ , string varchar2(200)
+)
+ /;
+
+ $dbh->do($sql);
+ print "test table created\n";
+}
+
+sub drop_test_table {
+ my ($dbh) = @_;
+ my $sql = q/
+------------------
+drop table t__utf8_test
+ /;
+
+ eval {
+ $dbh->do($sql);
+ };
+
+ if ($@) {
+ if ($@ =~ /ORA-00942/) {
+ print "test table doesn't exist for drop (ok)\n";
+ } else {
+ die "unexcepted error dropping table: $@";
+ }
+ }
+}
+
+sub create_statement {
+ my ($dbh) = @_;
+ return $dbh->prepare(q/
+--------------
+insert into t__utf8_test values (?,?,?)
+ /);
+}
+
+sub insert_using_execute {
+ my ($dbh) = @_;
+ my $sth = create_statement $dbh;
+ $sth->execute("execute", "utf8", $utf8_string);
+ $sth->execute("execute", "ascii", $ascii_string);
+ $dbh->commit;
+ print "rows created using execute\n";
+}
+
+sub insert_using_execute_array {
+ my ($dbh) = @_;
+ my $sth = create_statement $dbh;
+ my @tuple_status;
+ my @types = ("utf8", "ascii");
+ my $rows = $sth->execute_array( {
+ ArrayTupleStatus => [EMAIL PROTECTED]
+ , ArrayTupleFetch => sub {
+ my $type = pop @types;
+ return undef unless $type;
+ my $str = $type eq "utf8"? $utf8_string : $ascii_string;
+ return ['execute_array',$type, $str ];
+ }
+ });
+
+ unless (defined($rows)) {
+ die "error during execute_array...";
+ # actual error message available via @tuple_status...
+ }
+
+ $dbh->commit;
+ print "rows created using execute_array\n";
+}
+
+sub check_show_results {
+ my ($dbh, $dbclass) = @_;
+ my $sth = $dbh->prepare(q/
+--------------
+select method,string_type,dump(string) from t__utf8_test
+order by method,string_type
+ /);
+ $sth->execute;
+
+ print "results: \n";
+ printf " %-15.15s %-6.6s %-40.40s %s\n",
+ "method","type","dump","result";
+ printf " %-15.15s %-6.6s %-40.40s %s\n",
+
"=================","===========","===================================","======";
+
+ while (my ($method, $string_type, $dump) = $sth->fetchrow_array) {
+ my $orig_dump = $dump;
+ $dump=~s/^.*:\s+//g;
+ # extract as 'bytes' and use decode below for utf8 as necessary
+ my $str = pack("C*", split /,/, $dump);
+ my $result;
+ if ($string_type eq "ascii") {
+ $result = ($str eq $ascii_string) ? 'PASS':'FAIL';
+ } else {
+ if ($dbclass eq "ASCII") {
+ # shoddy brute force replacement of non-ascii
+ (my $tmp = $utf8_string) =~ s/[^[:ascii:]]/?/g;
+ $result = $str eq $tmp ? 'PASS':'FAIL';
+ } else {
+ my $dec_str = Encode::decode_utf8($str);
+ $result=$dec_str eq $utf8_string?'PASS':'FAIL';
+ }
+ }
+
+ printf " %-15.15s %-6.6s %-40.40s %s\n",
+ $method, $string_type, $orig_dump, $result;
+
+ }
+ print "\n";
+}
+
+sub do_test {
+ my ($dbh) = @_;
+
+ my ($dbclass, $charset) = determine_db_charset_class $dbh;
+
+ print "\n======= test for database type $dbclass ($charset)
============\n\n";
+
+ drop_test_table $dbh;
+ create_test_table $dbh;
+ insert_using_execute $dbh;
+ insert_using_execute_array $dbh;
+ check_show_results $dbh, $dbclass;
+# drop_test_table $dbh;
+
+ $dbh->disconnect;
+}
+
+sub main {
+ #
+ # ensure environment consistentcy by clearing NLS_LANG
+ #
+ #delete $ENV{NLS_LANG};
+
+ die "utf8_string doesn't have unicode flag set"
+ unless Encode::is_utf8($utf8_string);
+
+ die "ascii_string does have unicode flag set"
+ unless !Encode::is_utf8($ascii_string);
+
+ my $dbh = DBI->connect("dbi:Oracle:", '[EMAIL PROTECTED]','dunebuggy',
+ {PrintError=>0, RaiseError=>1,AutoCommit=>0,dbd_verbose=>15});
+
+print $dbh->ora_can_unicode();
+exit;
+ $dbh->{'dbd_verbose'}=0;
+ do_test $dbh;
+}
+
+main;
+exit 1;
+