Author: timbo
Date: Wed Nov 10 08:08:01 2004
New Revision: 573
Modified:
dbi/trunk/Changes
dbi/trunk/DBI.pm
dbi/trunk/t/01basics.t
Log:
add docs for new funcs. add $neat_maxlen to :utils export tag
Modified: dbi/trunk/Changes
==============================================================================
--- dbi/trunk/Changes (original)
+++ dbi/trunk/Changes Wed Nov 10 08:08:01 2004
@@ -9,11 +9,12 @@
Fixed space-parsing bug in DBI::SQL::Nano thanks to Jeff Zucker.
Fixed a couple of bad links in docs thanks to Graham Barr.
Fixed test.pl Win32 undef warning thanks to H.Merijn Brand & David Repko.
- Updated Roadmap and ToDo
+
+ Changed use DBI qw(:utils) export tag to include $neat_maxlen.
+ Updated Roadmap and ToDo.
Added data_string_diff() data_string_desc() and data_diff()
utility functions to help diagnose Unicode issues.
-XXX needs docs
=head2 Changes in DBI 1.45 (svn rev 480), 6th October 2004
Modified: dbi/trunk/DBI.pm
==============================================================================
--- dbi/trunk/DBI.pm (original)
+++ dbi/trunk/DBI.pm Wed Nov 10 08:08:01 2004
@@ -227,7 +227,8 @@
SQL_CURSOR_TYPE_DEFAULT
) ], # for ODBC cursor types
utils => [ qw(
- neat neat_list dump_results looks_like_number
+ neat neat_list $neat_maxlen dump_results looks_like_number
+ data_string_diff data_string_desc data_diff
) ],
profile => [ qw(
dbi_profile dbi_profile_merge dbi_time
@@ -1009,7 +1010,7 @@
sub data_diff {
- my ($a, $b) = @_;
+ my ($a, $b, $logical) = @_;
require utf8;
# hacks to cater for perl 5.6 for data_string_diff() & data_string_desc()
@@ -1018,21 +1019,33 @@
} unless defined &utf8::is_utf8;
*utf8::valid = sub { 1 } unless defined &utf8::valid;
- my $a_desc = data_string_desc($a);
- my $b_desc = data_string_desc($b);
my $diff = data_string_diff($a, $b);
+ return "" if $logical and !$diff;
- return "" if !$diff && $a_desc eq $b_desc;
+ my $a_desc = data_string_desc($a);
+ my $b_desc = data_string_desc($b);
+ return "" if !$diff and $a_desc eq $b_desc;
- return "\$a: $a_desc\n\$b: $b_desc\n$diff";
+ $diff ||= "Strings contain the same sequence of characters"
+ if length($a);
+ $diff .= "\n" if $diff;
+ return "a: $a_desc\nb: $b_desc\n$diff";
}
-sub data_string_diff
+sub data_string_diff {
# Compares 'logical' characters, not bytes, so a latin1 string and an
# an equivalent unicode string will compare as equal even though their
# byte encodings are different.
my ($a, $b) = @_;
+ unless (defined $a and defined $b) { # one undef
+ return ""
+ if !defined $a and !defined $b;
+ return "String a is undef, string b has ".length($b)." characters"
+ if !defined $a;
+ return "String b is undef, string a has ".length($a)." characters"
+ if !defined $b;
+ }
my @a_chars = (utf8::is_utf8($a)) ? unpack("U*", $a) : unpack("C*", $a);
my @b_chars = (utf8::is_utf8($b)) ? unpack("U*", $b) : unpack("C*", $b);
my $i = 0;
@@ -1051,10 +1064,10 @@
next unless $c =~ m/\\x\{08(..)}/;
$c .= "='" .chr(hex($1)) ."'"
}
- return sprintf "Strings differ at index $i: a[$i]=$desc[0],
b[$i]=$desc[1]\n";
+ return sprintf "Strings differ at index $i: a[$i]=$desc[0],
b[$i]=$desc[1]";
}
- return "String a truncated after $i characters\n" if @b_chars;
- return "String b truncated after $i characters\n" if @a_chars;
+ return "String a truncated after $i characters" if @b_chars;
+ return "String b truncated after $i characters" if @a_chars;
return "";
}
@@ -1067,16 +1080,16 @@
# (might be ascii so also need to check for hibit to make it worthwhile)
# - UTF8 flag set but invalid UTF8 byte sequence
# could do better here, but this'll do for now
- my $is_ascii = $a =~ m/^[\000-\177]*$/;
- return sprintf "UTF8 %s%s, %s, %d characters %d bytes%s",
+ my $utf8 = sprintf "UTF8 %s%s",
utf8::is_utf8($a) ? "on" : "off",
- utf8::valid($a) ? "" : " but INVALID encoding",
- $is_ascii ? "ASCII" : "Non-ASCII",
+ utf8::valid($a||'') ? "" : " but INVALID encoding";
+ return "$utf8, undef" unless defined $a;
+ my $is_ascii = $a =~ m/^[\000-\177]*$/;
+ return sprintf "%s, %s, %d characters %d bytes",
+ $utf8, $is_ascii ? "ASCII" : "non-ASCII",
length($a), bytes::length($a);
}
-#BEGIN { die data_diff("foox", "foo\x{083a}bar")}
-
sub connect_test_perf {
my($class, $dsn,$dbuser,$dbpass, $attr) = @_;
@@ -2603,15 +2616,90 @@
=head2 DBI Utility Functions
-In addition to the methods listed in the previous section,
-the DBI package also provides these utility functions:
+In addition to the DBI methods listed in the previous section,
+the DBI package also provides several utility functions.
+
+These can be imported into your code by listing them in
+the C<use> statement. For example:
+
+ use DBI qw(neat data_diff);
+
+Alternatively, all these utility functions (except hash) can be
+imported using the C<:utils> import tag. For example:
+
+ use DBI qw(:utils);
=over 4
+=item C<data_string_desc>
+
+ $description = data_string_desc($string);
+
+Returns an informal description of the string. For example:
+
+ UTF8 off, ASCII, 42 characters 42 bytes
+ UTF8 off, non-ASCII, 42 characters 42 bytes
+ UTF8 on, non-ASCII, 4 characters 6 bytes
+ UTF8 on but INVALID encoding, non-ASCII, 4 characters 6 bytes
+ UTF8 off, undef
+
+The initial C<UTF8> on/off refers to Perl's internal SvUTF8 flag.
+If $string has the SvUTF8 flag set but the sequence of bytes it
+contains are not a valid UTF-8 encoding then data_string_desc()
+will report C<UTF8 on but INVALID encoding>.
+
+The C<ASCII> vs C<non-ASCII> portion shows C<ASCII> if I<all> the
+characters in the string are ASCII (have code points <= 127).
+
+=item C<data_string_diff>
+
+ $diff = data_string_diff($a, $b);
+
+Returns an informal description of the first character difference
+between the strings. If both $a and $b contain the same sequence
+of characters then data_string_diff() returns an empty string.
+For example:
+
+ Params a & b Result
+ ------------ ------
+ 'aaa', 'aaa' ''
+ 'aaa', 'abc' 'Strings differ at index 2: a[2]=a, b[2]=b'
+ 'aaa', undef 'String b is undef, string a has 3 characters'
+ 'aaa', 'aa' 'String b truncated after 2 characters'
+
+Unicode characters are reported in C<\x{XXXX}> format. Unicode
+code points in the range U+0800 to U+08FF are unassigned and most
+likely to occur due to double-encoding. Characters in this range
+are reported as C<\x{08XX}='C'> where C<C> is the corresponding
+latin-1 character.
+
+The data_string_diff() function only considers I<characters> and
+not the underlying encoding. See the L</data_diff> function for
+
+=item C<data_diff>
+
+ $diff = data_diff($a, $b);
+ $diff = data_diff($a, $b, $logical);
+
+Returns an informal description of the difference between two strings.
+It calls L</data_string_desc> and L</data_string_diff>
+and returns the combined results as a multi-line string.
+
+For example, C<data_diff("abc", "ab\x{263a}")> will return:
+
+ a: UTF8 off, ASCII, 3 characters 3 bytes
+ b: UTF8 on, non-ASCII, 3 characters 5 bytes
+ Strings differ at index 2: a[2]=c, b[2]=\x{263A}
+
+If $a and $b are identical in both the characters they contain and
+their physical encoding then data_diff() returns an empty string.
+If $logical is true then physical encoding differences are ignored.
+
+
=item C<neat>
- $str = DBI::neat($value);
- $str = DBI::neat($value, $maxlen);
+ $str = neat($value);
+ $str = neat($value, $maxlen);
Return a string containing a neat (and tidy) representation of the
supplied value.
@@ -2635,15 +2723,15 @@
=item C<neat_list>
- $str = DBI::neat_list([EMAIL PROTECTED], $maxlen, $field_sep);
+ $str = neat_list([EMAIL PROTECTED], $maxlen, $field_sep);
-Calls C<DBI::neat> on each element of the list and returns a string
+Calls C<neat> on each element of the list and returns a string
containing the results joined with C<$field_sep>. C<$field_sep> defaults
to C<", ">.
=item C<looks_like_number>
- @bool = DBI::looks_like_number(@array);
+ @bool = looks_like_number(@array);
Returns true for each element that looks like a number.
Returns false for each element that does not look like a number.
Modified: dbi/trunk/t/01basics.t
==============================================================================
--- dbi/trunk/t/01basics.t (original)
+++ dbi/trunk/t/01basics.t Wed Nov 10 08:08:01 2004
@@ -2,9 +2,11 @@
use strict;
-use Test::More tests => 110;
+use Test::More tests => 131;
use File::Spec;
+$|=1;
+
## ----------------------------------------------------------------------------
## 01basic.t - test of some basic DBI functions
## ----------------------------------------------------------------------------
@@ -240,13 +242,44 @@
cmp_ok(DBI::hash("foo1" ), '==', -1077531989, '... should be -1077531989');
cmp_ok(DBI::hash("foo1",0), '==', -1077531989, '... should be -1077531989');
cmp_ok(DBI::hash("foo2",0), '==', -1077531990, '... should be -1077531990');
+cmp_ok(DBI::hash("foo1",1), '==', -1263462440, '... should be -1263462440');
+cmp_ok(DBI::hash("foo2",1), '==', -1263462437, '... should be -1263462437');
+
+is(data_string_desc(""), "UTF8 off, ASCII, 0 characters 0 bytes");
+is(data_string_desc(42), "UTF8 off, ASCII, 2 characters 2 bytes");
+is(data_string_desc("foo"), "UTF8 off, ASCII, 3 characters 3 bytes");
+is(data_string_desc(undef), "UTF8 off, undef");
+is(data_string_desc("bar\x{263a}"), "UTF8 on, non-ASCII, 4 characters 6
bytes");
+is(data_string_desc("\xEA"), "UTF8 off, non-ASCII, 1 characters 1 bytes");
+
+is(data_string_diff( "", ""), "");
+is(data_string_diff( "",undef), "String b is undef, string a has 0
characters");
+is(data_string_diff(undef,undef), "");
+is(data_string_diff("aaa","aaa"), "");
+
+is(data_string_diff("aaa","aba"), "Strings differ at index 1: a[1]=a, b[1]=b");
+is(data_string_diff("aba","aaa"), "Strings differ at index 1: a[1]=b, b[1]=a");
+is(data_string_diff("aa" ,"aaa"), "String a truncated after 2 characters");
+is(data_string_diff("aaa","aa" ), "String b truncated after 2 characters");
+
+is(data_diff( "", ""), "");
+is(data_diff(undef,undef), "");
+is(data_diff("aaa","aaa"), "");
+
+is(data_diff( "",undef),
+ join "","a: UTF8 off, ASCII, 0 characters 0 bytes\n",
+ "b: UTF8 off, undef\n",
+ "String b is undef, string a has 0 characters\n");
+is(data_diff("aaa","aba"),
+ join "","a: UTF8 off, ASCII, 3 characters 3 bytes\n",
+ "b: UTF8 off, ASCII, 3 characters 3 bytes\n",
+ "Strings differ at index 1: a[1]=a, b[1]=b\n");
+is(data_diff(pack("C",0xEA), pack("U",0xEA)),
+ join "", "a: UTF8 off, non-ASCII, 1 characters 1 bytes\n",
+ "b: UTF8 on, non-ASCII, 1 characters 2 bytes\n",
+ "Strings contain the same sequence of characters\n");
+is(data_diff(pack("C",0xEA), pack("U",0xEA), 1), ""); # no logical difference
-# skip these if we are using DBI::PurePerl
-SKIP: {
- skip 'using DBI::PurePerl', 2 if ($DBI::PurePerl && !eval {
DBI::hash("foo1",1) });
- cmp_ok(DBI::hash("foo1",1), '==', -1263462440, '... should be
-1263462440');
- cmp_ok(DBI::hash("foo2",1), '==', -1263462437, '... should be
-1263462437');
-}
## ----------------------------------------------------------------------------
# restrict this test to just developers