At 19:05 -0500 11/7/05, Eric Herrera wrote:
I'm attempting to sort using utf8_bin and I don't think its sorting properly. I believe I have everything set correctly. I've appended all related data. I also have a small perl script below which I used to
generate the tests.

I may be wrong, but I expect this query:

select id, utf8_data from test order by utf8_data collate utf8_bin

to generate the "EXPECTED ORDER" data section below. However, "TEST ORDER" is what gets generated.

I don't see any difference between the two sections.





Am I correct in my assumption that this is not ordering according to utf8 binary?

Anyone have experience with utf8 binary collation?




-Eric


===========================================================
===========================================================

CREATE TABLE `test` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `utf8_data` text character set utf8 collate utf8_bin,
  PRIMARY KEY  (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin

============================================================
============================================================
mysql> show collation like 'utf8%';
+--------------------+---------+-----+---------+----------+---------+
| Collation          | Charset | Id  | Default | Compiled | Sortlen |
+--------------------+---------+-----+---------+----------+---------+
| utf8_general_ci    | utf8    |  33 | Yes     | Yes      |       1 |
| utf8_bin           | utf8    |  83 |         | Yes      |       1 |
| utf8_unicode_ci    | utf8    | 192 |         | Yes      |       8 |
| utf8_icelandic_ci  | utf8    | 193 |         | Yes      |       8 |
| utf8_latvian_ci    | utf8    | 194 |         | Yes      |       8 |
| utf8_romanian_ci   | utf8    | 195 |         | Yes      |       8 |
| utf8_slovenian_ci  | utf8    | 196 |         | Yes      |       8 |
| utf8_polish_ci     | utf8    | 197 |         | Yes      |       8 |
| utf8_estonian_ci   | utf8    | 198 |         | Yes      |       8 |
| utf8_spanish_ci    | utf8    | 199 |         | Yes      |       8 |
| utf8_swedish_ci    | utf8    | 200 |         | Yes      |       8 |
| utf8_turkish_ci    | utf8    | 201 |         | Yes      |       8 |
| utf8_czech_ci      | utf8    | 202 |         | Yes      |       8 |
| utf8_danish_ci     | utf8    | 203 |         | Yes      |       8 |
| utf8_lithuanian_ci | utf8    | 204 |         | Yes      |       8 |
| utf8_slovak_ci     | utf8    | 205 |         | Yes      |       8 |
| utf8_spanish2_ci   | utf8    | 206 |         | Yes      |       8 |
| utf8_roman_ci      | utf8    | 207 |         | Yes      |       8 |
| utf8_persian_ci    | utf8    | 208 |         | Yes      |       8 |
+--------------------+---------+-----+---------+----------+---------+
19 rows in set (0.01 sec)

=========================================================
unidb.pl
=========================================================
use DBI;
use Encode;

my $dbh = DBI->connect("test");

$dbh->do("delete from test");

my $sth = $dbh->prepare(
  "INSERT into test set utf8_data = ? "
);

foreach my $ord (4033..4053){

  my $hex = sprintf("%4.4x", $ord);
  print "$hex - $ord\n";
  my $unicode;
  eval "\$unicode = \"\\x{$hex}\"";

  #$unicode = pack("U*",$ord);
  $unicode = Encode::encode_utf8($unicode);

  $sth->execute($unicode);
}

print "\n\n";

# read them back out
my $sth = $dbh->prepare("select id, utf8_data from test order by utf8_data collate utf8_bin");
$sth->execute;
printf("%5s %5s %10s %5s %25s\n","node_id","path", "ord", "len" ,
"binary");
print "----------------------------------------------------------------\n";
while (my @a = $sth->fetchrow_array()){
  my $len = length($a[1]);
  $a[1] = Encode::decode_utf8($a[1]);
  printf("%5s %5s %10s %5s %25s\n", $a[0], 'X', ord($a[1]), $len),
  unpack("B*",$a[1])
  );
}

====================================================================
TEST ORDER
====================================================================

$ perl unidb.pl
0fc1 - 4033
0fc2 - 4034
0fc3 - 4035
0fc4 - 4036
0fc5 - 4037
0fc6 - 4038
0fc7 - 4039
0fc8 - 4040
0fc9 - 4041
0fca - 4042
0fcb - 4043
0fcc - 4044
0fcd - 4045
0fce - 4046
0fcf - 4047
0fd0 - 4048
0fd1 - 4049
0fd2 - 4050
0fd3 - 4051
0fd4 - 4052
0fd5 - 4053

node_id  path        ord   len                    binary
----------------------------------------------------------------
  106     X       4033     3  111000001011111110000001
  107     X       4034     3  111000001011111110000010
  108     X       4035     3  111000001011111110000011
  109     X       4036     3  111000001011111110000100
  110     X       4037     3  111000001011111110000101
  111     X       4038     3  111000001011111110000110
  112     X       4039     3  111000001011111110000111
  113     X       4040     3  111000001011111110001000
  114     X       4041     3  111000001011111110001001
  115     X       4042     3  111000001011111110001010
  116     X       4043     3  111000001011111110001011
  117     X       4044     3  111000001011111110001100
  118     X       4045     3  111000001011111110001101
  119     X       4046     3  111000001011111110001110
  120     X       4047     3  111000001011111110001111
  121     X       4048     3  111000001011111110010000
  122     X       4049     3  111000001011111110010001
  123     X       4050     3  111000001011111110010010
  124     X       4051     3  111000001011111110010011
  125     X       4052     3  111000001011111110010100
  126     X       4053     3  111000001011111110010101


===================================================================
EXPECTED ORDER
===================================================================

$ perl unidb.pl
node_id  path        ord   len                    binary
----------------------------------------------------------------
  106     X       4033     3  111000001011111110000001
  107     X       4034     3  111000001011111110000010
  108     X       4035     3  111000001011111110000011
  109     X       4036     3  111000001011111110000100
  110     X       4037     3  111000001011111110000101
  111     X       4038     3  111000001011111110000110
  112     X       4039     3  111000001011111110000111
  113     X       4040     3  111000001011111110001000
  114     X       4041     3  111000001011111110001001
  115     X       4042     3  111000001011111110001010
  116     X       4043     3  111000001011111110001011
  117     X       4044     3  111000001011111110001100
  118     X       4045     3  111000001011111110001101
  119     X       4046     3  111000001011111110001110
  120     X       4047     3  111000001011111110001111
  121     X       4048     3  111000001011111110010000
  122     X       4049     3  111000001011111110010001
  123     X       4050     3  111000001011111110010010
  124     X       4051     3  111000001011111110010011
  125     X       4052     3  111000001011111110010100
  126     X       4053     3  111000001011111110010101

--
Paul DuBois, MySQL Documentation Team
Madison, Wisconsin, USA
MySQL AB, www.mysql.com

--
MySQL General Mailing List
For list archives: http://lists.mysql.com/mysql
To unsubscribe:    http://lists.mysql.com/[EMAIL PROTECTED]

Reply via email to