Heya, I was cleaning up some local repositories the other day, and noticed that the JID case sensitivity patches have yet to be applied. I'd love to not have to keep a local fork of djabberd[1] in order to fix this rather easily fixable but fundamentally important issue. As we've seen on the lists, this is not purely a theoretical problem -- people are actually being bitten by it.
The only reason I've heard for not applying them was fears about backwards incompatability. With the attached patches, users' options are: * Add 'CaseSensitive on', and nothing is changed. * Run 'fix-jabber-roster-case' to fix up SQLite rosters. * If you have non-SQLite rosters, write a tool to do the case-folding and merging for your own storage format. I can't see any way to make this easier than the above, and I really do want to see this bug put to rest. - Alex [1] http://github.com/bestpractical/djabberd
>From 7234cc3410b9e72dec7e0ff1763aadfc260b0144 Mon Sep 17 00:00:00 2001 From: Alex Vandiver <ale...@bestpractical.com> Date: Thu, 26 Mar 2009 21:24:28 -0400 Subject: [PATCH 1/5] Do stringprep, as specified by the RFC, on JID object creation --- DJabberd/Makefile.PL | 1 + DJabberd/lib/DJabberd/JID.pm | 89 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletions(-) diff --git a/DJabberd/Makefile.PL b/DJabberd/Makefile.PL index 6de3e23..cfc06e2 100644 --- a/DJabberd/Makefile.PL +++ b/DJabberd/Makefile.PL @@ -14,6 +14,7 @@ WriteMakefile( 'Net::SSLeay' => 0, 'Log::Log4perl' => 0, 'Digest::HMAC_SHA1' => 0, + 'Unicode::Stringprep' => 0, }, clean => { FILES => 't/log/*' }, AUTHOR => 'Brad Fitzpatrick <b...@danga.com>', diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm index 3959ee5..f37e29c 100644 --- a/DJabberd/lib/DJabberd/JID.pm +++ b/DJabberd/lib/DJabberd/JID.pm @@ -13,6 +13,84 @@ use constant AS_STRING => 3; use constant AS_BSTRING => 4; use constant AS_STREXML => 5; +# Stringprep functions for converting to canonical form +use Unicode::Stringprep; +use Unicode::Stringprep::Mapping; +use Unicode::Stringprep::Prohibited; +my $nodeprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + \...@unicode::Stringprep::Mapping::B2, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C11, + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C21, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + [ + 0x0022, undef, # " + 0x0026, undef, # & + 0x0027, undef, # ' + 0x002F, undef, # / + 0x003A, undef, # : + 0x003C, undef, # < + 0x003E, undef, # > + 0x0040, undef, # @ + ] + ], + 1, +); +my $nameprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + \...@unicode::Stringprep::Mapping::B2, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + ], + 1, +); +my $resourceprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C21, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + ], + 1, +); + + # returns DJabberd::JID object, or undef on failure due to invalid format sub new { #my ($class, $jidstring) = @_; @@ -29,7 +107,16 @@ sub new { (?: /(.{1,1023}) )? # $3: optional resource $!x; - return bless [ $1, $2, $3 ], $_[0]; + # Stringprep uses regexes, so store these away first + my ($node, $host, $res) = ($1, $2, $3); + + return eval { + bless [ + defined $node ? $nodeprep->($node) : undef, + $nameprep->($host), + defined $res ? $resourceprep->($res) : undef, + ], $_[0] + }; } sub is_bare { -- 1.6.3.3.473.gb74fc4.dirty
>From 81a71a37d10546f1ecae8dd09e878114ecb45a29 Mon Sep 17 00:00:00 2001 From: Alex Vandiver <ale...@bestpractical.com> Date: Thu, 9 Apr 2009 13:17:42 -0400 Subject: [PATCH 2/5] Lower-case all domains for checking verification status --- DJabberd/lib/DJabberd/Connection/ServerIn.pm | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DJabberd/lib/DJabberd/Connection/ServerIn.pm b/DJabberd/lib/DJabberd/Connection/ServerIn.pm index 32c7599..d7ca323 100644 --- a/DJabberd/lib/DJabberd/Connection/ServerIn.pm +++ b/DJabberd/lib/DJabberd/Connection/ServerIn.pm @@ -16,7 +16,7 @@ sub set_vhost { sub peer_domain_is_verified { my ($self, $domain) = @_; - return $self->{verified_remote_domain}->{$domain}; + return $self->{verified_remote_domain}->{lc $domain}; } sub on_stream_start { @@ -140,7 +140,7 @@ sub dialback_result_valid { my %opts = @_; my $res = qq{<db:result from='$opts{recv_server}' to='$opts{orig_server}' type='valid'/>}; - $self->{verified_remote_domain}->{$opts{orig_server}} = $opts{orig_server}; + $self->{verified_remote_domain}->{lc $opts{orig_server}} = $opts{orig_server}; $self->log->debug("Dialback result valid for connection $self->{id}. from=$opts{recv_server}, to=$opts{orig_server}: $res\n"); $self->write($res); } -- 1.6.3.3.473.gb74fc4.dirty
>From e327279d56e61e9c84ff393d03c13ae0ee144850 Mon Sep 17 00:00:00 2001 From: Alex Vandiver <ale...@bestpractical.com> Date: Fri, 3 Apr 2009 17:47:36 -0400 Subject: [PATCH 3/5] Add global CaseSensitive flag for backwards compatibility --- DJabberd/lib/DJabberd.pm | 5 +++++ DJabberd/lib/DJabberd/JID.pm | 7 +++++++ 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/DJabberd/lib/DJabberd.pm b/DJabberd/lib/DJabberd.pm index 9debd3d..a168fdb 100644 --- a/DJabberd/lib/DJabberd.pm +++ b/DJabberd/lib/DJabberd.pm @@ -167,6 +167,11 @@ sub fake_s2s_peer { return $fake_peers{$host}; } +sub set_config_casesensitive { + my ($self, $val) = @_; + $DJabberd::JID::CASE_SENSITIVE = as_bool($val); +} + sub add_vhost { my ($self, $vhost) = @_; my $sname = lc $vhost->name; diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm index f37e29c..bfef24c 100644 --- a/DJabberd/lib/DJabberd/JID.pm +++ b/DJabberd/lib/DJabberd/JID.pm @@ -3,6 +3,9 @@ use strict; use DJabberd::Util qw(exml); use Digest::SHA1; +# Configurable via 'CaseSensitive' config option +our $CASE_SENSITIVE = 0; + use overload '""' => \&as_string_exml; @@ -107,6 +110,10 @@ sub new { (?: /(.{1,1023}) )? # $3: optional resource $!x; + # If we're in case-sensitive mode, for backwards-compatibility, + # then skip stringprep + return bless [ $1, $2, $3 ], $_[0] if $DJabberd::JID::CASE_SENSITIVE; + # Stringprep uses regexes, so store these away first my ($node, $host, $res) = ($1, $2, $3); -- 1.6.3.3.473.gb74fc4.dirty
>From 2996bf98e3597ce928f2cd4e826542621c83dd76 Mon Sep 17 00:00:00 2001 From: Alex Vandiver <ale...@bestpractical.com> Date: Thu, 24 Sep 2009 17:35:42 -0400 Subject: [PATCH 4/5] Add tool to unify mixed-case records in existing SQLite rosters --- .../fix-jabber-roster-case | 176 ++++++++++++++++++++ 1 files changed, 176 insertions(+), 0 deletions(-) create mode 100755 DJabberd-RosterStorage-SQLite/fix-jabber-roster-case diff --git a/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case b/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case new file mode 100755 index 0000000..e3760f8 --- /dev/null +++ b/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case @@ -0,0 +1,176 @@ +#!/usr/bin/perl + +use DJabberd::JID; +use DJabberd::Subscription; +use DBI; +use strict; +use warnings; + +die "$0 must be run with a case-folding version of DJabberd\n" + unless DJabberd::JID->new('t...@example.com')->eq(DJabberd::JID->new('t...@example.com')); + +my $roster = shift @ARGV; +die "Usage: $0 rosterfile\n" unless defined $roster and -f $roster; + +# Note that all work is performed in a transaction for consistency, +# hence AutoCommit => 0 +my $dbh = DBI->connect( + "dbi:SQLite:dbname=$roster", "", "", + { RaiseError => 1, PrintError => 0, AutoCommit => 0 } +); + +my $all_dups = $dbh->selectcol_arrayref(<<SQL); +SELECT LOWER(first.jid) + FROM jidmap first + JOIN jidmap other + ON LOWER(first.jid) = LOWER(other.jid) + LEFT JOIN jidmap later + ON LOWER(later.jid) = LOWER(first.jid) + AND later.jidid > first.jidid + GROUP BY first.jid +HAVING COUNT(DISTINCT other.jid) > 1 + AND later.jid IS NULL + ORDER BY COUNT(DISTINCT other.jid), LOWER(first.jid) +SQL + +for my $name (@{$all_dups}) { + warn "Looking at $name\n"; + + # Grab the set of dups + my $dups = $dbh->selectall_arrayref(<<SQL, undef, $name); +SELECT jidid, jid FROM jidmap WHERE LOWER(jid) = ? +SQL + + # For later use, build up the set of JIDs ids to coalesce. + my $jidset = "(" . join(",", map {$_->[0]} @$dups) .")"; + warn " JID equivalence set is $jidset\n"; + + # Since this is the output of LOWER(jid), it is _probably_ already + # correct, but we run it through canonicalize to get KC + # normalization, etc + $name = canonicalize($name); + next unless defined $name; + warn " Canonical form is $name\n"; + + # Blow away all of the duplicate JIDs, and re-insert the new name + $dbh->do("DELETE FROM jidmap WHERE jidid IN $jidset"); + $dbh->do("INSERT INTO jidmap(jid) VALUES(?)", undef, $name); + my $newid = $dbh->last_insert_id(undef, undef, "jidmap", "jidid"); + warn " New row id is $newid\n"; + + # Next, find all of the places in the roster where the old IDs + # showed up, either as userids or contactids + my %types = (userid => "contactid", contactid => "userid"); + for my $column (keys %types) { + warn " Looking for occurrances in $column\n"; + my $other = $types{$column}; + # We now generate a list of contacts that any of the JIDs as + # the user, (or users who have any of the JIDs as a contact) + my $otherlist = $dbh->selectcol_arrayref(<<SQL); +SELECT $other FROM roster WHERE $column IN $jidset GROUP BY $other +SQL + for my $otherval (@$otherlist) { + my $merge = $dbh->selectall_arrayref(<<SQL, { Slice => {} }); +SELECT $column, name, subscription FROM roster WHERE $other = $otherval AND $column IN $jidset ORDER BY $column ASC +SQL + # We now have a set of information to merge together, in $merge + warn " $otherval references JIDs @{[map $_->{$column}, @$merge]} in $column\n"; + $dbh->do(<<SQL, undef, $newid, $otherval, merge(@$merge)); +INSERT INTO roster($column, $other, name, subscription) VALUES(?, ?, ?, ?) +SQL + } + } + + # Delete any references to old JIDs in the roster + $dbh->do("DELETE FROM roster WHERE contactid IN $jidset OR userid IN $jidset"); + + # Merge roster groups + my $grouplist = $dbh->selectcol_arrayref(<<SQL); +SELECT name FROM rostergroup WHERE userid IN $jidset GROUP BY name +SQL + for my $groupname (@$grouplist) { + # Make a new group of this name for the new JID's id + $dbh->do("INSERT INTO rostergroup(userid, name) VALUES(?, ?)", undef, $newid, $groupname); + my $groupid = $dbh->last_insert_id(undef, undef, "rostergroup", "groupid"); + # Find all of the distinct contacts in groups of this name in + # any of the old JIDs, and add them to the new group + my $merge = $dbh->do(<<SQL, undef, $groupid, $groupname); +INSERT INTO groupitem(groupid, contactid) +SELECT ?, groupitem.contactid + FROM rostergroup + JOIN groupitem + ON rostergroup.groupid = groupitem.groupid + WHERE rostergroup.name = ? + AND rostergroup.userid IN $jidset + GROUP BY groupitem.contactid +SQL + } + # Remove the old groups + $dbh->do("DELETE FROM rostergroup WHERE userid IN $jidset"); + + # Look for places the any of the old JIDs appeared in other roster + # groups, and replace them with the new JID's id + $dbh->do(<<SQL, undef, $newid); +INSERT INTO groupitem(groupid, contactid) +SELECT groupid, ? + FROM groupitem + WHERE contactid IN $jidset + GROUP BY groupid +SQL + # Remove the old contacts from the groups + $dbh->do("DELETE FROM groupitem WHERE contactid IN $jidset"); + +} + +# The above merely handles cases where one JID appears twice; now we +# iterate though _all_ names, and lower-case them, to catch JIDs which +# are incorrectly cased but only appear once. +my $all = $dbh->selectall_arrayref("SELECT jid, jidid FROM jidmap", { Slice => {} }); +for my $row (@{$all}) { + # Canonicalize the name. Since this is the output of LOWER(jid), + # it is _probably_ already correct, but we round-trip through + # DJabberd::JID to ensure we get KC normalization, and the like + my $name = canonicalize($row->{jid}); + next unless defined $name; + next if $name eq $row->{jid}; + warn "Looking at @{[$row->{jid}]}\n"; + warn " Canonical form is $name\n"; + $dbh->do("UPDATE jidmap SET jid = ? WHERE jidid = ?", undef, $name, $row->{jidid}); +} + +$dbh->commit or die "Error commiting changes: ".$dbh->errstr; + + + + +sub canonicalize { + # Canonicalize the name. We round-trip through DJabberd::JID to + # ensure we get KC normalization, case folding, and the like + my $name = shift; + my $jid = DJabberd::JID->new($name); + unless ($jid) { + warn "Can't make canonical form of $name!"; + return undef; + } + return $jid->as_string; +} + +sub merge { + my (@merge) = @_; + + # Trivial case + return ($merge[0]->{name}, $merge[0]->{subscription}) + if @merge == 1; + + # More complex case; name is arbitrarily picked to be first non-null name, or null + my($name) = grep {defined} map {$_->{name}} @merge; + @merge = map {DJabberd::Subscription->from_bitmask($_->{subscription})} @merge; + + # to overrides pendout, from overrides pendin + my $merged = DJabberd::Subscription->new; + $merged->{to} = 1 if grep {$_->{to}} @merge; + $merged->{pendout} = 1 if not $merged->{to} and grep {$_->{pendout}} @merge; + $merged->{from} = 1 if grep {$_->{from}} @merge; + $merged->{pendin} = 1 if not $merged->{from} and grep {$_->{pendin}} @merge; + return $name, $merged->as_bitmask; +} -- 1.6.3.3.473.gb74fc4.dirty
>From f2431a189c3282f4ef805548e53d182387c345f2 Mon Sep 17 00:00:00 2001 From: Alex Vandiver <a...@chmrr.net> Date: Fri, 25 Sep 2009 14:21:34 -0400 Subject: [PATCH 5/5] Update CHANGES file --- DJabberd/CHANGES | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/DJabberd/CHANGES b/DJabberd/CHANGES index cc4ca2c..155a9dd 100644 --- a/DJabberd/CHANGES +++ b/DJabberd/CHANGES @@ -1,3 +1,12 @@ + - IMPORTANT: JIDs are now case insensitive, and properly UTF8 + normalized, as the spec calls for. If you wish to preserve the + old behavior, set 'CaseSensitive' in your configuration. If you + have existing roster storage, you will need to ensure that + existing entries with differing case are merged. If you use the + SQLite roster, 'fix-jabber-roster-case' has been provided in the + DJabberd::RosterStorage::SQLite distribution. + (Alex Vandiver <ale...@bestpractical.com>) + - Some configuration file docs (Alex Vandiver <ale...@bestpractical.com>) -- 1.6.3.3.473.gb74fc4.dirty