On Thu, 2009-03-26 at 18:44 -0700, Martin Atkins wrote: > So when DJabberd parses a JID, it ought to normalize the case in the > node and domain parts. It must presumably do this in a way defined in > stringprep rather than relying on Perl's in-built ability to turn > strings to lowercase. > > Is this correct? (and does anyone want to send in a patch? :) )
That sounds correct. Attached is a patch, which _does_ add a dep of Unicode::Stringprep, but is probably as faithful of an implementation of the spec as we might be able to get. - Alex
>From 958320315b7e7a3e5c4e8c9cdfb3809d6a90ecee Mon Sep 17 00:00:00 2001 From: Alex Vandiver <ale...@mit.edu> Date: Thu, 26 Mar 2009 21:24:28 -0400 Subject: [PATCH] Do stringprep, as specified by the RFC, on JID object creation --- DJabberd/Makefile.PL | 1 + DJabberd/lib/DJabberd/JID.pm | 91 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 1 deletions(-) diff --git a/DJabberd/Makefile.PL b/DJabberd/Makefile.PL index 6de3e23..cfc06e2 100644 --- a/DJabberd/Makefile.PL +++ b/DJabberd/Makefile.PL @@ -14,6 +14,7 @@ WriteMakefile( 'Net::SSLeay' => 0, 'Log::Log4perl' => 0, 'Digest::HMAC_SHA1' => 0, + 'Unicode::Stringprep' => 0, }, clean => { FILES => 't/log/*' }, AUTHOR => 'Brad Fitzpatrick <b...@danga.com>', diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm index 3959ee5..c1dd0c3 100644 --- a/DJabberd/lib/DJabberd/JID.pm +++ b/DJabberd/lib/DJabberd/JID.pm @@ -12,6 +12,86 @@ use constant RES => 2; use constant AS_STRING => 3; use constant AS_BSTRING => 4; use constant AS_STREXML => 5; +use constant CANONICAL => 6; + +# Stringprep functions for converting to canonical form +use Unicode::Stringprep; +use Unicode::Stringprep::Mapping; +use Unicode::Stringprep::Prohibited; +my $nodeprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + \...@unicode::Stringprep::Mapping::B2, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C11, + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C21, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + [ + 0x0022, undef, # " + 0x0026, undef, # & + 0x0027, undef, # ' + 0x002F, undef, # / + 0x003A, undef, # : + 0x003C, undef, # < + 0x003E, undef, # > + 0x0040, undef, # @ + ] + ], + 1, +); +my $nameprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + \...@unicode::Stringprep::Mapping::B2, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + ], + 1, +); +my $resourceprep = Unicode::Stringprep->new( + 3.2, + [ + \...@unicode::Stringprep::Mapping::B1, + ], + 'KC', + [ + \...@unicode::Stringprep::Prohibited::C11, + \...@unicode::Stringprep::Prohibited::C12, + \...@unicode::Stringprep::Prohibited::C21, + \...@unicode::Stringprep::Prohibited::C22, + \...@unicode::Stringprep::Prohibited::C3, + \...@unicode::Stringprep::Prohibited::C4, + \...@unicode::Stringprep::Prohibited::C5, + \...@unicode::Stringprep::Prohibited::C6, + \...@unicode::Stringprep::Prohibited::C7, + \...@unicode::Stringprep::Prohibited::C8, + \...@unicode::Stringprep::Prohibited::C9, + ], + 1, +); + # returns DJabberd::JID object, or undef on failure due to invalid format sub new { @@ -29,7 +109,16 @@ sub new { (?: /(.{1,1023}) )? # $3: optional resource $!x; - return bless [ $1, $2, $3 ], $_[0]; + # Stringprep uses regexes, so store these away first + my ($node, $host, $res) = ($1, $2, $3); + + return eval { + bless [ + defined $node ? $nodeprep->($node) : undef, + $nameprep->($host), + defined $res ? $resourceprep->($res) : undef, + ], $_[0] + }; } sub is_bare { -- 1.6.2.1.321.g7b198.dirty