On Thu, 2009-03-26 at 18:44 -0700, Martin Atkins wrote:
> So when DJabberd parses a JID, it ought to normalize the case in the 
> node and domain parts. It must presumably do this in a way defined in 
> stringprep rather than relying on Perl's in-built ability to turn 
> strings to lowercase.
> 
> Is this correct? (and does anyone want to send in a patch? :) )

That sounds correct.  Attached is a patch, which _does_ add a dep of
Unicode::Stringprep, but is probably as faithful of an implementation of
the spec as we might be able to get.
 - Alex
>From 958320315b7e7a3e5c4e8c9cdfb3809d6a90ecee Mon Sep 17 00:00:00 2001
From: Alex Vandiver <ale...@mit.edu>
Date: Thu, 26 Mar 2009 21:24:28 -0400
Subject: [PATCH] Do stringprep, as specified by the RFC, on JID object creation

---
 DJabberd/Makefile.PL         |    1 +
 DJabberd/lib/DJabberd/JID.pm |   91 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 1 deletions(-)

diff --git a/DJabberd/Makefile.PL b/DJabberd/Makefile.PL
index 6de3e23..cfc06e2 100644
--- a/DJabberd/Makefile.PL
+++ b/DJabberd/Makefile.PL
@@ -14,6 +14,7 @@ WriteMakefile(
                   'Net::SSLeay'                  => 0,
                   'Log::Log4perl'                => 0,
                   'Digest::HMAC_SHA1'            => 0,
+                  'Unicode::Stringprep'          => 0,
               },
               clean      => { FILES => 't/log/*' },
               AUTHOR     => 'Brad Fitzpatrick <b...@danga.com>',
diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm
index 3959ee5..c1dd0c3 100644
--- a/DJabberd/lib/DJabberd/JID.pm
+++ b/DJabberd/lib/DJabberd/JID.pm
@@ -12,6 +12,86 @@ use constant RES        => 2;
 use constant AS_STRING  => 3;
 use constant AS_BSTRING => 4;
 use constant AS_STREXML => 5;
+use constant CANONICAL  => 6;
+
+# Stringprep functions for converting to canonical form
+use Unicode::Stringprep;
+use Unicode::Stringprep::Mapping;
+use Unicode::Stringprep::Prohibited;
+my $nodeprep = Unicode::Stringprep->new(
+    3.2,
+    [
+        \...@unicode::Stringprep::Mapping::B1,
+        \...@unicode::Stringprep::Mapping::B2,
+    ],
+    'KC',
+    [
+        \...@unicode::Stringprep::Prohibited::C11,
+        \...@unicode::Stringprep::Prohibited::C12,
+        \...@unicode::Stringprep::Prohibited::C21,
+        \...@unicode::Stringprep::Prohibited::C22,
+        \...@unicode::Stringprep::Prohibited::C3,
+        \...@unicode::Stringprep::Prohibited::C4,
+        \...@unicode::Stringprep::Prohibited::C5,
+        \...@unicode::Stringprep::Prohibited::C6,
+        \...@unicode::Stringprep::Prohibited::C7,
+        \...@unicode::Stringprep::Prohibited::C8,
+        \...@unicode::Stringprep::Prohibited::C9,
+        [
+            0x0022, undef, # "
+            0x0026, undef, # &
+            0x0027, undef, # '
+            0x002F, undef, # /
+            0x003A, undef, # :
+            0x003C, undef, # <
+            0x003E, undef, # >
+            0x0040, undef, # @
+        ]
+    ],
+    1,
+);
+my $nameprep = Unicode::Stringprep->new(
+    3.2,
+    [
+        \...@unicode::Stringprep::Mapping::B1,
+        \...@unicode::Stringprep::Mapping::B2,
+    ],
+    'KC',
+    [
+        \...@unicode::Stringprep::Prohibited::C12,
+        \...@unicode::Stringprep::Prohibited::C22,
+        \...@unicode::Stringprep::Prohibited::C3,
+        \...@unicode::Stringprep::Prohibited::C4,
+        \...@unicode::Stringprep::Prohibited::C5,
+        \...@unicode::Stringprep::Prohibited::C6,
+        \...@unicode::Stringprep::Prohibited::C7,
+        \...@unicode::Stringprep::Prohibited::C8,
+        \...@unicode::Stringprep::Prohibited::C9,
+    ],
+    1,
+);
+my $resourceprep = Unicode::Stringprep->new(
+    3.2,
+    [
+        \...@unicode::Stringprep::Mapping::B1,
+    ],
+    'KC',
+    [
+        \...@unicode::Stringprep::Prohibited::C11,
+        \...@unicode::Stringprep::Prohibited::C12,
+        \...@unicode::Stringprep::Prohibited::C21,
+        \...@unicode::Stringprep::Prohibited::C22,
+        \...@unicode::Stringprep::Prohibited::C3,
+        \...@unicode::Stringprep::Prohibited::C4,
+        \...@unicode::Stringprep::Prohibited::C5,
+        \...@unicode::Stringprep::Prohibited::C6,
+        \...@unicode::Stringprep::Prohibited::C7,
+        \...@unicode::Stringprep::Prohibited::C8,
+        \...@unicode::Stringprep::Prohibited::C9,
+    ],
+    1,
+);
+
 
 # returns DJabberd::JID object, or undef on failure due to invalid format
 sub new {
@@ -29,7 +109,16 @@ sub new {
            (?: /(.{1,1023})   )?                                           # $3: optional resource
            $!x;
 
-    return bless [ $1, $2, $3 ], $_[0];
+    # Stringprep uses regexes, so store these away first
+    my ($node, $host, $res) = ($1, $2, $3);
+
+    return eval {
+        bless [
+            defined $node ? $nodeprep->($node) : undef,
+            $nameprep->($host),
+            defined $res  ? $resourceprep->($res) : undef,
+        ], $_[0]
+    };
 }
 
 sub is_bare {
-- 
1.6.2.1.321.g7b198.dirty

Reply via email to