Update of /cvsroot/mahogany/M/src/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4853/src/util

Modified Files:
        matchurl.cpp 
Log Message:
more tweaks to avoid false positives:
- don't count '(' and ')' as part of URLs (this is legal but rare)
- require at least 2 dots in a non-mailto URL


Index: matchurl.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/util/matchurl.cpp,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -u -2 -r1.31 -r1.32
--- matchurl.cpp        21 Mar 2004 21:37:25 -0000      1.31
+++ matchurl.cpp        14 Jul 2004 18:19:11 -0000      1.32
@@ -430,5 +430,8 @@
 {
    return c == '-' || c == '_' || c == '.' || c == '!' || c == '~' ||
-          c == '*' || c == '\'' || c == '(' || c == ')';
+          c == '*'
+          // these rarely appear in URLs in practice
+          //|| c == '\'' || c == '(' || c == ')'
+          ;
 }
 
@@ -759,11 +762,24 @@
    // '@' matches may result in false positives, as not every '@' character
    // is inside a mailto URL so try to weed them out by requiring that the
-   // mail address has a reasonable minimal length ("[EMAIL PROTECTED]" is probably
-   // the shortest we can have, hence 10) which at least avoids matching the
-   // bare '@'s
+   // mail address has a reasonable minimal length ("[EMAIL PROTECTED]" and
+   // "www.xyz.fr" are probably the shortest ones we can have, hence 10) which
+   // at least avoids matching the bare '@'s
    //
-   // also check that we have at least one dot in the domain part, otherwise
-   // it probably isn't an address/URL neither
-   if ( (p - start < 10) || !memchr(text + pos + 1, '.', p - text - pos - 1) )
+   // also check that we have at least one dot in the domain part for the mail
+   // addresses and two dots for the other URLs, otherwise it probably isn't an
+   // address/URL either
+   bool good = (p - start) >= 10;
+
+   if ( good )
+   {
+      const char *
+         pDot = (char *)memchr(text + pos + 1, '.', p - text - pos - 1);
+      if ( !pDot )
+         good = false;
+      else if ( !isMail )
+         good = memchr(pDot + 1, '.', p - pDot - 1) != NULL;
+   }
+
+   if ( !good )
    {
       int offDiff = pos + len + 1;



-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=4721&alloc_id=10040&op=click
_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

Reply via email to