Update of /cvsroot/mahogany/M/src/util
In directory usw-pr-cvs1:/tmp/cvs-serv16265/src/util

Modified Files:
        matchurl.cpp 
Log Message:
another heuristic test to avoid false positives with mailto URL matching

Index: matchurl.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/util/matchurl.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -u -2 -r1.4 -r1.5
--- matchurl.cpp        26 Apr 2002 10:41:05 -0000      1.4
+++ matchurl.cpp        26 Apr 2002 18:01:37 -0000      1.5
@@ -410,10 +410,13 @@
       return -1;
 
-   // there are 2 different cases: a mailto: URL or a mail address and
-   // anything else which we need to treat differently
+   // the provisional start and end of the URL, will be changed below
    const char *start = text + pos;
    const char *p = start + len;
 
-   if ( *start == '@' )
+   // there are 2 different cases: a mailto: URL or a mail address and
+   // anything else which we need to treat differently
+   bool isMail = *start == '@';
+
+   if ( isMail )
    {
       // look for the start of the address
@@ -484,13 +487,15 @@
    len = p - start;
 
-   // '@' matches may result in false positives, as not every '@' character is
-   // inside a mailto URL so try to weed them out by requiring that the mail
-   // address has a reasonable minimal length ("[EMAIL PROTECTED]" is probably the
-   // shortest we can have, hence 10) which at least avoids matching the bare
-   // '@'s
+   if ( isMail )
+   {
+      // '@' matches may result in false positives, as not every '@' character
+      // is inside a mailto URL so try to weed them out by requiring that the
+      // mail address has a reasonable minimal length ("[EMAIL PROTECTED]" is probably
+      // the shortest we can have, hence 10) which at least avoids matching the
+      // bare '@'s
    //
-   // NB: we do it here and not inside "if ( '@' )" branch as we want to do it
-   //     after removing trailing punctuation
-   if ( len < 10 )
+      // also check that we have at least one dot in the domain part, otherwise
+      // it probably isn't an address either
+      if ( (len < 10) || !memchr(text + pos + 1, '.', p - text - pos - 1) )
    {
       int offDiff = pos + len + 1;
@@ -500,4 +505,5 @@
       // slightly more efficient than recursion...
       goto match;
+      }
    }
 


_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

Reply via email to