Update of /cvsroot/mahogany/M/src/util
In directory usw-pr-cvs1:/tmp/cvs-serv30874/src/util

Modified Files:
        matchurl.cpp 
Log Message:
1. match URLs at the start of the line properly (first letter was skipped)
2. don't match a lone '@' or other text that contains '@' but does not look
   like a valid email address


Index: matchurl.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/util/matchurl.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -u -2 -r1.3 -r1.4
--- matchurl.cpp        25 Apr 2002 20:17:01 -0000      1.3
+++ matchurl.cpp        26 Apr 2002 10:41:05 -0000      1.4
@@ -403,10 +403,13 @@
 int URLDetector::FindURL(const char *text, int& len)
 {
+   int offset = 0;
+
+match:
    int pos = scan(text, len);
    if ( !len )
       return -1;
 
-   // there are 2 different cases: a mailto: URL or a mail address and anything
-   // else which we need to treat differently
+   // there are 2 different cases: a mailto: URL or a mail address and
+   // anything else which we need to treat differently
    const char *start = text + pos;
    const char *p = start + len;
@@ -423,7 +426,11 @@
       if ( !hasAngleBracket )
       {
+         if ( !IsLocalPartChar(*start) )
+         {
          // we went too far backwards
          start++;
       }
+         //else: we stopped at the start of the text
+      }
       //else: keep '<' as part of the URL
 
@@ -434,6 +441,6 @@
       }
 
-      // finally we should either have the brackets from both sides or none at
-      // all
+      // finally we should either have the brackets from both sides or none
+      // at all
       if ( hasAngleBracket )
       {
@@ -465,5 +472,6 @@
          }
 
-         // continue on the next line and no need to test the first character
+         // continue on the next line and no need to test the first
+         // character
          p += 3;
       }
@@ -476,5 +484,23 @@
    len = p - start;
 
-   return start - text;
+   // '@' matches may result in false positives, as not every '@' character is
+   // inside a mailto URL so try to weed them out by requiring that the mail
+   // address has a reasonable minimal length ("ab@foo.com" is probably the
+   // shortest we can have, hence 10) which at least avoids matching the bare
+   // '@'s
+   //
+   // NB: we do it here and not inside "if ( '@' )" branch as we want to do it
+   //     after removing trailing punctuation
+   if ( len < 10 )
+   {
+      int offDiff = pos + len + 1;
+      offset += offDiff;
+      text += offDiff;
+
+      // slightly more efficient than recursion...
+      goto match;
+   }
+
+   return start - text + offset;
 }
 


_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

Reply via email to