Update of /cvsroot/mahogany/M/src/util
In directory usw-pr-cvs1:/tmp/cvs-serv30874/src/util
Modified Files:
matchurl.cpp
Log Message:
1. match URLs at the start of the line properly (the first letter was skipped)
2. don't match a lone '@' or other text that contains '@' but does not look
like a valid email address
Index: matchurl.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/util/matchurl.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -u -2 -r1.3 -r1.4
--- matchurl.cpp 25 Apr 2002 20:17:01 -0000 1.3
+++ matchurl.cpp 26 Apr 2002 10:41:05 -0000 1.4
@@ -403,10 +403,13 @@
int URLDetector::FindURL(const char *text, int& len)
{
+ int offset = 0;
+
+match:
int pos = scan(text, len);
if ( !len )
return -1;
- // there are 2 different cases: a mailto: URL or a mail address and anything
- // else which we need to treat differently
+ // there are 2 different cases: a mailto: URL or a mail address and
+ // anything else which we need to treat differently
const char *start = text + pos;
const char *p = start + len;
@@ -423,7 +426,11 @@
if ( !hasAngleBracket )
{
+ if ( !IsLocalPartChar(*start) )
+ {
// we went too far backwards
start++;
}
+ //else: we stopped at the start of the text
+ }
//else: keep '<' as part of the URL
@@ -434,6 +441,6 @@
}
- // finally we should either have the brackets from both sides or none at
- // all
+ // finally we should either have the brackets from both sides or none
+ // at all
if ( hasAngleBracket )
{
@@ -465,5 +472,6 @@
}
- // continue on the next line and no need to test the first character
+ // continue on the next line and no need to test the first
+ // character
p += 3;
}
@@ -476,5 +484,23 @@
len = p - start;
- return start - text;
+ // '@' matches may result in false positives, as not every '@' character is
+ // inside a mailto URL so try to weed them out by requiring that the mail
+ // address has a reasonable minimal length ("[EMAIL PROTECTED]" is probably the
+ // shortest we can have, hence 10) which at least avoids matching the bare
+ // '@'s
+ //
+ // NB: we do it here and not inside "if ( '@' )" branch as we want to do it
+ // after removing trailing punctuation
+ if ( len < 10 )
+ {
+ int offDiff = pos + len + 1;
+ offset += offDiff;
+ text += offDiff;
+
+ // slightly more efficient than recursion...
+ goto match;
+ }
+
+ return start - text + offset;
}
_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates