Update of /cvsroot/mahogany/M/src/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4853/src/util
Modified Files:
matchurl.cpp
Log Message:
more tweaks to avoid false positives:
- don't count '(' and ')' as part of URLs (this is legal but rare)
- require at least 2 dots in a non-mailto URL
Index: matchurl.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/util/matchurl.cpp,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -u -2 -r1.31 -r1.32
--- matchurl.cpp 21 Mar 2004 21:37:25 -0000 1.31
+++ matchurl.cpp 14 Jul 2004 18:19:11 -0000 1.32
@@ -430,5 +430,8 @@
{
return c == '-' || c == '_' || c == '.' || c == '!' || c == '~' ||
- c == '*' || c == '\'' || c == '(' || c == ')';
+ c == '*'
+ // these don't appear in the URLs in practice
+ //|| c == '\'' || c == '(' || c == ')'
+ ;
}
@@ -759,11 +762,24 @@
// '@' matches may result in false positives, as not every '@' character
// is inside a mailto URL so try to weed them out by requiring that the
- // mail address has a reasonable minimal length ("[EMAIL PROTECTED]" is probably
- // the shortest we can have, hence 10) which at least avoids matching the
- // bare '@'s
+ // mail address has a reasonable minimal length ("[EMAIL PROTECTED]" and
+ // "www.xyz.fr" are probably the shortest ones we can have, hence 10) which
+ // at least avoids matching the bare '@'s
//
- // also check that we have at least one dot in the domain part, otherwise
- // it probably isn't an address/URL neither
- if ( (p - start < 10) || !memchr(text + pos + 1, '.', p - text - pos - 1) )
+ // also check that we have at least one dot in the domain part for the mail
+ // addresses and two dots for the other URLs, otherwise it probably isn't an
+ // address/URL either
+ bool good = (p - start) >= 10;
+
+ if ( good )
+ {
+ const char *
+ pDot = (char *)memchr(text + pos + 1, '.', p - text - pos - 1);
+ if ( !pDot )
+ good = false;
+ else if ( !isMail )
+ good = memchr(pDot + 1, '.', p - pDot - 1) != NULL;
+ }
+
+ if ( !good )
{
int offDiff = pos + len + 1;
-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=4721&alloc_id=10040&op=click
_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates