Jens Thoms Toerring
Tue, 26 Aug 2003 16:52:06 +0000
This is the patch for treating robots.txt entries like User-agent: Teleport*
as not applying to aspseek.
Regards, Jens
--
Freie Universitaet Berlin Jens Thoms Toerring
Universitaetsbibliothek
Webteam Tel: 0049 30 838 56055
Garystrasse 39 Fax: 0049 30 838 53738
14195 Berlin e-mail: [EMAIL PROTECTED]
--- aspseek-orig/src/wcache.cpp 2003-08-20 14:28:47.000000000 +0200
+++ aspseek-my/src/wcache.cpp 2003-08-26 18:32:40.000000000 +0200
@@ -115,22 +115,26 @@
{
/* Skip comment */
}
- else if (!(STRNCASECMP(s, "User-Agent:")))
- {
- myrule = 0;
- if (strstr(s + 11, "*"))
- myrule = 1;
- else
- {
- /* case insensitive substring match */
- e = s + 11;
- while (*e++ != '\0')
- *e = tolower(*e);
- if (strstr(s + 11, USER_AGENT_LC))
- myrule = 1;
- }
-
- }
+ else if (!(STRNCASECMP(s, "User-Agent:")))
+ {
+ myrule = 0;
+
+ for ( e = s + 11; *e != '\0'; e++ )
+ *e = tolower( *e );
+
+ for ( e = s + 11; isspace ( *e ); e++ )
+ /* empty */;
+
+ /* Don't index if we have a name string that either is identical
+ to our name or starts with a star or which has a star after a
+ set of letters that fit the beginning of our name. */
+
+ if ( ! strcmp( e, USER_AGENT_LC ) ||
+ ( ( where = strchr( e, '*' ) ) != NULL &&
+ ( *( where - 1 ) == ':' || isspace( *( where - 1 ) ) ||
+ ! strncmp( e, USER_AGENT_LC, where - e ) ) ) )
+ myrule = 1;
+ }
else if ((!(STRNCASECMP(s, "Disallow"))) && myrule)
{
if ((e = strchr(s + 9, '#'))) *e = 0;