Author: hydrox
Date: 2006-03-12 19:08:48 +0100 (Sun, 12 Mar 2006)
New Revision: 1879
Modified:
trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
trunk/source/de/anomic/plasma/plasmaURLPattern.java
Log:
*) URL-Cleaner: moved logging statement to the correct position
*) plasmaURLPattern: host is now added to the hashset in lowercase
Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2006-03-11 21:55:32 UTC (rev 1878)
+++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2006-03-12 18:08:48 UTC (rev 1879)
@@ -816,12 +816,12 @@
lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());
remove(entry.hash());
+ if (blacklistedUrls % 100 == 0) {
+ serverLog.logInfo("URLDBCLEANER", "Deleted " + blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + lastBlacklistedUrl);
+ }
}
lastUrl = entry.url().toString();
lastHash = entry.hash();
- if (blacklistedUrls % 100 == 0 && blacklistedUrls != 0) {
- serverLog.logInfo("URLDBCLEANER", "Deleted " + blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + lastBlacklistedUrl);
- }
}
} catch (RuntimeException e) {
if (e.getMessage().indexOf("not found in LURL") != -1) {
Modified: trunk/source/de/anomic/plasma/plasmaURLPattern.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaURLPattern.java 2006-03-11 21:55:32 UTC (rev 1878)
+++ trunk/source/de/anomic/plasma/plasmaURLPattern.java 2006-03-12 18:08:48 UTC (rev 1879)
@@ -81,7 +81,7 @@
public void add(String host, String path) {
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
- hostpaths.put(host, path);
+ hostpaths.put(host.toLowerCase(), path);
}
public boolean isListed(String hostlow, String path) {
_______________________________________________
YaCy-svn mailing list
[email protected]
http://lists.berlios.de/mailman/listinfo/yacy-svn