Author: orbiter Date: 2008-01-22 21:49:26 +0100 (Tue, 22 Jan 2008) New Revision: 4369
Modified: trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java trunk/source/de/anomic/yacy/yacyURL.java Log: patch for http://forum.yacy-websuche.de/viewtopic.php?p=4597#p4597 (URLs that have no protocol but start with www will be treated as http://www...) Modified: trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java 2008-01-22 20:44:12 UTC (rev 4368) +++ trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java 2008-01-22 20:49:26 UTC (rev 4369) @@ -465,10 +465,12 @@ assert delta >= 0: "delta = " + delta; int s = urlFileIndex.size(); kelondroRow.Entry rowEntry = urlFileIndex.remove(result.getBytes(), false); - assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; + assert (rowEntry != null) && (urlFileIndex.size() + 1 == s) : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; if (rowEntry == null) { serverLog.logSevere("PLASMA BALANCER", "get() found a valid urlhash, but failed to fetch the corresponding url entry - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size()); return null; + } else { + assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; } plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry); long minimumDelta = (crawlEntry.url().isLocal()) ? 
minimumLocalDelta : minimumGlobalDelta; Modified: trunk/source/de/anomic/yacy/yacyURL.java =================================================================== --- trunk/source/de/anomic/yacy/yacyURL.java 2008-01-22 20:44:12 UTC (rev 4368) +++ trunk/source/de/anomic/yacy/yacyURL.java 2008-01-22 20:49:26 UTC (rev 4369) @@ -403,7 +403,14 @@ assert (url != null); url = url.trim(); int p = url.indexOf(':'); - if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'"); + if (p < 0) { + if (url.startsWith("www.")) { + url = "http://" + url; + p = 4; + } else { + throw new MalformedURLException("protocol is not given in '" + url + "'"); + } + } this.protocol = url.substring(0, p).toLowerCase().trim(); if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'"); if (url.substring(p + 1, p + 3).equals("//")) { _______________________________________________ YaCy-svn mailing list YaCy-svn@lists.berlios.de https://lists.berlios.de/mailman/listinfo/yacy-svn