Author: orbiter
Date: 2008-01-22 21:49:26 +0100 (Tue, 22 Jan 2008)
New Revision: 4369

Modified:
   trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java
   trunk/source/de/anomic/yacy/yacyURL.java
Log:
patch for http://forum.yacy-websuche.de/viewtopic.php?p=4597#p4597
(URLs that have no protocol but start with "www." will be treated as http://www...)

Modified: trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java      2008-01-22 
20:44:12 UTC (rev 4368)
+++ trunk/source/de/anomic/plasma/plasmaCrawlBalancer.java      2008-01-22 
20:49:26 UTC (rev 4369)
@@ -465,10 +465,12 @@
         assert delta >= 0: "delta = " + delta;
         int s = urlFileIndex.size();
         kelondroRow.Entry rowEntry = urlFileIndex.remove(result.getBytes(), 
false);
-        assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + 
urlFileIndex.size() + ", s = " + s + ", result = " + result;
+        assert (rowEntry != null) && (urlFileIndex.size() + 1 == s) : 
"urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + 
result;
         if (rowEntry == null) {
             serverLog.logSevere("PLASMA BALANCER", "get() found a valid 
urlhash, but failed to fetch the corresponding url entry - total size = " + 
size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " 
+ urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size());
             return null;
+        } else {
+            assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + 
urlFileIndex.size() + ", s = " + s + ", result = " + result;
         }
         plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry);
         long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : 
minimumGlobalDelta;

Modified: trunk/source/de/anomic/yacy/yacyURL.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyURL.java    2008-01-22 20:44:12 UTC (rev 
4368)
+++ trunk/source/de/anomic/yacy/yacyURL.java    2008-01-22 20:49:26 UTC (rev 
4369)
@@ -403,7 +403,14 @@
         assert (url != null);
         url = url.trim();
         int p = url.indexOf(':');
-        if (p < 0) throw new MalformedURLException("protocol is not given in 
'" + url + "'");
+        if (p < 0) {
+            if (url.startsWith("www.")) {
+                url = "http://" + url;
+                p = 4;
+            } else {
+                throw new MalformedURLException("protocol is not given in '" + 
url + "'");
+            }
+        }
         this.protocol = url.substring(0, p).toLowerCase().trim();
         if (url.length() < p + 4) throw new MalformedURLException("URL not 
parseable: '" + url + "'");
         if (url.substring(p + 1, p + 3).equals("//")) {

_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an