Author: orbiter
Date: 2008-02-03 13:40:40 +0100 (Sun, 03 Feb 2008)
New Revision: 4434
Modified:
trunk/source/de/anomic/plasma/plasmaWordIndex.java
trunk/source/de/anomic/yacy/yacyClient.java
Log:
added another link double-check
Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-03 11:21:50 UTC (rev 4433)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-03 12:40:40 UTC (rev 4434)
@@ -51,6 +51,7 @@
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRotateIterator;
+import de.anomic.kelondro.kelondroRowSet;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
@@ -361,6 +362,27 @@
container.addAllUnique(collections.getContainer(wordHash, urlselection));
}
}
+
+ // check doubles
+ int beforeDouble = container.size();
+ ArrayList<kelondroRowSet> d = container.removeDoubles();
+ kelondroRowSet set;
+ for (int i = 0; i < d.size(); i++) {
+ // for each element in the double-set, take that one that is the most recent one
+ set = d.get(i);
+ indexRWIRowEntry e, elm = null;
+ long lm = 0;
+ for (int j = 0; j < set.size(); j++) {
+ e = new indexRWIRowEntry(set.get(j));
+ if ((elm == null) || (e.lastModified() > lm)) {
+ elm = e;
+ lm = e.lastModified();
+ }
+ }
+ container.addUnique(elm.toKelondroEntry());
+ }
+ if (container.size() < beforeDouble) System.out.println("*** DEBUG DOUBLECHECK - removed " + (beforeDouble - container.size()) + " index entries from word container " + container.getWordHash());
+
return container;
}
Modified: trunk/source/de/anomic/yacy/yacyClient.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyClient.java 2008-02-03 11:21:50 UTC (rev 4433)
+++ trunk/source/de/anomic/yacy/yacyClient.java 2008-02-03 12:40:40 UTC (rev 4434)
@@ -572,11 +572,6 @@
}
}
- // insert the containers to the index
- for (int m = 0; m < words; m++) {
- wordIndex.addEntries(container[m], true);
- }
-
// read index abstract
if (abstractCache != null) {
Iterator<Map.Entry<String, String>> i = result.entrySet().iterator();
@@ -600,7 +595,12 @@
}
}
- // generate statistics
+ // insert the containers to the index
+ for (int m = 0; m < words; m++) {
+ wordIndex.addEntries(container[m], true);
+ }
+
+ // generate statistics
long searchtime;
try {
searchtime = Integer.parseInt((String) result.get("searchtime"));
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn