Author: orbiter
Date: 2008-02-03 13:40:40 +0100 (Sun, 03 Feb 2008)
New Revision: 4434

Modified:
   trunk/source/de/anomic/plasma/plasmaWordIndex.java
   trunk/source/de/anomic/yacy/yacyClient.java
Log:
added another link double-check

Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java  2008-02-03 11:21:50 UTC (rev 4433)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java  2008-02-03 12:40:40 UTC (rev 4434)
@@ -51,6 +51,7 @@
 import de.anomic.kelondro.kelondroMergeIterator;
 import de.anomic.kelondro.kelondroOrder;
 import de.anomic.kelondro.kelondroRotateIterator;
+import de.anomic.kelondro.kelondroRowSet;
 import de.anomic.plasma.urlPattern.plasmaURLPattern;
 import de.anomic.server.serverMemory;
 import de.anomic.server.logging.serverLog;
@@ -361,6 +362,27 @@
                 container.addAllUnique(collections.getContainer(wordHash, urlselection));
             }
         }
+        
+        // check doubles
+        int beforeDouble = container.size();
+        ArrayList<kelondroRowSet> d = container.removeDoubles();
+        kelondroRowSet set;
+        for (int i = 0; i < d.size(); i++) {
+            // for each element in the double-set, take that one that is the most recent one
+            set = d.get(i);
+            indexRWIRowEntry e, elm = null;
+            long lm = 0;
+            for (int j = 0; j < set.size(); j++) {
+                e = new indexRWIRowEntry(set.get(j));
+                if ((elm == null) || (e.lastModified() > lm)) {
+                    elm = e;
+                    lm = e.lastModified();
+                }
+            }
+            container.addUnique(elm.toKelondroEntry());
+        }
+        if (container.size() < beforeDouble) System.out.println("*** DEBUG DOUBLECHECK - removed " + (beforeDouble - container.size()) + " index entries from word container " + container.getWordHash());
+
         return container;
     }
 

Modified: trunk/source/de/anomic/yacy/yacyClient.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyClient.java 2008-02-03 11:21:50 UTC (rev 4433)
+++ trunk/source/de/anomic/yacy/yacyClient.java 2008-02-03 12:40:40 UTC (rev 4434)
@@ -572,11 +572,6 @@
             }
         }
         
-               // insert the containers to the index
-        for (int m = 0; m < words; m++) {
-            wordIndex.addEntries(container[m], true);
-               }
-        
                // read index abstract
                if (abstractCache != null) {
                        Iterator<Map.Entry<String, String>> i = result.entrySet().iterator();
@@ -600,7 +595,12 @@
                        }
                }
 
-               // generate statistics
+               // insert the containers to the index
+        for (int m = 0; m < words; m++) {
+            wordIndex.addEntries(container[m], true);
+        }
+        
+        // generate statistics
                long searchtime;
                try {
                        searchtime = Integer.parseInt((String) result.get("searchtime"));

_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an