plasma

orbiter at BerliOS Tue, 14 Mar 2006 15:25:50 -0800

Author: orbiter
Date: 2006-03-15 00:22:49 +0100 (Wed, 15 Mar 2006)
New Revision: 1888


Modified:
   trunk/source/de/anomic/plasma/plasmaDHTChunk.java
   trunk/source/de/anomic/plasma/plasmaWordIndex.java
Log:
try to fix new 100% cpu bug, possibly caused by iterator method
see http://www.yacy-forum.de/viewtopic.php?p=18900#18900

Modified: trunk/source/de/anomic/plasma/plasmaDHTChunk.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaDHTChunk.java   2006-03-14 22:56:41 UTC 
(rev 1887)
+++ trunk/source/de/anomic/plasma/plasmaDHTChunk.java   2006-03-14 23:22:49 UTC 
(rev 1888)
@@ -172,6 +172,82 @@
     private int  selectTransferContainersResource(String hash, int 
resourceLevel, int maxcount) {
         // the hash is a start hash from where the indexes are picked
         ArrayList tmpContainers = new ArrayList(maxcount);
+        try {
+            String[] wordHashes = wordIndex.wordHashes(hash, resourceLevel, 
true, maxcount);
+            plasmaWordIndexEntryContainer indexContainer;
+            Iterator urlIter;
+            plasmaWordIndexEntry indexEntry;
+            plasmaCrawlLURL.Entry lurl;
+            int refcount = 0;
+
+            urlCache = new HashMap();
+            while ((maxcount > refcount) && ((tmpContainers.size() == 0) || 
(yacyDHTAction.dhtDistance(wordHashes[refcount], 
((plasmaWordIndexEntryContainer) tmpContainers.get(0)).wordHash()) < 0.2))) {
+                // make an on-the-fly entity and insert values
+                indexContainer = wordIndex.getContainer(wordHashes[refcount], 
true, 10000);
+                int notBoundCounter = 0;
+                try {
+                    urlIter = indexContainer.entries();
+                    // iterate over indexes to fetch url entries and store 
them in the urlCache
+                    while ((urlIter.hasNext()) && (maxcount > refcount)) {
+                        indexEntry = (plasmaWordIndexEntry) urlIter.next();
+                        try {
+                            lurl = lurls.getEntry(indexEntry.getUrlHash(), 
indexEntry);
+                            if ((lurl == null) || (lurl.url() == null)) {
+                                notBoundCounter++;
+                                urlIter.remove();
+                                wordIndex.removeEntries(wordHashes[refcount], 
new String[] { indexEntry.getUrlHash() }, true);
+                            } else {
+                                urlCache.put(indexEntry.getUrlHash(), lurl);
+                                refcount++;
+                            }
+                        } catch (IOException e) {
+                            notBoundCounter++;
+                            urlIter.remove();
+                            wordIndex.removeEntries(wordHashes[refcount], new 
String[] { indexEntry.getUrlHash() }, true);
+                        }
+                    }
+
+                    // remove all remaining; we have enough
+                    while (urlIter.hasNext()) {
+                        indexEntry = (plasmaWordIndexEntry) urlIter.next();
+                        urlIter.remove();
+                    }
+
+                    // use whats left
+                    log.logFine("Selected partial index (" + 
indexContainer.size() + " from " + wordIndex.indexSize(wordHashes[refcount-1]) 
+ " URLs, " + notBoundCounter + " not bound) for word " + 
indexContainer.wordHash());
+                    tmpContainers.add(indexContainer);
+                } catch (kelondroException e) {
+                    log.logSevere("plasmaWordIndexDistribution/2: deleted DB 
for word " + wordHashes[refcount], e);
+                    wordIndex.deleteIndex(wordHashes[refcount]);
+                }
+            }
+            // create result
+            indexContainers = (plasmaWordIndexEntryContainer[]) 
tmpContainers.toArray(new plasmaWordIndexEntryContainer[tmpContainers.size()]);
+
+            if ((indexContainers == null) || (indexContainers.length == 0)) {
+                log.logFine("No index available for index transfer, hash 
start-point " + startPointHash);
+                this.status = chunkStatus_FAILED;
+                return 0;
+            }
+
+            this.status = chunkStatus_FILLED;
+            
+            return refcount;
+        } catch (kelondroException e) {
+            log.logSevere("selectTransferIndexes database corrupted: " + 
e.getMessage(), e);
+            indexContainers = new plasmaWordIndexEntryContainer[0];
+            urlCache = new HashMap();
+            
+            this.status = chunkStatus_FAILED;
+            
+            return 0;
+        }
+    }
+
+    /*
+    private int  selectTransferContainersResource(String hash, int 
resourceLevel, int maxcount) {
+        // the hash is a start hash from where the indexes are picked
+        ArrayList tmpContainers = new ArrayList(maxcount);
         String nexthash = "";
         try {
             Iterator wordHashIterator = wordIndex.wordHashes(hash, 
resourceLevel, true);
@@ -245,7 +321,8 @@
             return 0;
         }
     }
-
+    */
+    
     public int deleteTransferIndexes() {
         Iterator urlIter;
         plasmaWordIndexEntry indexEntry;

Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java  2006-03-14 22:56:41 UTC 
(rev 1887)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java  2006-03-14 23:22:49 UTC 
(rev 1888)
@@ -380,6 +380,22 @@
     public static final int RL_ASSORTMENTS = 2;
     public static final int RL_WORDFILES   = 3;
     
+    public synchronized String[] wordHashes(String startHash, int 
resourceLevel, boolean rot, int count) {
+        String[] hashes = new String[count];
+        Iterator i = wordHashes(startHash, resourceLevel, rot);
+        int j = 0;
+        while ((count-- > 0) && (i.hasNext())) {
+            hashes[j++] = (String) i.next();
+        }
+        if (count > 0) {
+            String[] s = new String[j];
+            System.arraycopy(hashes, 0, s, 0, j);
+            return s;
+        } else {
+            return hashes;
+        }
+    }
+    
     public Iterator wordHashes(String startHash, int resourceLevel, boolean 
rot) {
         if (rot) return new rotatingWordIterator(startHash, resourceLevel);
         else return new correctedWordIterator(startHash, resourceLevel, rot); 
// use correction until bug is found

_______________________________________________
YaCy-svn mailing list
[email protected]
http://lists.berlios.de/mailman/listinfo/yacy-svn

[YaCy-svn] r1888 - trunk/source/de/anomic/plasma

Antwort per Email an