Author: orbiter
Date: 2006-03-13 11:43:12 +0100 (Mon, 13 Mar 2006)
New Revision: 1880

Modified:
   trunk/htroot/PerformanceQueues_p.html
   trunk/htroot/PerformanceQueues_p.java
   trunk/htroot/xml/status_p.java
   trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java
   trunk/source/de/anomic/kelondro/kelondroMScoreCluster.java
   trunk/source/de/anomic/plasma/plasmaSearchEvent.java
   trunk/source/de/anomic/plasma/plasmaSwitchboard.java
   trunk/source/de/anomic/plasma/plasmaWordIndex.java
   trunk/source/de/anomic/plasma/plasmaWordIndexCache.java
   trunk/source/de/anomic/plasma/plasmaWordIndexInterface.java
   trunk/yacy.init
Log:
introduced a second RAM cache for DHT transfer

Modified: trunk/htroot/PerformanceQueues_p.html
===================================================================
--- trunk/htroot/PerformanceQueues_p.html       2006-03-12 18:08:48 UTC (rev 
1879)
+++ trunk/htroot/PerformanceQueues_p.html       2006-03-13 10:43:12 UTC (rev 
1880)
@@ -69,53 +69,59 @@
 <div class=small><b>Indexing Cache Settings:</b></div>
 <form action="PerformanceQueues_p.html" method="post" 
enctype="multipart/form-data">
 <table border="0" cellpadding="5" cellspacing="1" width="100%">
+  <tr valign="top" class="TableHeader">
+    <td class=small>Cache Type</td>
+    <td class=small>Indexing</td>
+    <td class=small>DHT</td>
+    <td class=small>Description</td>
+  </tr>
   <tr valign="top" class="TableCellDark">
-    <td class=small>Words in RAM Cache:</td>
-    <td class=small>#[wordCacheRAMSize]#</td>
+    <td class=small>Words in RAM cache:</td>
+    <td class=small>#[wordCacheWSize]#</td>
+    <td class=small>#[wordCacheKSize]#</td>
     <td class=small>
-    This is the current size of the word cache.
-    The smaller this number, the faster the shut-down procedure will be.
-    The maximum of this cache can be set below.
+    This is the current size of the word caches.
+    The indexing cache speeds up the indexing process, the DHT cache holds 
indexes temporary for approval.
+    The maximum of this caches can be set below.
     </td>
   </tr>
   <tr valign="top" class="TableCellDark">
     <td class=small>Maximum URLs currently assigned<br>to one cached word:</td>
-    <td class=small>#[maxURLinWordCache]#</td>
+    <td class=small>#[maxURLinWCache]#</td>
+    <td class=small>not controlled<br>for DHT cache</td>
     <td class=small>
     This is the maximum size of URLs assigned to a single word cache entry.
     If this is a big number, it shows that the caching works efficiently.
     </td>
   </tr>
   <tr valign="top" class="TableCellDark">
-    <td class=small>Maximum Age of Word in cache:</td>
-    <td class=small>#[maxAgeOfWordCache]#</td>
+    <td class=small>Maximum age of a word:</td>
+    <td class=small>#[maxAgeOfWCache]#</td>
+    <td class=small>#[maxAgeOfKCache]#</td>
     <td class=small>
-    This is the maximum age of a word index that is in the RAM cache in 
minutes.
+    This is the maximum age of a word in an index in minutes.
     </td>
   </tr>
   <tr valign="top" class="TableCellDark">
-    <td class=small>Minimum Age of Word in cache:</td>
-    <td class=small>#[minAgeOfWordCache]#</td>
+    <td class=small>Minimum age of a word:</td>
+    <td class=small>#[minAgeOfWCache]#</td>
+    <td class=small>#[minAgeOfKCache]#</td>
     <td class=small>
-    This is the minimum age of a word index that is in the RAM cache in 
minutes.
+    This is the minimum age of a word in an index in minutes.
     </td>
   </tr>
   <tr valign="top" class="TableCellDark">
-    <td class=small>Maximum number of Word Caches, low limit:</td>
-    <td class=small><input name="wordCacheMaxLow" type="text" size="20" 
maxlength="100" value="#[wordCacheMaxLow]#"></td>
+    <td class=small>Maximum number of words in cache:</td>
+    <td class=small><input name="wordCacheMaxCount" type="text" size="20" 
maxlength="100" value="#[wordCacheMaxCount]#"></td>
+    <td class=small>cannot be set for DHT</td>
     <td class=small rowspan="2">
     This is is the number of word indexes that shall be held in the
     ram cache during indexing. When YaCy is shut down, this cache must be
-    flushed to disc; this may last some minutes. The low limit is valid for 
crawling tasks, the high limit is valid
-    for search and DHT transmission tasks.
+    flushed to disc; this may last some minutes.
     </td>
   </tr>
-  <tr valign="top" class="TableCellDark">
-    <td class=small>Maximum number of Word Caches, high limit:</td>
-    <td class=small><input name="wordCacheMaxHigh" type="text" size="20" 
maxlength="100" value="#[wordCacheMaxHigh]#"></td>
-  </tr>
   <tr valign="top" class="TableCellLight">
-    <td class=small colspan="3">
+    <td class=small colspan="4">
     <input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size">
     Changes take effect immediately</td>
   </tr>

Modified: trunk/htroot/PerformanceQueues_p.java
===================================================================
--- trunk/htroot/PerformanceQueues_p.java       2006-03-12 18:08:48 UTC (rev 
1879)
+++ trunk/htroot/PerformanceQueues_p.java       2006-03-13 10:43:12 UTC (rev 
1880)
@@ -142,15 +142,14 @@
                 idlesleep = Long.parseLong(d((String) 
defaultSettings.get(threadName + "_idlesleep"), "1000"));
                 busysleep = Long.parseLong(d((String) 
defaultSettings.get(threadName + "_busysleep"),  "100"));
                 memprereq = Long.parseLong(d((String) 
defaultSettings.get(threadName + "_memprereq"),    "0"));
-                
+
                 // check values to prevent short-cut loops
                 if (idlesleep < 1000) idlesleep = 1000;
                 if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep 
= 0; memprereq = 0; }
                 if ((threadName.equals("50_localcrawl")) && (busysleep < 100)) 
busysleep = 100;
                 if ((threadName.equals("61_globalcrawltrigger")) && (busysleep 
< 100)) busysleep = 100;
                 if ((threadName.equals("62_remotetriggeredcrawl")) && 
(busysleep < 100)) busysleep = 100;
-                
-                
+
                 // on-the-fly re-configuration
                 switchboard.setThreadPerformance(threadName, idlesleep, 
busysleep, memprereq);
                 switchboard.setConfig(threadName + "_idlesleep", idlesleep);
@@ -171,12 +170,9 @@
         prop.put("table", c);
         
         if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
-            int wordCacheMaxLow = post.getInt("wordCacheMaxLow", 8000);
-            int wordCacheMaxHigh = post.getInt("wordCacheMaxHigh", 10000);
-            if (wordCacheMaxLow > wordCacheMaxHigh) wordCacheMaxLow = 
wordCacheMaxHigh;
-            switchboard.setConfig("wordCacheMaxLow", 
Integer.toString(wordCacheMaxLow));
-            switchboard.setConfig("wordCacheMaxHigh", 
Integer.toString(wordCacheMaxHigh));
-            switchboard.wordIndex.setMaxWords(wordCacheMaxLow, 
wordCacheMaxHigh);
+            int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 10000);
+            switchboard.setConfig("wordCacheMaxCount", 
Integer.toString(wordCacheMaxCount));
+            switchboard.wordIndex.setMaxWordCount(wordCacheMaxCount);
             int maxWaitingWordFlush = post.getInt("maxWaitingWordFlush", 180);
             switchboard.setConfig("maxWaitingWordFlush", 
Integer.toString(maxWaitingWordFlush));
         }
@@ -251,13 +247,15 @@
         }
         
         // table cache settings
-        prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
-        prop.put("maxURLinWordCache", "" + 
switchboard.wordIndex.maxURLinWordCache());
-        prop.put("maxAgeOfWordCache", "" + 
(switchboard.wordIndex.maxAgeOfWordCache() / 1000 / 60)); // minutes
-        prop.put("minAgeOfWordCache", "" + 
(switchboard.wordIndex.minAgeOfWordCache() / 1000 / 60)); // minutes
+        prop.put("wordCacheWSize", switchboard.wordIndex.wSize());
+        prop.put("wordCacheKSize", switchboard.wordIndex.kSize());
+        prop.put("maxURLinWCache", "" + 
switchboard.wordIndex.maxURLinWCache());
+        prop.put("maxAgeOfWCache", "" + 
(switchboard.wordIndex.maxAgeOfWCache() / 1000 / 60)); // minutes
+        prop.put("minAgeOfWCache", "" + 
(switchboard.wordIndex.minAgeOfWCache() / 1000 / 60)); // minutes
+        prop.put("maxAgeOfKCache", "" + 
(switchboard.wordIndex.maxAgeOfKCache() / 1000 / 60)); // minutes
+        prop.put("minAgeOfKCache", "" + 
(switchboard.wordIndex.minAgeOfKCache() / 1000 / 60)); // minutes
         prop.put("maxWaitingWordFlush", 
switchboard.getConfig("maxWaitingWordFlush", "180"));
-        prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", 
"10000"));
-        prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", 
"10000"));
+        prop.put("wordCacheMaxCount", 
switchboard.getConfig("wordCacheMaxCount", "10000"));
         prop.put("onlineCautionDelay", 
switchboard.getConfig("onlineCautionDelay", "30000"));
         prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - 
switchboard.proxyLastAccess);
         

Modified: trunk/htroot/xml/status_p.java
===================================================================
--- trunk/htroot/xml/status_p.java      2006-03-12 18:08:48 UTC (rev 1879)
+++ trunk/htroot/xml/status_p.java      2006-03-13 10:43:12 UTC (rev 1880)
@@ -64,7 +64,7 @@
         prop.put("rejected", 0);
         yacyCore.peerActions.updateMySeed();
         prop.put("ppm", yacyCore.seedDB.mySeed.get(yacySeed.ISPEED, 
"unknown"));
-        prop.put("wordCacheSize", switchboard.wordIndex.wordCacheRAMSize());
+        prop.put("wordCacheSize", switchboard.wordIndex.wSize() + 
switchboard.wordIndex.kSize());
         prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", 
"10000"));
         prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", 
"10000"));
 

Modified: trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java        
2006-03-12 18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java        
2006-03-13 10:43:12 UTC (rev 1880)
@@ -27,7 +27,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Iterator;
 
 public class kelondroCollectionIndex {
 

Modified: trunk/source/de/anomic/kelondro/kelondroMScoreCluster.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroMScoreCluster.java  2006-03-12 
18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/kelondro/kelondroMScoreCluster.java  2006-03-13 
10:43:12 UTC (rev 1880)
@@ -73,17 +73,9 @@
         } catch (ParseException e) {}
     }
     
-    /*
     public static int string2score(String s) {
-        int i = string2scoreX(s);
-        System.out.println("string2core(" + s + ") = " + i);
-        return i;
-    }
-    */
-    
-    public static int string2score(String s) {
         // this can be used to calculate a score from a string
-        
+        if ((s == null) || (s.length() == 0) || (s.charAt(0) == '-')) return 0;
         try {
             long l = 0;
             if (s.length() == shortDateFormatString.length()) {
@@ -97,7 +89,10 @@
             }
             // fix out-of-ranges
             if (l > Integer.MAX_VALUE) return Integer.MAX_VALUE;
-            if (l < 0) return 0;
+            if (l < 0) {
+                System.out.println("string2score: negative score for input " + 
s);
+                return 0;
+            }
             return (int) l;
         } catch (Exception e) {
             // try it lex
@@ -110,7 +105,10 @@
             }
             for (int i = len; i < 5; i++) c <<= 6;
             if (c > Integer.MAX_VALUE) return Integer.MAX_VALUE;
-            if (c < 0) return 0;
+            if (c < 0) {
+                System.out.println("string2score: negative score for input " + 
s);
+                return 0;
+            }
             return c;
         }
     }
@@ -411,14 +409,18 @@
         
     public static void main(String[] args) {
         
-        if (args.length > 0) System.out.println("score of " + args[0] + ": " + 
string2score(args[0]));
-        //System.exit(0);
+        String t = "ZZZZZZZZZZ";
+        System.out.println("score of " + t + ": " + string2score(t));
+        if (args.length > 0) {
+            System.out.println("score of " + args[0] + ": " + 
string2score(args[0]));
+            System.exit(0);
+        }
         
         System.out.println("Test for Score: start");
         kelondroMScoreCluster s = new kelondroMScoreCluster();
-       long c = 0;
+        long c = 0;
 
-       // create cluster
+        // create cluster
         long time = System.currentTimeMillis();
         Random random = new Random(1234);
         int r;

Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2006-03-12 
18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2006-03-13 
10:43:12 UTC (rev 1880)
@@ -304,7 +304,7 @@
                 while (hashi.hasNext()) {
                     wordHash = (String) hashi.next();
                     rcGlobal.setWordHash(wordHash);
-                    wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), 
true);
+                    wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), 
false);
                     log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() 
+ " url entries");
                 }
                 // the rcGlobal was flushed, empty it

Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2006-03-12 
18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2006-03-13 
10:43:12 UTC (rev 1880)
@@ -369,9 +369,8 @@
         urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
         
         wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
-        int wordCacheMaxLow = (int) getConfigLong("wordCacheMaxLow", 8000);
-        int wordCacheMaxHigh = (int) getConfigLong("wordCacheMaxHigh", 10000);
-        wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
+        int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 
10000);
+        wordIndex.setMaxWordCount(wordCacheMaxCount);
         
         // start a cache manager
         log.logConfig("Starting HT Cache Manager");

Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java  2006-03-12 18:08:48 UTC 
(rev 1879)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java  2006-03-13 10:43:12 UTC 
(rev 1880)
@@ -90,22 +90,34 @@
         return databaseRoot;
     }
 
-    public int maxURLinWordCache() {
-        return ramCache.maxURLinWordCache();
+    public int maxURLinWCache() {
+        return ramCache.maxURLinWCache();
     }
 
-    public long minAgeOfWordCache() {
-        return ramCache.minAgeOfWordCache();
+    public long minAgeOfWCache() {
+        return ramCache.minAgeOfWCache();
     }
 
-    public long maxAgeOfWordCache() {
-        return ramCache.maxAgeOfWordCache();
+    public long maxAgeOfWCache() {
+        return ramCache.maxAgeOfWCache();
     }
 
-    public int wordCacheRAMSize() {
-        return ramCache.wordCacheRAMSize();
+    public long minAgeOfKCache() {
+        return ramCache.minAgeOfKCache();
     }
 
+    public long maxAgeOfKCache() {
+        return ramCache.maxAgeOfKCache();
+    }
+
+    public int wSize() {
+        return ramCache.wSize();
+    }
+
+    public int kSize() {
+        return ramCache.kSize();
+    }
+
     public int[] assortmentsSizes() {
         return assortmentCluster.sizes();
     }
@@ -118,48 +130,49 @@
         return assortmentCluster.cacheFillStatusCml();
     }
     
-    public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
-        ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
+    public void setMaxWordCount(int maxWords) {
+        ramCache.setMaxWordCount(maxWords);
     }
 
-    public void flushControl(boolean highPriority) {
+    public void flushControl(boolean dhtCase) {
         // check for forced flush
-        if (highPriority) {
-            if (ramCache.size() > ramCache.getMaxWordsHigh()) {
-                while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
+        ramCache.shiftK2W();
+        if (dhtCase) {
+            if (ramCache.wSize() > ramCache.getMaxWordCount()) {
+                while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
                     flushCache(1);
                 }
             }
         } else {
-            while (ramCache.maxURLinWordCache() > 
plasmaWordIndexCache.ramCacheReferenceLimit) {
+            while (ramCache.maxURLinWCache() > 
plasmaWordIndexCache.wCacheReferenceLimit) {
                 flushCache(1);
             }
-            if (ramCache.size() > ramCache.getMaxWordsLow()) {
-                while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
+            if (ramCache.wSize() > ramCache.getMaxWordCount()) {
+                while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
                     flushCache(1);
                 }
             }
         }
     }
 
-    public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long 
updateTime, boolean highPriority) {
-        if (ramCache.addEntry(wordHash, entry, updateTime)) {
-            flushControl(highPriority);
+    public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long 
updateTime, boolean dhtCase) {
+        if (ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) {
+            flushControl(dhtCase);
             return true;
         }
         return false;
     }
     
-    public int addEntries(plasmaWordIndexEntryContainer entries, long 
updateTime, boolean highPriority) {
-        int added = ramCache.addEntries(entries, updateTime, highPriority);
+    public int addEntries(plasmaWordIndexEntryContainer entries, long 
updateTime, boolean dhtCase) {
+        int added = ramCache.addEntries(entries, updateTime, dhtCase);
 
         // force flush
-        flushControl(highPriority);
+        flushControl(dhtCase);
         return added;
     }
 
     public synchronized void flushCacheSome() {
-        int flushCount = ramCache.size() / 1000;
+        int flushCount = ramCache.wSize() / 1000;
         if (flushCount > 50) flushCount = 50;
         if (flushCount < 3) flushCount = 3;
         flushCache(flushCount);
@@ -167,7 +180,7 @@
     
     public synchronized void flushCache(int count) {
         for (int i = 0; i < count; i++) {
-            if (ramCache.size() == 0) break;
+            if (ramCache.wSize() == 0) break;
             flushCache(ramCache.bestFlushWordHash());
             try {Thread.sleep(10);} catch (InterruptedException e) {}
         }
@@ -316,7 +329,7 @@
 
     public int size() {
         return java.lang.Math.max(assortmentCluster.sizeTotal(),
-                        java.lang.Math.max(backend.size(), ramCache.size()));
+                        java.lang.Math.max(backend.size(), ramCache.wSize() + 
ramCache.kSize()));
     }
 
     public int indexSize(String wordHash) {

Modified: trunk/source/de/anomic/plasma/plasmaWordIndexCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndexCache.java     2006-03-12 
18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/plasma/plasmaWordIndexCache.java     2006-03-13 
10:43:12 UTC (rev 1880)
@@ -56,21 +56,24 @@
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySeedDB;
 
-public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
+public final class plasmaWordIndexCache /*implements 
plasmaWordIndexInterface*/ {
 
     // environment constants
     private static final String indexArrayFileName = "indexDump1.array";
-    public static final int  ramCacheReferenceLimit = 50;
-    public static final long ramCacheMaxAge         = 1000 * 60 * 60 * 2; // 
milliseconds; 2 Hours
-    public static final long ramCacheMinAge         = 1000 * 60 * 2; // 
milliseconds; 2 Minutes (Karenz for DHT Receive)
+    public static final int  wCacheReferenceLimit = 50;
+    public static final long wCacheMaxAge         = 1000 * 60 * 60 * 2; // 
milliseconds; 2 hours
+    public static final long wCacheMinAge         = 1000;               // 
milliseconds; 1 second
+    public static final long kCacheMaxAge         = 1000 * 60 * 2;      // 
milliseconds; 2 minutes
     
     // class variables
     private final File databaseRoot;
-    private final TreeMap cache;
+    private final TreeMap wCache; // wordhash-container
+    private final TreeMap kCache; // time-container; for karenz/DHT caching 
(set with high priority)
     private final kelondroMScoreCluster hashScore;
     private final kelondroMScoreCluster hashDate;
+    private long  kCacheInc = 0;
     private long  startTime;
-    private int   maxWordsLow, maxWordsHigh; // we have 2 cache limits for 
different priorities
+    private int   wCacheMaxCount;
     private final serverLog log;
 
     // calculated constants
@@ -85,12 +88,13 @@
         // creates a new index cache
         // the cache has a back-end where indexes that do not fit in the cache 
are flushed
         this.databaseRoot = databaseRoot;
-        this.cache = new TreeMap();
+        this.wCache = new TreeMap();
+        this.kCache = new TreeMap();
         this.hashScore = new kelondroMScoreCluster();
         this.hashDate  = new kelondroMScoreCluster();
+        this.kCacheInc = 0;
         this.startTime = System.currentTimeMillis();
-        this.maxWordsLow  =  8000;
-        this.maxWordsHigh = 10000;
+        this.wCacheMaxCount = 10000;
         this.log = log;
         
         // read in dump of last session
@@ -102,7 +106,7 @@
     }
 
     private void dump(int waitingSeconds) throws IOException {
-        log.logConfig("creating dump for index cache, " + cache.size() + " 
words (and much more urls)");
+        log.logConfig("creating dump for index cache, " + wCache.size() + " 
words (and much more urls)");
         File indexDumpFile = new File(databaseRoot, indexArrayFileName);
         if (indexDumpFile.exists()) indexDumpFile.delete();
         kelondroArray dumpArray = null;
@@ -110,15 +114,42 @@
             long startTime = System.currentTimeMillis();
             long messageTime = System.currentTimeMillis() + 5000;
             long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
-            synchronized (cache) {
-                Iterator i = cache.entrySet().iterator();
-                Map.Entry entry;
-                String wordHash;
-                plasmaWordIndexEntryContainer container;
-                long updateTime;
-                plasmaWordIndexEntry wordEntry;
-                byte[][] row = new byte[5][];
+            Map.Entry entry;
+            String wordHash;
+            plasmaWordIndexEntryContainer container;
+            long updateTime;
+            plasmaWordIndexEntry wordEntry;
+            byte[][] row = new byte[5][];
+            
+            // write kCache, this will be melted with the wCache upon load
+            synchronized (kCache) {
+                Iterator i = kCache.values().iterator();
                 while (i.hasNext()) {
+                    container = (plasmaWordIndexEntryContainer) i.next();
+
+                    // put entries on stack
+                    if (container != null) {
+                        Iterator ci = container.entries();
+                        while (ci.hasNext()) {
+                            wordEntry = (plasmaWordIndexEntry) ci.next();
+                            row[0] = container.wordHash().getBytes();
+                            row[1] = 
kelondroRecords.long2bytes(container.size(), 4);
+                            row[2] = 
kelondroRecords.long2bytes(container.updated(), 8);
+                            row[3] = wordEntry.getUrlHash().getBytes();
+                            row[4] = wordEntry.toEncodedForm().getBytes();
+                            dumpArray.set((int) urlcount++, row);
+                        }
+                    }
+                    wordcount++;
+                    i.remove(); // free some mem
+                    
+                }
+            }
+            
+            // write wCache
+            synchronized (wCache) {
+                Iterator i = wCache.entrySet().iterator();
+                while (i.hasNext()) {
                     // get entries
                     entry = (Map.Entry) i.next();
                     wordHash = (String) entry.getKey();
@@ -145,7 +176,7 @@
                     if (System.currentTimeMillis() > messageTime) {
                         // System.gc(); // for better statistic
                         wordsPerSecond = wordcount * 1000 / (1 + 
System.currentTimeMillis() - startTime);
-                        log.logInfo("dumping status: " + wordcount + " words 
done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem 
= " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
+                        log.logInfo("dumping status: " + wordcount + " words 
done, " + (wCache.size() / (wordsPerSecond + 1)) + " seconds remaining, free 
mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
                         messageTime = System.currentTimeMillis() + 5000;
                     }
                 }
@@ -164,7 +195,7 @@
         long messageTime = System.currentTimeMillis() + 5000;
         long urlCount = 0, urlsPerSecond = 0;
         try {
-            synchronized (cache) {
+            synchronized (wCache) {
                 int i = dumpArray.size();
                 String wordHash;
                 //long creationTime;
@@ -179,7 +210,7 @@
                     //creationTime = kelondroRecords.bytes2long(row[2]);
                     wordEntry = new plasmaWordIndexEntry(new String(row[3], 
"UTF-8"), new String(row[4], "UTF-8"));
                     // store to cache
-                    addEntry(wordHash, wordEntry, startTime);
+                    addEntry(wordHash, wordEntry, startTime, false);
                     urlCount++;
                     // protect against memory shortage
                     //while (rt.freeMemory() < 1000000) {flushFromMem(); 
java.lang.System.gc();}
@@ -194,7 +225,7 @@
             }
 
             dumpArray.close();
-            log.logConfig("restored " + cache.size() + " words in " + 
((System.currentTimeMillis() - startTime) / 1000) + " seconds");
+            log.logConfig("restored " + wCache.size() + " words in " + 
((System.currentTimeMillis() - startTime) / 1000) + " seconds");
         } catch (kelondroException e) {
             // restore failed
             log.logSevere("restore of indexCache array dump failed: " + 
e.getMessage(), e);
@@ -206,72 +237,94 @@
 
     // cache settings
 
-    public int maxURLinWordCache() {
+    public int maxURLinWCache() {
         if (hashScore.size() == 0) return 0;
         return hashScore.getMaxScore();
     }
 
-    public long minAgeOfWordCache() {
+    public long minAgeOfWCache() {
         if (hashDate.size() == 0) return 0;
         return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
     }
 
-    public long maxAgeOfWordCache() {
+    public long maxAgeOfWCache() {
         if (hashDate.size() == 0) return 0;
         return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
     }
 
-    public int wordCacheRAMSize() {
-        return cache.size();
+    public long minAgeOfKCache() {
+        if (kCache.size() == 0) return 0;
+        return System.currentTimeMillis() - ((Long) 
kCache.lastKey()).longValue();
     }
 
-    public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
-        this.maxWordsLow = maxWordsLow;
-        this.maxWordsHigh = maxWordsHigh;
+    public long maxAgeOfKCache() {
+        if (kCache.size() == 0) return 0;
+        return System.currentTimeMillis() - ((Long) 
kCache.firstKey()).longValue();
     }
-    
-    public int getMaxWordsLow() {
-        return this.maxWordsLow;
-    }
 
-    public int getMaxWordsHigh() {
-        return this.maxWordsHigh;
+    public void setMaxWordCount(int maxWords) {
+        this.wCacheMaxCount = maxWords;
     }
     
-    public int size() {
-        return cache.size();
+    public int getMaxWordCount() {
+        return this.wCacheMaxCount;
     }
+    
+    public int wSize() {
+        return wCache.size();
+    }
 
+    public int kSize() {
+        return kCache.size();
+    }
+
     public int indexSize(String wordHash) {
         int size = 0;
-        plasmaWordIndexEntryContainer cacheIndex = 
(plasmaWordIndexEntryContainer) cache.get(wordHash);
+        plasmaWordIndexEntryContainer cacheIndex = 
(plasmaWordIndexEntryContainer) wCache.get(wordHash);
         if (cacheIndex != null) size += cacheIndex.size();
         return size;
     }
     
     public Iterator wordHashes(String startWordHash, boolean rot) {
         if (rot) throw new UnsupportedOperationException("plasmaWordIndexCache 
cannot rotate");
-        return cache.tailMap(startWordHash).keySet().iterator();
+        return wCache.tailMap(startWordHash).keySet().iterator();
     }
 
+    public void shiftK2W() {
+        // find entries in kCache that are too old for that place and shift 
them to the wCache
+        long time;
+        Long l;
+        plasmaWordIndexEntryContainer container;
+        synchronized (kCache) {
+            while (kCache.size() > 0) {
+                l = (Long) kCache.firstKey();
+                time = l.longValue();
+                if (System.currentTimeMillis() - time < kCacheMaxAge) return;
+                container = (plasmaWordIndexEntryContainer) kCache.remove(l);
+                addEntries(container, container.updated(), false);
+            }
+        }
+    }
+    
     public String bestFlushWordHash() {
         // select appropriate hash
         // we have 2 different methods to find a good hash:
         // - the oldest entry in the cache
         // - the entry with maximum count
-        if (cache.size() == 0) return null;
+        shiftK2W();
+        if (wCache.size() == 0) return null;
         try {
-            synchronized (cache) {
+            synchronized (wCache) {
                 String hash = null;
                 int count = hashScore.getMaxScore();
-                if ((count > ramCacheReferenceLimit) &&
+                if ((count > wCacheReferenceLimit) &&
                     ((hash = (String) hashScore.getMaxObject()) != null) &&
-                    (System.currentTimeMillis() - 
longEmit(hashDate.getScore(hash)) > ramCacheMinAge)) {
+                    (System.currentTimeMillis() - 
longEmit(hashDate.getScore(hash)) > wCacheMinAge)) {
                     // flush high-score entries, but not if they are too 
'young'
                     return hash;
                 }
                 long oldestTime = longEmit(hashDate.getMinScore());
-                if (((System.currentTimeMillis() - oldestTime) > 
ramCacheMaxAge) &&
+                if (((System.currentTimeMillis() - oldestTime) > wCacheMaxAge) 
&&
                     ((hash = (String) hashDate.getMinObject()) != null)) {
                     // flush out-dated entries
                     return hash;
@@ -280,7 +333,7 @@
                 if (Runtime.getRuntime().freeMemory() < 10000000) {
                     // low-memory case
                     hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
-                    if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < ramCacheMinAge) {
+                    if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < wCacheMinAge) {
                         // to young, take it from the oldest entries
                         hash = (String) hashDate.getMinObject();
                     }
@@ -297,25 +350,19 @@
     }
 
     private int intTime(long longTime) {
-        return (int) ((longTime - startTime) / 1000);
+        return (int) Math.max(0, ((longTime - startTime) / 1000));
     }
 
     private long longEmit(int intTime) {
         return (((long) intTime) * (long) 1000) + startTime;
     }
     
-    /*
-    private long longTime(int intTime) {
-        return ((long) intTime) * ((long) 1000) + startTime;
-    }
-    */
-    
     public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty) {
-        return (plasmaWordIndexEntryContainer) cache.get(wordHash);
+        return (plasmaWordIndexEntryContainer) wCache.get(wordHash);
     }
 
     public long getUpdateTime(String wordHash) {
-        plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+        plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
         if (entries == null) return 0;
         return entries.updated();
         /*
@@ -327,8 +374,8 @@
 
     public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
         // returns the index that had been deleted
-        synchronized (cache) {
-            plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.remove(wordHash);
+        synchronized (wCache) {
+            plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.remove(wordHash);
             hashScore.deleteScore(wordHash);
             hashDate.deleteScore(wordHash);
             return container;
@@ -338,7 +385,7 @@
     public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
         if (urlHashes.length == 0) return 0;
         int count = 0;
-        synchronized (cache) {
+        synchronized (wCache) {
             plasmaWordIndexEntryContainer c = (plasmaWordIndexEntryContainer) deleteContainer(wordHash);
             if (c != null) {
                 count = c.removeEntries(wordHash, urlHashes, deleteComplete);
@@ -348,12 +395,13 @@
         return count;
     }
 
+    /*
     public int tryRemoveURLs(String urlHash) {
         // this tries to delete an index from the cache that has this
         // urlHash assigned. This can only work if the entry is really fresh
         // Such entries must be searched in the latest entries
         int delCount = 0;
-        synchronized (cache) {
+        synchronized (wCache) {
             Iterator i = hashDate.scores(false);
             String wordHash;
             long t;
@@ -362,11 +410,11 @@
                 wordHash = (String) i.next();
                 // check time
                 t = longEmit(hashDate.getScore(wordHash));
-                if (System.currentTimeMillis() - t > ramCacheMinAge) return delCount;
+                if (System.currentTimeMillis() - t > wCacheMinAge) return delCount;
                 // get container
-                c = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+                c = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
                 if (c.remove(urlHash) != null) {
-                    cache.put(wordHash, c);
+                    wCache.put(wordHash, c);
                     hashScore.decScore(wordHash);
                     delCount++;
                 }
@@ -374,50 +422,87 @@
         }
         return delCount;
     }
+    */
     
-    public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
+    public int tryRemoveURLs(String urlHash) {
+        // this tries to delete an index from the cache that has this
+        // urlHash assigned. This can only work if the entry is really fresh
+        // Such entries must be searched in the latest entries
+        int delCount = 0;
+        synchronized (kCache) {
+            Iterator i = kCache.entrySet().iterator();
+            Map.Entry entry;
+            Long l;
+            plasmaWordIndexEntryContainer c;
+            while (i.hasNext()) {
+                entry = (Map.Entry) i.next();
+                l = (Long) entry.getKey();
+            
+                // get container
+                c = (plasmaWordIndexEntryContainer) entry.getValue();
+                if (c.remove(urlHash) != null) {
+                    if (c.size() == 0) {
+                        i.remove();
+                    } else {
+                        kCache.put(l, c); // superfluous?
+                    }
+                    delCount++;
+                }
+            }
+        }
+        return delCount;
+    }
+    
+    public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
         // this puts the entries into the cache, not into the assortment directly
-
         int added = 0;
-        // check cache space
 
-        //serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
-
         // put new words into cache
-        String wordHash = container.wordHash();
-        plasmaWordIndexEntryContainer entries = null;
-        synchronized (cache) {
-            // put container into cache
-            entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
+        if (dhtCase) synchronized (kCache) {
+            // put container into kCache
+            kCache.put(new Long(updateTime + kCacheInc), container);
+            kCacheInc++;
+            if (kCacheInc > 10000) kCacheInc = 0;
+            added = container.size();
+        } else synchronized (wCache) {
+            // put container into wCache
+            String wordHash = container.wordHash();
+            plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
             if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash);
             added = entries.add(container);
             if (added > 0) {
-                cache.put(wordHash, entries);
+                wCache.put(wordHash, entries);
                 hashScore.addScore(wordHash, added);
                 hashDate.setScore(wordHash, intTime(updateTime));
             }
+            entries = null;
         }
-        entries = null;
         return added;
     }
 
-    public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime) {
-        plasmaWordIndexEntryContainer container = null;
-        plasmaWordIndexEntry[] entries = null;
-        synchronized (cache) {
-            container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+    public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime, boolean dhtCase) {
+        if (dhtCase) synchronized (kCache) {
+            // put container into kCache
+            plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
+            container.add(newEntry);
+            kCache.put(new Long(updateTime + kCacheInc), container);
+            kCacheInc++;
+            if (kCacheInc > 10000) kCacheInc = 0;
+            return true;
+        } else synchronized (wCache) {
+            plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
             if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
-            entries = new plasmaWordIndexEntry[] { newEntry };
+            plasmaWordIndexEntry[] entries = new plasmaWordIndexEntry[] { newEntry };
             if (container.add(entries, updateTime) > 0) {
-                cache.put(wordHash, container);
+                wCache.put(wordHash, container);
                 hashScore.incScore(wordHash);
                 hashDate.setScore(wordHash, intTime(updateTime));
                 return true;
             }
+            container = null;
+            entries = null;
+            return false;
         }
-        container = null;
-        entries = null;
-        return false;
     }
 
     public void close(int waitingSeconds) {

Modified: trunk/source/de/anomic/plasma/plasmaWordIndexInterface.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndexInterface.java 2006-03-12 18:08:48 UTC (rev 1879)
+++ trunk/source/de/anomic/plasma/plasmaWordIndexInterface.java 2006-03-13 10:43:12 UTC (rev 1880)
@@ -55,7 +55,8 @@
     public plasmaWordIndexEntryContainer deleteContainer(String wordHash);
     
     public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
-    public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
+    public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean dhtCase);
+    public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
 
     public void close(int waitingSeconds);
 

Modified: trunk/yacy.init
===================================================================
--- trunk/yacy.init     2006-03-12 18:08:48 UTC (rev 1879)
+++ trunk/yacy.init     2006-03-13 10:43:12 UTC (rev 1880)
@@ -310,18 +310,6 @@
 #staticIP if you have a static IP, you can use this setting
 staticIP=
 
-# if the process is running behind a NAT or ROUTER, we cannot easily identify
-# the public IP of the process. We can ask a public IP responder, but cannot
-# rely on it. Therefore, AnomicHTTPProxy includes it's own responder.
-# But for the first running peer this is not an option.
-# The author uses a DI-604 router, which can be
-# asked for the public IP. If you own a DI-604 as well, please set the
-# DI604use to true and put in your router password, it will not be used for any
-# other purpose of asking for the IP
-#DI604use=true
-DI604use=false
-DI604pw=
-
 # each time the proxy starts up, it can trigger the local browser to show the
 # status page. This is active by default, to make it easier for first-time
 # users to understand what this application does. You can disable browser
@@ -513,7 +501,6 @@
 # -Xms<size> set initial Java heap size
 javastart_Xms=Xms10m
 
-
 # performance properties for the word index cache
 # wordCacheMaxLow/High is the number of word indexes that shall be held in the
 # ram cache during indexing. When YaCy is shut down, this cache must be
@@ -522,9 +509,7 @@
 # remote index transmissions and search requests
 # maxWaitingWordFlush gives the number of seconds that the shutdown
 # may last for the word flush
-wordCacheMaxLow  = 12000
-wordCacheMaxHigh = 16000
-maxWaitingWordFlush = 180
+wordCacheMaxCount = 12000
 
 # Specifies if yacy can be used as transparent http proxy.
 # 

_______________________________________________
YaCy-svn mailing list
[email protected]
http://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an