Author: orbiter
Date: 2008-01-24 23:49:00 +0100 (Thu, 24 Jan 2008)
New Revision: 4397

Added:
   trunk/source/de/anomic/plasma/plasmaSearchAPI.java
Modified:
   trunk/htroot/IndexControlRWIs_p.html
   trunk/htroot/IndexControlRWIs_p.java
   trunk/source/de/anomic/http/httpc.java
   trunk/source/de/anomic/kelondro/kelondroDyn.java
   trunk/source/de/anomic/kelondro/kelondroEcoTable.java
   trunk/source/de/anomic/kelondro/kelondroRotateIterator.java
   trunk/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java
   trunk/source/de/anomic/plasma/plasmaCrawlProfile.java
   trunk/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
   trunk/source/de/anomic/plasma/plasmaHTCache.java
   trunk/source/de/anomic/plasma/plasmaSwitchboard.java
   trunk/source/de/anomic/yacy/yacyCore.java
   trunk/source/de/anomic/yacy/yacyNewsDB.java
   trunk/source/de/anomic/yacy/yacyNewsPool.java
   trunk/yacy.init
Log:
- refactoring of IndexControlRWIs_p: moved the static helper methods to their 
own class (plasmaSearchAPI); better Dublin Core naming
- fix for 
http://forum.yacy-websuche.de/viewtopic.php?f=5&t=759&hilit=&p=4866#p4866
- some bugfixes in EcoTable concerning the remove method
- switched more tables to Eco: crawl Profiles, htcache, seeddb, newsdb

Modified: trunk/htroot/IndexControlRWIs_p.html
===================================================================
--- trunk/htroot/IndexControlRWIs_p.html        2008-01-24 21:37:06 UTC (rev 
4396)
+++ trunk/htroot/IndexControlRWIs_p.html        2008-01-24 22:49:00 UTC (rev 
4397)
@@ -40,10 +40,10 @@
            <td colspan="1">document type</td>
       <tr class="TableCellDark">
         <td style="background-color:#FFFFFF">&nbsp;</td>
-        <td>reference</td>
         <td>description</td>
-        <td>author</td>
-        <td>tags</td>
+        <td>title</td>
+        <td>creator</td>
+        <td>subject</td>
         <td>url</td>
         <td>emphasized</td>
         <td>image</td>
@@ -55,10 +55,10 @@
       <tr class="TableCellDark">
         <td style="background-color:#FFFFFF">&nbsp;</td>
         <td>#[allurl]#</td>
-        <td>#[reference]#</td>
         <td>#[description]#</td>
-        <td>#[author]#</td>
-        <td>#[tag]#</td>
+        <td>#[title]#</td>
+        <td>#[creator]#</td>
+        <td>#[subject]#</td>
         <td>#[url]#</td>
         <td>#[emphasized]#</td>
         <td>#[image]#</td>
@@ -70,10 +70,10 @@
       <tr class="TableCellLight">
         <td class="TableCellDark">Selection</td>
         <td><input type="checkbox" name="allurl" id="allurl" checked="checked" 
/></td>
-        <td><input type="checkbox" name="reference" 
onclick="document.selection.allurl.checked=false" /></td>
         <td><input type="checkbox" name="description" 
onclick="document.selection.allurl.checked=false" /></td>
-        <td><input type="checkbox" name="author" 
onclick="document.selection.allurl.checked=false" /></td>
-        <td><input type="checkbox" name="tag" 
onclick="document.selection.allurl.checked=false" /></td>
+        <td><input type="checkbox" name="title" 
onclick="document.selection.allurl.checked=false" /></td>
+        <td><input type="checkbox" name="creator" 
onclick="document.selection.allurl.checked=false" /></td>
+        <td><input type="checkbox" name="subject" 
onclick="document.selection.allurl.checked=false" /></td>
         <td><input type="checkbox" name="url" 
onclick="document.selection.allurl.checked=false" /></td>
         <td><input type="checkbox" name="emphasized" 
onclick="document.selection.allurl.checked=false" /></td>
         <td><input type="checkbox" name="image" 
onclick="document.selection.allurl.checked=false" /></td>

Modified: trunk/htroot/IndexControlRWIs_p.java
===================================================================
--- trunk/htroot/IndexControlRWIs_p.java        2008-01-24 21:37:06 UTC (rev 
4396)
+++ trunk/htroot/IndexControlRWIs_p.java        2008-01-24 22:49:00 UTC (rev 
4397)
@@ -29,7 +29,6 @@
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -43,13 +42,11 @@
 import de.anomic.index.indexURLEntry;
 import de.anomic.kelondro.kelondroBitfield;
 import de.anomic.plasma.plasmaCondenser;
+import de.anomic.plasma.plasmaSearchAPI;
 import de.anomic.plasma.plasmaSearchEvent;
-import de.anomic.plasma.plasmaSearchQuery;
 import de.anomic.plasma.plasmaSearchRankingProcess;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.urlPattern.abstractURLPattern;
-import de.anomic.plasma.urlPattern.plasmaURLPattern;
-import de.anomic.server.serverDate;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 import de.anomic.yacy.yacyClient;
@@ -92,7 +89,7 @@
             if (post.containsKey("keystringsearch")) {
                 keyhash = plasmaCondenser.word2hash(keystring);
                 prop.put("keyhash", keyhash);
-                final plasmaSearchRankingProcess ranking = 
genSearchresult(prop, sb, keyhash, null, sortorder, false);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 1);
                     prop.put("searchresult_word", keystring);
@@ -103,7 +100,7 @@
                 if (keystring.length() == 0 || 
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
                     prop.put("keystring", "&lt;not possible to compute word 
from hash&gt;");
                 }
-                final plasmaSearchRankingProcess ranking = 
genSearchresult(prop, sb, keyhash, null, sortorder, false);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 2);
                     prop.put("searchresult_wordhash", keyhash);
@@ -160,10 +157,10 @@
                 if (keystring.length() == 0 || 
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
                     prop.put("keystring", "&lt;not possible to compute word 
from hash&gt;");
                 }
-                kelondroBitfield flags = compileFlags(post);
+                kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
                 int count = (post.get("lines", "all").equals("all")) ? -1 : 
post.getInt("lines", -1);
-                final plasmaSearchRankingProcess ranking = 
genSearchresult(prop, sb, keyhash, flags, sortorder, true);
-                genURLList(prop, keyhash, keystring, ranking, flags, count, 
sortorder);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder, true);
+                plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, 
flags, count, sortorder);
             }
 
             // transfer to other peer
@@ -307,7 +304,7 @@
                 sb.wordIndex.removeEntries(keyhash, urlHashes);
             }
         
-            if (prop.getInt("searchresult", 0) == 3) listHosts(prop, keyhash);
+            if (prop.getInt("searchresult", 0) == 3) 
plasmaSearchAPI.listHosts(prop, keyhash);
         }
         
 
@@ -317,161 +314,4 @@
         return prop;
     }
     
-    private static kelondroBitfield compileFlags(serverObjects post) {
-        kelondroBitfield b = new kelondroBitfield(4);
-        if (post.get("allurl", "").equals("on")) return null;
-        if (post.get("flags") != null) {
-            if (post.get("flags","").length() == 0) return null;
-            return new kelondroBitfield(4, (String) post.get("flags"));
-        }
-        if (post.get("reference", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_description, true);
-        if (post.get("description", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_title, true);
-        if (post.get("author", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_creator, true);
-        if (post.get("tag", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_subject, true);
-        if (post.get("url", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_identifier, true);
-        if (post.get("emphasized", "").equals("on")) 
b.set(indexRWIEntry.flag_app_emphasized, true);
-        if (post.get("image", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasimage, true);
-        if (post.get("audio", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasaudio, true);
-        if (post.get("video", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasvideo, true);
-        if (post.get("app", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasapp, true);
-        if (post.get("indexof", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_indexof, true);
-        return b;
-    }
-    
-    private static void listHosts(serverObjects prop, String startHash) {
-        // list known hosts
-        yacySeed seed;
-        int hc = 0;
-        prop.put("searchresult_keyhash", startHash);
-        if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) {
-            Iterator<yacySeed> e = 
yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
-            while (e.hasNext()) {
-                seed = (yacySeed) e.next();
-                if (seed != null) {
-                    prop.put("searchresult_hosts_" + hc + "_hosthash", 
seed.hash);
-                    prop.putHTML("searchresult_hosts_" + hc + "_hostname", 
seed.hash + " " + seed.get(yacySeed.NAME, "nameless"));
-                    hc++;
-                }
-            }
-            prop.put("searchresult_hosts", hc);
-        } else {
-            prop.put("searchresult_hosts", "0");
-        }
-    }
-
-    private static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int 
sortorder, boolean fetchURLs) {
-        plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, 
sb.getRanking(), filter);
-        plasmaSearchRankingProcess ranked = new 
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE);
-        ranked.execQuery(fetchURLs);
-        
-        if (ranked.filteredCount() == 0) {
-            prop.put("searchresult", 2);
-            prop.put("searchresult_wordhash", keyhash);
-        } else {
-            prop.put("searchresult", 3);
-            prop.put("searchresult_allurl", ranked.filteredCount());
-            prop.put("searchresult_reference", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_description]);
-            prop.put("searchresult_description", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_title]);
-            prop.put("searchresult_author", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_creator]);
-            prop.put("searchresult_tag", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_subject]);
-            prop.put("searchresult_url", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_identifier]);
-            prop.put("searchresult_emphasized", 
ranked.flagCount()[indexRWIEntry.flag_app_emphasized]);
-            prop.put("searchresult_image", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasimage]);
-            prop.put("searchresult_audio", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasaudio]);
-            prop.put("searchresult_video", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasvideo]);
-            prop.put("searchresult_app", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasapp]);
-            prop.put("searchresult_indexof", 
ranked.flagCount()[plasmaCondenser.flag_cat_indexof]);
-        }
-        return ranked;
-    }
-    
-    private static void genURLList(serverObjects prop, String keyhash, String 
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int 
maxlines, int ordering) {
-        // search for a word hash and generate a list of url links
-        prop.put("genUrlList_keyHash", keyhash);
-        
-        if (ranked.filteredCount() == 0) {
-            prop.put("genUrlList", 1);
-            prop.put("genUrlList_count", 0);
-            prop.put("searchresult", 2);
-        } else {
-            prop.put("genUrlList", 2);
-            prop.put("searchresult", 3);
-            prop.put("genUrlList_flags", (flags == null) ? "" : 
flags.exportB64());
-            prop.put("genUrlList_lines", maxlines);
-            prop.put("genUrlList_ordering", ordering);
-            int i = 0;
-            yacyURL url;
-            indexURLEntry entry;
-            String us;
-            long rn = -1;
-            while ((ranked.size() > 0) && ((entry = ranked.bestURL(false)) != 
null)) {
-                if ((entry == null) || (entry.comp() == null)) continue;
-                url = entry.comp().url();
-                if (url == null) continue;
-                us = url.toNormalform(false, false);
-                if (rn == -1) rn = entry.ranking();
-                prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
-                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
-                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", 
entry.word().urlHash());
-                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", 
keystring);
-                prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", 
keyhash);
-                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", 
us);
-                prop.put("genUrlList_urlList_"+i+"_urlExists_urlStringShort", 
(us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(20,  40) + 
"...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + 
us.substring(20)) : us));
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", 
(entry.ranking() - rn));
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", 
yacyURL.domLengthEstimation(entry.hash()));
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", 
plasmaSearchRankingProcess.ybr(entry.hash()));
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", 
ranked.getOrder().authority(entry.hash()));
-                prop.put("genUrlList_urlList_"+i+"_urlExists_date", 
serverDate.formatShortDay(new Date(entry.word().lastModified())));
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", 
entry.word().wordsintitle());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", 
entry.word().wordsintext());
-                
prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrasesintext", 
entry.word().phrasesintext());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_llocal", 
entry.word().llocal());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_lother", 
entry.word().lother());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_hitcount", 
entry.word().hitcount());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_worddistance", 
entry.word().worddistance());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_pos", 
entry.word().posintext());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrase", 
entry.word().posofphrase());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_posinphrase", 
entry.word().posinphrase());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urlcomps", 
entry.word().urlcomps());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urllength", 
entry.word().urllength());
-                prop.put("genUrlList_urlList_"+i+"_urlExists_props",
-                               
((entry.word().flags().get(plasmaCondenser.flag_cat_indexof)) ? "appears on 
index page, " : "") +
-                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains 
images, " : "") +
-                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains 
audio, " : "") +
-                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains 
video, " : "") +
-                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains 
applications, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_identifier)) ? "appears in 
url, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_title)) ? "appears in 
description, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_creator)) ? "appears in 
author, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_subject)) ? "appears in 
tags, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_description)) ? "appears 
in reference, " : "") +
-                        
((entry.word().flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears 
emphasized, " : "") +
-                        ((yacyURL.probablyRootURL(entry.word().urlHash())) ? 
"probably root url" : "")
-                );
-                if 
(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
-                    
prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1");
-                }
-                i++;
-                if ((maxlines >= 0) && (i >= maxlines)) break;
-            }
-            Iterator<String> iter = ranked.miss(); // iterates url hash strings
-            while (iter.hasNext()) {
-                us = (String) iter.next();
-                prop.put("genUrlList_urlList_"+i+"_urlExists", "0");
-                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
-                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", 
us);
-                i++;
-            }
-            prop.put("genUrlList_urlList", i);
-            prop.putHTML("genUrlList_keyString", keystring);
-            prop.put("genUrlList_count", i);
-            putBlacklists(prop, 
listManager.getDirListing(listManager.listsPath));
-        }
-    }
-    
-    private static void putBlacklists(serverObjects prop, String[] lists) {
-        prop.put("genUrlList_blacklists", lists.length);
-        for (int i=0; i<lists.length; i++)
-            prop.put("genUrlList_blacklists_" + i + "_name", lists[i]);
-    }
 }

Modified: trunk/source/de/anomic/http/httpc.java
===================================================================
--- trunk/source/de/anomic/http/httpc.java      2008-01-24 21:37:06 UTC (rev 
4396)
+++ trunk/source/de/anomic/http/httpc.java      2008-01-24 22:49:00 UTC (rev 
4397)
@@ -417,11 +417,8 @@
             
             // if we reached this point, we should have a connection
         } catch (UnknownHostException e) {
-            if (this.socket != null) {
-                // no need to track this, the socket cannot be established
-                synchronized (activeConnections) 
{activeConnections.remove(this);}
-            }
-            this.socket = null;
+            serverLog.logFine("HTTPC", "Couldn't find host " + server);
+            close();
             throw new IOException("unknown host: " + server);
         } catch (IOException e) {
             // There was an error while connecting the socket, probably a 
SocketTimeoutException

Modified: trunk/source/de/anomic/kelondro/kelondroDyn.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroDyn.java    2008-01-24 21:37:06 UTC 
(rev 4396)
+++ trunk/source/de/anomic/kelondro/kelondroDyn.java    2008-01-24 22:49:00 UTC 
(rev 4397)
@@ -105,7 +105,7 @@
                 fbi = new kelondroEcoTable(file, rowdef, 
kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
             }
         }
-        this.index = (useObjectCache) ? (kelondroIndex) new kelondroCache(fbi) 
: fbi;
+        this.index = ((useObjectCache) && (!(fbi instanceof 
kelondroEcoTable))) ? (kelondroIndex) new kelondroCache(fbi) : fbi;
         this.keylen = key;
         this.reclen = nodesize;
         this.fillChar = fillChar;
@@ -345,7 +345,7 @@
         int recpos = 0;
         byte[] k;
         while (index.get(k = dynKey(key, recpos)) != null) {
-            index.remove(k, true);
+            index.remove(k, false);
             buffer.remove(k);
             recpos++;
         }
@@ -520,4 +520,4 @@
             return -1;
         }
     }
-}
+}
\ No newline at end of file

Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-24 
21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-24 
22:49:00 UTC (rev 4397)
@@ -119,6 +119,7 @@
             byte[] record = new byte[rowdef.objectsize];
             byte[] key = new byte[rowdef.primaryKeyLength];
             int fs = (int) file.size();
+            System.out.print("*** initializing RAM index for EcoTable " + 
tablefile + ":");
             for (int i = 0; i < fs; i++) {
                 // read entry
                 file.get(i, record, 0);
@@ -129,9 +130,17 @@
             
                 // write the tail into the table
                 if (table != null) table.addUnique(taildef.newEntry(record, 
rowdef.primaryKeyLength, true));
+                
+                if ((i % 10000) == 0) {
+                    System.out.print('.');
+                    System.out.flush();
+                }
             }
+            System.out.print(" -ordering- ..");
+            System.out.flush();
             // check consistency
             ArrayList<Integer[]> doubles = index.removeDoubles();
+            System.out.println(" -removed " + doubles.size() + " doubles- 
done.");
             if (doubles.size() > 0) {
                 System.out.println("DEBUG " + tablefile + ": WARNING - 
EcoTable " + tablefile + " has " + doubles.size() + " doubles");
                 // from all the doubles take one, put it back to the index and 
remove the others from the file
@@ -392,51 +401,57 @@
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
         assert ((table == null) || (table.size() == index.size()));
         assert keepOrder == false; // this class cannot keep the order during 
a remove
+        assert key.length == rowdef.primaryKeyLength;
         int i = index.geti(key);
         if (i == -1) return null; // nothing to do
         
         // prepare result
         byte[] b = new byte[rowdef.objectsize];
         byte[] p = new byte[rowdef.objectsize];
+        int sb = index.size();
         if (table == null) {
-            index.removei(key);
-            file.get(i, b, 0);
-            file.cleanLast(p, 0);
-            file.put(i, p, 0);
-            byte[] k = new byte[rowdef.primaryKeyLength];
-            System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
-            index.puti(k, i);
+            if (i == index.size() - 1) {
+                index.removei(key);
+                file.clean(i, b, 0);
+            } else {
+                index.removei(key);
+                file.get(i, b, 0);
+                file.cleanLast(p, 0);
+                file.put(i, p, 0);
+                byte[] k = new byte[rowdef.primaryKeyLength];
+                System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
+                index.puti(k, i);
+            }
             assert (file.size() == index.size());
-            assert ((table == null) || (table.size() == index.size()));
         } else {
+            // get result value from the table copy, so we don't need to read 
it from the file
             kelondroRow.Entry v = table.get(i);
-            assert key.length == rowdef.primaryKeyLength;
             System.arraycopy(key, 0, b, 0, key.length);
             System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, 
taildef.objectsize);
+            
             if (i == index.size() - 1) {
                 // special handling if the entry is the last entry in the file
                 index.removei(key);
                 table.removeRow(i, false);
                 file.clean(i);
-                assert (file.size() == index.size());
-                assert ((table == null) || (table.size() == index.size()));
             } else {
                 // switch values
+                index.removei(key);
+                
                 kelondroRow.Entry te = table.removeOne();
                 table.set(i, te);
 
                 file.cleanLast(p, 0);
                 file.put(i, p, 0);
                 kelondroRow.Entry lr = rowdef.newEntry(p);
-                
-                index.removei(key);
                 index.puti(lr.getPrimaryKeyBytes(), i);
-                assert (file.size() == index.size());
-                assert ((table == null) || (table.size() == index.size())) : 
"table.size() = " + table.size() + ", index.size() = " + index.size();
             }
+            assert (file.size() == index.size());
+            assert (table.size() == index.size()) : "table.size() = " + 
table.size() + ", index.size() = " + index.size();
         }
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
         assert ((table == null) || (table.size() == index.size()));
+        assert index.size() + 1 == sb : "index.size() = " + index.size() + ", 
sb = " + sb;
         return rowdef.newEntry(b);
     }
 
@@ -448,7 +463,7 @@
         kelondroRow.Entry lr = rowdef.newEntry(le);
         int i = index.removei(lr.getPrimaryKeyBytes());
         assert i >= 0;
-        table.removeRow(i, false);
+        if (table != null) table.removeOne();
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
         return lr;
     }

Modified: trunk/source/de/anomic/kelondro/kelondroRotateIterator.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroRotateIterator.java 2008-01-24 
21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/kelondro/kelondroRotateIterator.java 2008-01-24 
22:49:00 UTC (rev 4397)
@@ -30,12 +30,14 @@
     
     kelondroCloneableIterator<E> a, clone;
     Object modifier;
+    boolean nempty;
     
     public kelondroRotateIterator(kelondroCloneableIterator<E> a, Object 
modifier) {
         // this works currently only for String-type key iterations
         this.a = a;
         this.modifier = modifier;
         this.clone = (kelondroCloneableIterator<E>) a.clone(modifier);
+        this.nempty = this.clone.hasNext();
     }
     
        public kelondroRotateIterator<E> clone(Object modifier) {
@@ -43,7 +45,7 @@
     }
     
     public boolean hasNext() {
-        return true;
+        return this.nempty;
     }
     
     public E next() {
@@ -52,6 +54,7 @@
        // from the hasNext() method
         if (!(a.hasNext())) {
             a = (kelondroCloneableIterator<E>) clone.clone(modifier);
+            assert a.hasNext();
         }
         return a.next();
     }

Modified: trunk/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java
===================================================================
--- trunk/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java 
2008-01-24 21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java 
2008-01-24 22:49:00 UTC (rev 4397)
@@ -15,7 +15,7 @@
 public class plasmaCrawlNURLImporter extends AbstractImporter implements 
dbImporter {
 
        private File plasmaPath = null;
-    private HashSet importProfileHandleCache = new HashSet();
+    private HashSet<String> importProfileHandleCache = new HashSet<String>();
     private plasmaCrawlProfile importProfileDB;
     private plasmaCrawlNURL importNurlDB;
     private int importStartSize;
@@ -129,8 +129,8 @@
                     this.log.logInfo("Starting to import '" + 
this.importNurlDB.size() + "' entries not available in any stack.");
                 }
                 
-                // getting an interator and loop through the URL entries
-                Iterator entryIter = (stackTypes[stackType] == -1) ? 
this.importNurlDB.iterator(stackType) : null;
+                // getting an iterator and loop through the URL entries
+                Iterator<plasmaCrawlEntry> entryIter = (stackTypes[stackType] 
== -1) ? this.importNurlDB.iterator(stackType) : null;
                 while (true) {
                     
                     String nextHash = null;
@@ -147,7 +147,7 @@
                             if (!entryIter.hasNext()) break;
                             
                             this.urlCount++;
-                            nextEntry = (plasmaCrawlEntry) entryIter.next();
+                            nextEntry = entryIter.next();
                             nextHash = nextEntry.url().hash();
                         }
                     } catch (IOException e) {

Modified: trunk/source/de/anomic/plasma/plasmaCrawlProfile.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlProfile.java       2008-01-24 
21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/plasma/plasmaCrawlProfile.java       2008-01-24 
22:49:00 UTC (rev 4397)
@@ -70,7 +70,7 @@
         this.profileTableFile = file;
         this.preloadTime = preloadTime;
         profileTableFile.getParentFile().mkdirs();
-        kelondroDyn dyn = new kelondroDyn(profileTableFile, true, true, 
preloadTime, yacySeedDB.commonHashLength, 2000, '#', 
kelondroNaturalOrder.naturalOrder, true, false, true);
+        kelondroDyn dyn = new kelondroDyn(profileTableFile, true, true, 
preloadTime, yacySeedDB.commonHashLength, 2000, '#', 
kelondroNaturalOrder.naturalOrder, false, false, true);
         profileTable = new kelondroMapObjects(dyn, 500);
     }
     
@@ -79,7 +79,7 @@
         if (profileTable != null) profileTable.close();
         if (!(profileTableFile.delete())) throw new RuntimeException("cannot 
delete crawl profile database");
         profileTableFile.getParentFile().mkdirs();
-        kelondroDyn dyn = new kelondroDyn(profileTableFile, true, true, 
preloadTime, yacySeedDB.commonHashLength, 2000, '#', 
kelondroNaturalOrder.naturalOrder, true, false, true);
+        kelondroDyn dyn = new kelondroDyn(profileTableFile, true, true, 
preloadTime, yacySeedDB.commonHashLength, 2000, '#', 
kelondroNaturalOrder.naturalOrder, false, false, true);
         profileTable = new kelondroMapObjects(dyn, 500);
     }
     

Modified: trunk/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java     2008-01-24 
21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java     2008-01-24 
22:49:00 UTC (rev 4397)
@@ -72,7 +72,7 @@
         this.robotsTableFile = robotsTableFile;
         this.preloadTime = preloadTime;
         robotsTableFile.getParentFile().mkdirs();
-        robotsTable = new kelondroMapObjects(new kelondroDyn(robotsTableFile, 
true, true, preloadTime, 256, 512, '_', kelondroNaturalOrder.naturalOrder, 
true, false, true), 100);
+        robotsTable = new kelondroMapObjects(new kelondroDyn(robotsTableFile, 
true, true, preloadTime, 256, 512, '_', kelondroNaturalOrder.naturalOrder, 
false, false, true), 100);
     }
     
     private void resetDatabase() {
@@ -80,7 +80,7 @@
         if (robotsTable != null) robotsTable.close();
         if (!(robotsTableFile.delete())) throw new RuntimeException("cannot 
delete robots.txt database");
         robotsTableFile.getParentFile().mkdirs();
-        robotsTable = new kelondroMapObjects(new kelondroDyn(robotsTableFile, 
true, true, preloadTime, 256, 512, '_', kelondroNaturalOrder.naturalOrder, 
true, false, true), 100);
+        robotsTable = new kelondroMapObjects(new kelondroDyn(robotsTableFile, 
true, true, preloadTime, 256, 512, '_', kelondroNaturalOrder.naturalOrder, 
false, false, true), 100);
     }
     
     public void close() {
@@ -103,7 +103,7 @@
     
     public Entry getEntry(String hostName) {
         try {
-            Map record = this.robotsTable.getMap(hostName);
+            Map<String, String> record = this.robotsTable.getMap(hostName);
             if (record == null) return null;
             return new Entry(hostName, record);
         } catch (kelondroException e) {
@@ -114,14 +114,16 @@
     
     public Entry addEntry(
                String hostName, 
-               ArrayList disallowPathList, 
+               ArrayList<String> disallowPathList, 
                Date loadedDate, 
                Date modDate, 
                String eTag, 
                String sitemap,
                Integer crawlDelay
     ) {
-        Entry entry = new 
Entry(hostName,disallowPathList,loadedDate,modDate,eTag,sitemap,crawlDelay);
+        Entry entry = new Entry(
+                hostName, disallowPathList, loadedDate, modDate,
+                eTag, sitemap, crawlDelay);
         addEntry(entry);
         return entry;
     }
@@ -129,7 +131,7 @@
     public String addEntry(Entry entry) {
         // writes a new page and returns key
         try {
-            this.robotsTable.set(entry.hostName,entry.mem);
+            this.robotsTable.set(entry.hostName, entry.mem);
             return entry.hostName;
         } catch (IOException e) {
             return null;
@@ -145,16 +147,16 @@
         public static final String CRAWL_DELAY = "crawlDelay";
         
         // this is a simple record structure that holds all properties of a single crawl start
-        Map mem;
-        private LinkedList disallowPathList;
+        Map<String, String> mem;
+        private LinkedList<String> disallowPathList;
         String hostName;
         
-        public Entry(String hostName, Map mem) {
+        public Entry(String hostName, Map<String, String> mem) {
             this.hostName = hostName.toLowerCase();
             this.mem = mem; 
             
             if (this.mem.containsKey(DISALLOW_PATH_LIST)) {
-                this.disallowPathList = new LinkedList();
+                this.disallowPathList = new LinkedList<String>();
                 String csPl = (String) this.mem.get(DISALLOW_PATH_LIST);
                 if (csPl.length() > 0){
                     String[] pathArray = csPl.split(ROBOTS_DB_PATH_SEPARATOR);
@@ -163,13 +165,13 @@
                     }
                 }
             } else {
-                this.disallowPathList = new LinkedList();
+                this.disallowPathList = new LinkedList<String>();
             }
         }  
         
         public Entry(
                 String hostName, 
-                ArrayList disallowPathList, 
+                ArrayList<String> disallowPathList, 
                 Date loadedDate,
                 Date modDate,
                 String eTag,
@@ -179,9 +181,9 @@
             if ((hostName == null) || (hostName.length() == 0)) throw new 
IllegalArgumentException("The hostname is missing");
             
             this.hostName = hostName.trim().toLowerCase();
-            this.disallowPathList = new LinkedList();
+            this.disallowPathList = new LinkedList<String>();
             
-            this.mem = new HashMap(5);
+            this.mem = new HashMap<String, String>(5);
             if (loadedDate != null) 
this.mem.put(LOADED_DATE,Long.toString(loadedDate.getTime()));
             if (modDate != null) 
this.mem.put(MOD_DATE,Long.toString(modDate.getTime()));
             if (eTag != null) this.mem.put(ETAG,eTag);
@@ -259,9 +261,9 @@
             else  path = path.replaceAll(ROBOTS_DB_PATH_SEPARATOR,"%3B");
             
             
-            Iterator pathIter = this.disallowPathList.iterator();
+            Iterator<String> pathIter = this.disallowPathList.iterator();
             while (pathIter.hasNext()) {
-                String nextPath = (String) pathIter.next();
+                String nextPath = pathIter.next();
                 // allow rule
                 if (nextPath.startsWith("!") && nextPath.length() > 1 && 
path.startsWith(nextPath.substring(1))) {
                     return false;

Modified: trunk/source/de/anomic/plasma/plasmaHTCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaHTCache.java    2008-01-24 21:37:06 UTC 
(rev 4396)
+++ trunk/source/de/anomic/plasma/plasmaHTCache.java    2008-01-24 22:49:00 UTC 
(rev 4397)
@@ -93,7 +93,7 @@
 
 public final class plasmaHTCache {
     
-    public static final String DB_NAME = "responseHeader1.db";
+    public static final String DB_NAME = "responseHeader2.db";
     
     private static final int stackLimit = 150; // if we exceed that limit, we 
do not check idle
     public  static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of 
a day
@@ -307,7 +307,7 @@
     private static void openResponseHeaderDB(long preloadTime) {
         // open the response header database
         File dbfile = new File(cachePath, DB_NAME);
-        responseHeaderDB = new kelondroMapObjects(new kelondroDyn(dbfile, 
true, true, preloadTime, yacySeedDB.commonHashLength, 150, '#', 
kelondroBase64Order.enhancedCoder, true, false, true), 500);
+        responseHeaderDB = new kelondroMapObjects(new kelondroDyn(dbfile, 
true, true, preloadTime, yacySeedDB.commonHashLength, 150, '#', 
kelondroBase64Order.enhancedCoder, false, false, true), 500);
     }
     
     private static void deleteOldHTCache(File directory) {

Added: trunk/source/de/anomic/plasma/plasmaSearchAPI.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-01-24 21:37:06 UTC 
(rev 4396)
+++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-01-24 22:49:00 UTC 
(rev 4397)
@@ -0,0 +1,206 @@
+// plasmaSearchAPI.java
+// -----------------------
+// (C) 2008 by Michael Peter Christen; [EMAIL PROTECTED], Frankfurt a. M., Germany
+// first published 2008 on http://yacy.net
+// 
+// This is a part of YaCy, a peer-to-peer based web search engine
+// 
+// $LastChangedDate: 2007-11-14 01:15:28 +0000 (Mi, 14 Nov 2007) $
+// $LastChangedRevision: 4216 $
+// $LastChangedBy: orbiter $
+// 
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+package de.anomic.plasma;
+
+import java.util.Date;
+import java.util.Iterator;
+
+import de.anomic.data.listManager;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexURLEntry;
+import de.anomic.kelondro.kelondroBitfield;
+import de.anomic.plasma.urlPattern.plasmaURLPattern;
+import de.anomic.server.serverDate;
+import de.anomic.server.serverObjects;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+import de.anomic.yacy.yacyURL;
+
+public class plasmaSearchAPI {
+    // Collection of static methods for a search servlet. It exists only so that the same procedures are not defined more than once.
+    
+
+    public static kelondroBitfield compileFlags(serverObjects post) {
+        kelondroBitfield b = new kelondroBitfield(4);
+        if (post.get("allurl", "").equals("on")) return null;
+        if (post.get("flags") != null) {
+            if (post.get("flags","").length() == 0) return null;
+            return new kelondroBitfield(4, (String) post.get("flags"));
+        }
+        if (post.get("description", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_description, true);
+        if (post.get("title", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_title, true);
+        if (post.get("creator", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_creator, true);
+        if (post.get("subject", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_subject, true);
+        if (post.get("url", "").equals("on")) 
b.set(indexRWIEntry.flag_app_dc_identifier, true);
+        if (post.get("emphasized", "").equals("on")) 
b.set(indexRWIEntry.flag_app_emphasized, true);
+        if (post.get("image", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasimage, true);
+        if (post.get("audio", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasaudio, true);
+        if (post.get("video", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasvideo, true);
+        if (post.get("app", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_hasapp, true);
+        if (post.get("indexof", "").equals("on")) 
b.set(plasmaCondenser.flag_cat_indexof, true);
+        return b;
+    }
+    
+    public static void listHosts(serverObjects prop, String startHash) {
+        // list known hosts
+        yacySeed seed;
+        int hc = 0;
+        prop.put("searchresult_keyhash", startHash);
+        if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) {
+            Iterator<yacySeed> e = 
yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
+            while (e.hasNext()) {
+                seed = (yacySeed) e.next();
+                if (seed != null) {
+                    prop.put("searchresult_hosts_" + hc + "_hosthash", 
seed.hash);
+                    prop.putHTML("searchresult_hosts_" + hc + "_hostname", 
seed.hash + " " + seed.get(yacySeed.NAME, "nameless"));
+                    hc++;
+                }
+            }
+            prop.put("searchresult_hosts", hc);
+        } else {
+            prop.put("searchresult_hosts", "0");
+        }
+    }
+
+    public static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int 
sortorder, boolean fetchURLs) {
+        plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, 
sb.getRanking(), filter);
+        plasmaSearchRankingProcess ranked = new 
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE);
+        ranked.execQuery(fetchURLs);
+        
+        if (ranked.filteredCount() == 0) {
+            prop.put("searchresult", 2);
+            prop.put("searchresult_wordhash", keyhash);
+        } else {
+            prop.put("searchresult", 3);
+            prop.put("searchresult_allurl", ranked.filteredCount());
+            prop.put("searchresult_description", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_description]);
+            prop.put("searchresult_title", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_title]);
+            prop.put("searchresult_creator", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_creator]);
+            prop.put("searchresult_subject", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_subject]);
+            prop.put("searchresult_url", 
ranked.flagCount()[indexRWIEntry.flag_app_dc_identifier]);
+            prop.put("searchresult_emphasized", 
ranked.flagCount()[indexRWIEntry.flag_app_emphasized]);
+            prop.put("searchresult_image", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasimage]);
+            prop.put("searchresult_audio", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasaudio]);
+            prop.put("searchresult_video", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasvideo]);
+            prop.put("searchresult_app", 
ranked.flagCount()[plasmaCondenser.flag_cat_hasapp]);
+            prop.put("searchresult_indexof", 
ranked.flagCount()[plasmaCondenser.flag_cat_indexof]);
+        }
+        return ranked;
+    }
+    
+    public static void genURLList(serverObjects prop, String keyhash, String 
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int 
maxlines, int ordering) {
+        // search for a word hash and generate a list of url links
+        prop.put("genUrlList_keyHash", keyhash);
+        
+        if (ranked.filteredCount() == 0) {
+            prop.put("genUrlList", 1);
+            prop.put("genUrlList_count", 0);
+            prop.put("searchresult", 2);
+        } else {
+            prop.put("genUrlList", 2);
+            prop.put("searchresult", 3);
+            prop.put("genUrlList_flags", (flags == null) ? "" : 
flags.exportB64());
+            prop.put("genUrlList_lines", maxlines);
+            prop.put("genUrlList_ordering", ordering);
+            int i = 0;
+            yacyURL url;
+            indexURLEntry entry;
+            String us;
+            long rn = -1;
+            while ((ranked.size() > 0) && ((entry = ranked.bestURL(false)) != 
null)) {
+                if ((entry == null) || (entry.comp() == null)) continue;
+                url = entry.comp().url();
+                if (url == null) continue;
+                us = url.toNormalform(false, false);
+                if (rn == -1) rn = entry.ranking();
+                prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
+                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
+                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", 
entry.word().urlHash());
+                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", 
keystring);
+                prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", 
keyhash);
+                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", 
us);
+                prop.put("genUrlList_urlList_"+i+"_urlExists_urlStringShort", 
(us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(20,  40) + 
"...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + 
us.substring(20)) : us));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", 
(entry.ranking() - rn));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", 
yacyURL.domLengthEstimation(entry.hash()));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", 
plasmaSearchRankingProcess.ybr(entry.hash()));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", 
ranked.getOrder().authority(entry.hash()));
+                prop.put("genUrlList_urlList_"+i+"_urlExists_date", 
serverDate.formatShortDay(new Date(entry.word().lastModified())));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", 
entry.word().wordsintitle());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", 
entry.word().wordsintext());
+                
prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrasesintext", 
entry.word().phrasesintext());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_llocal", 
entry.word().llocal());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_lother", 
entry.word().lother());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_hitcount", 
entry.word().hitcount());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_worddistance", 
entry.word().worddistance());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_pos", 
entry.word().posintext());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrase", 
entry.word().posofphrase());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_posinphrase", 
entry.word().posinphrase());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urlcomps", 
entry.word().urlcomps());
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urllength", 
entry.word().urllength());
+                prop.put("genUrlList_urlList_"+i+"_urlExists_props",
+                        
((entry.word().flags().get(plasmaCondenser.flag_cat_indexof)) ? "appears on 
index page, " : "") +
+                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains 
images, " : "") +
+                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains 
audio, " : "") +
+                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains 
video, " : "") +
+                        
((entry.word().flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains 
applications, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_identifier)) ? "appears in 
url, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_title)) ? "appears in 
title, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_creator)) ? "appears in 
author, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_subject)) ? "appears in 
subject, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_dc_description)) ? "appears 
in description, " : "") +
+                        
((entry.word().flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears 
emphasized, " : "") +
+                        ((yacyURL.probablyRootURL(entry.word().urlHash())) ? 
"probably root url" : "")
+                );
+                if 
(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
+                    
prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1");
+                }
+                i++;
+                if ((maxlines >= 0) && (i >= maxlines)) break;
+            }
+            Iterator<String> iter = ranked.miss(); // iterates url hash strings
+            while (iter.hasNext()) {
+                us = (String) iter.next();
+                prop.put("genUrlList_urlList_"+i+"_urlExists", "0");
+                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
+                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", 
us);
+                i++;
+            }
+            prop.put("genUrlList_urlList", i);
+            prop.putHTML("genUrlList_keyString", keystring);
+            prop.put("genUrlList_count", i);
+            putBlacklists(prop, 
listManager.getDirListing(listManager.listsPath));
+        }
+    }
+    
+    public static void putBlacklists(serverObjects prop, String[] lists) {
+        prop.put("genUrlList_blacklists", lists.length);
+        for (int i=0; i<lists.length; i++)
+            prop.put("genUrlList_blacklists_" + i + "_name", lists[i]);
+    }
+}

Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-01-24 
21:37:06 UTC (rev 4396)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-01-24 
22:49:00 UTC (rev 4397)
@@ -856,15 +856,15 @@
      * 
      * @see plasmaSwitchboard#DBPATH for the folder this file lies in
      */
-    public static final String DBFILE_ACTIVE_CRAWL_PROFILES    = 
"crawlProfilesActive.db";
-    public static final String DBFILE_PASSIVE_CRAWL_PROFILES    = 
"crawlProfilesPassive.db";
+    public static final String DBFILE_ACTIVE_CRAWL_PROFILES    = 
"crawlProfilesActive1.db";
+    public static final String DBFILE_PASSIVE_CRAWL_PROFILES    = 
"crawlProfilesPassive1.db";
     /**
      * <p><code>public static final String 
<strong>DBFILE_CRAWL_ROBOTS</strong> = "crawlRobotsTxt.db"</code></p>
      * <p>Name of the file containing the database holding all 
<code>robots.txt</code>-entries of the lately crawled domains</p>
      * 
      * @see plasmaSwitchboard#DBPATH for the folder this file lies in
      */
-    public static final String DBFILE_CRAWL_ROBOTS      = "crawlRobotsTxt.db";
+    public static final String DBFILE_CRAWL_ROBOTS      = "crawlRobotsTxt1.db";
     /**
      * <p><code>public static final String <strong>DBFILE_USER</strong> = 
"DATA/SETTINGS/user.db"</code></p>
      * <p>Path to the user-DB, beginning from the YaCy-installation's 
top-folder. It holds all rights the created

Modified: trunk/source/de/anomic/yacy/yacyCore.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyCore.java   2008-01-24 21:37:06 UTC (rev 
4396)
+++ trunk/source/de/anomic/yacy/yacyCore.java   2008-01-24 22:49:00 UTC (rev 
4397)
@@ -134,14 +134,13 @@
         long memDHT_time = 
Long.parseLong(switchboard.getConfig("ramCacheDHT_time", "1000"));
         seedDB = new yacySeedDB(
                 sb,
-                new File(yacyDBPath, "seed1.new.db"),
-                new File(yacyDBPath, "seed1.old.db"),
-                new File(yacyDBPath, "seed1.pot.db"),
+                new File(yacyDBPath, "seed2.new.db"),
+                new File(yacyDBPath, "seed2.old.db"),
+                new File(yacyDBPath, "seed2.pot.db"),
                 memDHT_time);
 
         // create or init news database
-        long memNews_time = 
Long.parseLong(switchboard.getConfig("ramCacheNews_time", "1000"));
-        newsPool = new yacyNewsPool(yacyDBPath, memNews_time);
+        newsPool = new yacyNewsPool(yacyDBPath);
 
         loadSeedUploadMethods();
 

Modified: trunk/source/de/anomic/yacy/yacyNewsDB.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyNewsDB.java 2008-01-24 21:37:06 UTC (rev 
4396)
+++ trunk/source/de/anomic/yacy/yacyNewsDB.java 2008-01-24 22:49:00 UTC (rev 
4397)
@@ -50,30 +50,28 @@
 import java.util.Iterator;
 
 import de.anomic.kelondro.kelondroBase64Order;
-import de.anomic.kelondro.kelondroCache;
+import de.anomic.kelondro.kelondroEcoTable;
 import de.anomic.kelondro.kelondroException;
 import de.anomic.kelondro.kelondroIndex;
 import de.anomic.kelondro.kelondroRow;
-import de.anomic.kelondro.kelondroTree;
 import de.anomic.server.serverCodings;
 import de.anomic.server.serverDate;
 
 public class yacyNewsDB {
 
     private File path;
-    private long preloadTime;
     protected kelondroIndex news;
 
-    public yacyNewsDB(File path, long preloadTime) {
+    public yacyNewsDB(File path) {
         this.path = path;
-        this.preloadTime = preloadTime;
-        this.news = new kelondroCache(kelondroTree.open(path, true, 
preloadTime, yacyNewsRecord.rowdef));
+        this.news = new kelondroEcoTable(path, yacyNewsRecord.rowdef, 
kelondroEcoTable.tailCacheUsageAuto, 10, 0);
+        //this.news = new kelondroCache(kelondroTree.open(path, true, 
preloadTime, yacyNewsRecord.rowdef));
     }
 
     private void resetDB() {
         try {close();} catch (Exception e) {}
         if (path.exists()) path.delete();
-        this.news = new kelondroCache(kelondroTree.open(path, true, 
preloadTime, yacyNewsRecord.rowdef));
+        this.news = new kelondroEcoTable(path, yacyNewsRecord.rowdef, 
kelondroEcoTable.tailCacheUsageAuto, 10, 0);
     }
     
     public void close() {

Modified: trunk/source/de/anomic/yacy/yacyNewsPool.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyNewsPool.java       2008-01-24 21:37:06 UTC 
(rev 4396)
+++ trunk/source/de/anomic/yacy/yacyNewsPool.java       2008-01-24 22:49:00 UTC 
(rev 4397)
@@ -265,8 +265,8 @@
     private int maxDistribution;
     
     
-    public yacyNewsPool(File yacyDBPath, long preloadTime) {
-        newsDB = new yacyNewsDB(new File(yacyDBPath, "news1.db"), preloadTime);
+    public yacyNewsPool(File yacyDBPath) {
+        newsDB = new yacyNewsDB(new File(yacyDBPath, "news2.db"));
         outgoingNews  = new yacyNewsQueue(new File(yacyDBPath, 
"newsOut1.stack"), newsDB);
         publishedNews = new yacyNewsQueue(new File(yacyDBPath, 
"newsPublished1.stack"), newsDB);
         incomingNews  = new yacyNewsQueue(new File(yacyDBPath, 
"newsIn1.stack"), newsDB);

Modified: trunk/yacy.init
===================================================================
--- trunk/yacy.init     2008-01-24 21:37:06 UTC (rev 4396)
+++ trunk/yacy.init     2008-01-24 22:49:00 UTC (rev 4397)
@@ -627,9 +627,6 @@
 # ram cache for blog.db
 ramCacheBlog_time    =      500
 
-# ram cache for news1.db
-ramCacheNews_time    =     1000
-
 # ram cache for robotsTxt.db
 ramCacheRobots_time  =        0
 

_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an