Author: orbiter
Date: 2008-01-30 22:58:30 +0100 (Wed, 30 Jan 2008)
New Revision: 4420

Modified:
   trunk/htroot/Bookmarks.java
   trunk/htroot/CrawlResults.java
   trunk/htroot/IndexControlRWIs_p.java
   trunk/htroot/IndexControlURLs_p.java
   trunk/htroot/ViewFile.java
   trunk/htroot/js/yacysearch.js
   trunk/htroot/yacy/search.java
   trunk/htroot/yacy/urls.java
   trunk/htroot/yacysearch.html
   trunk/htroot/yacysearch.java
   trunk/htroot/yacysearchitem.html
   trunk/htroot/yacysearchitem.java
   trunk/source/de/anomic/index/indexURLEntry.java
   trunk/source/de/anomic/kelondro/kelondroEcoTable.java
   trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
   trunk/source/de/anomic/plasma/plasmaSearchAPI.java
   trunk/source/de/anomic/plasma/plasmaSearchEvent.java
   trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
   trunk/source/de/anomic/plasma/plasmaSnippetCache.java
   trunk/source/de/anomic/plasma/plasmaSwitchboard.java
   trunk/source/de/anomic/yacy/yacyClient.java
Log:
- more Dublin Core naming of page metadata (title -> dc_title, author -> dc_creator, tags -> dc_subject)
- better presentation of result counters in search results

Modified: trunk/htroot/Bookmarks.java
===================================================================
--- trunk/htroot/Bookmarks.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/Bookmarks.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -203,10 +203,10 @@
                             document = 
plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
                             prop.put("mode_edit", "0"); // create mode
                             prop.put("mode_url", 
comp.url().toNormalform(false, true));
-                            prop.putHTML("mode_title", comp.title());
-                            prop.putHTML("mode_description", (document == 
null) ? comp.title(): document.dc_title());
-                            prop.putHTML("mode_author", comp.author());
-                            prop.putHTML("mode_tags", (document == null) ? 
comp.tags() : document.dc_subject(','));
+                            prop.putHTML("mode_title", comp.dc_title());
+                            prop.putHTML("mode_description", (document == 
null) ? comp.dc_title(): document.dc_title());
+                            prop.putHTML("mode_author", comp.dc_creator());
+                            prop.putHTML("mode_tags", (document == null) ? 
comp.dc_subject() : document.dc_subject(','));
                             prop.putHTML("mode_path","");
                             prop.put("mode_public", "0");
                             prop.put("mode_feed", "0"); //TODO: check if it IS 
a feed

Modified: trunk/htroot/CrawlResults.java
===================================================================
--- trunk/htroot/CrawlResults.java      2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/CrawlResults.java      2008-01-30 21:58:30 UTC (rev 4420)
@@ -217,11 +217,11 @@
                         } else {
                             prop.put("table_indexed_" + cnt + 
"_showTitle_available", "1");
 
-                            if (comp.title() == null || 
comp.title().trim().length() == 0)
+                            if (comp.dc_title() == null || 
comp.dc_title().trim().length() == 0)
                                 prop.put("table_indexed_" + cnt + 
"_showTitle_available_nodescr", "0");
                             else
                                 prop.put("table_indexed_" + cnt + 
"_showTitle_available_nodescr", "1");
-                            prop.putHTML("table_indexed_" + cnt + 
"_showTitle_available_nodescr_urldescr", comp.title());
+                            prop.putHTML("table_indexed_" + cnt + 
"_showTitle_available_nodescr_urldescr", comp.dc_title());
 
                             prop.put("table_indexed_" + cnt + 
"_showTitle_available_cachepath", cachepath);
                             prop.putHTML("table_indexed_" + cnt + 
"_showTitle_available_urltitle", urlstr);

Modified: trunk/htroot/IndexControlRWIs_p.java
===================================================================
--- trunk/htroot/IndexControlRWIs_p.java        2008-01-30 00:15:43 UTC (rev 
4419)
+++ trunk/htroot/IndexControlRWIs_p.java        2008-01-30 21:58:30 UTC (rev 
4420)
@@ -89,7 +89,7 @@
             if (post.containsKey("keystringsearch")) {
                 keyhash = plasmaCondenser.word2hash(keystring);
                 prop.put("keyhash", keyhash);
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 1);
                     prop.put("searchresult_word", keystring);
@@ -100,7 +100,7 @@
                 if (keystring.length() == 0 || 
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
                     prop.put("keystring", "<not possible to compute word 
from hash>");
                 }
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 2);
                     prop.put("searchresult_wordhash", keyhash);
@@ -159,7 +159,7 @@
                 }
                 kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
                 int count = (post.get("lines", "all").equals("all")) ? -1 : 
post.getInt("lines", -1);
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder, true);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
                 plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, 
flags, count, sortorder);
             }
 

Modified: trunk/htroot/IndexControlURLs_p.java
===================================================================
--- trunk/htroot/IndexControlURLs_p.java        2008-01-30 00:15:43 UTC (rev 
4419)
+++ trunk/htroot/IndexControlURLs_p.java        2008-01-30 21:58:30 UTC (rev 
4420)
@@ -241,7 +241,7 @@
         prop.put("genUrlProfile", "2");
         prop.putHTML("genUrlProfile_urlNormalform", 
comp.url().toNormalform(false, true));
         prop.put("genUrlProfile_urlhash", urlhash);
-        prop.put("genUrlProfile_urlDescr", comp.title());
+        prop.put("genUrlProfile_urlDescr", comp.dc_title());
         prop.put("genUrlProfile_moddate", entry.moddate().toString());
         prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
         prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);

Modified: trunk/htroot/ViewFile.java
===================================================================
--- trunk/htroot/ViewFile.java  2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/ViewFile.java  2008-01-30 21:58:30 UTC (rev 4420)
@@ -124,7 +124,7 @@
                 return prop;
             }
             url = comp.url();
-            descr = comp.title();
+            descr = comp.dc_title();
             urlEntry.wordCount();
             size = urlEntry.size();
             pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof);

Modified: trunk/htroot/js/yacysearch.js
===================================================================
--- trunk/htroot/js/yacysearch.js       2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/js/yacysearch.js       2008-01-30 21:58:30 UTC (rev 4420)
@@ -70,9 +70,12 @@
   }
 }
 
-function statistics(offset, items, global, total) {
+function statistics(offset, itemscount, totalcount, localResourceSize, 
remoteResourceSize, remoteIndexCount, remotePeerCount) {
   document.getElementById("resultsOffset").firstChild.nodeValue = offset;
-  document.getElementById("itemscount").firstChild.nodeValue = items;
-  document.getElementById("globalcount").firstChild.nodeValue = global;
-  document.getElementById("totalcount").firstChild.nodeValue = total;
+  document.getElementById("itemscount").firstChild.nodeValue = itemscount;
+  document.getElementById("totalcount").firstChild.nodeValue = totalcount;
+  document.getElementById("localResourceSize").firstChild.nodeValue = 
localResourceSize;
+  document.getElementById("remoteResourceSize").firstChild.nodeValue = 
remoteResourceSize;
+  document.getElementById("remoteIndexCount").firstChild.nodeValue = 
remoteIndexCount;
+  document.getElementById("remotePeerCount").firstChild.nodeValue = 
remotePeerCount;
 }
\ No newline at end of file

Modified: trunk/htroot/yacy/search.java
===================================================================
--- trunk/htroot/yacy/search.java       2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacy/search.java       2008-01-30 21:58:30 UTC (rev 4420)
@@ -183,7 +183,7 @@
             snippetComputationAllTime = theSearch.getSnippetComputationTime();
             
             // set statistic details of search result and find best result 
index set
-            if (theSearch.getLocalCount() == 0) {
+            if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
                 prop.put("indexcount", "");
                 prop.put("joincount", "0");
             } else {
@@ -207,11 +207,11 @@
                 }
                 prop.put("indexcount", indexcount.toString());
                 
-                if (theSearch.getLocalCount() == 0) {
+                if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
                     joincount = 0;
                     prop.put("joincount", "0");
                 } else {
-                    joincount = theSearch.getLocalCount();
+                    joincount = 
theSearch.getRankingResult().getLocalResourceSize();
                     prop.put("joincount", Integer.toString(joincount));
                     accu = theSearch.completeResults(duetime);
                 }

Modified: trunk/htroot/yacy/urls.java
===================================================================
--- trunk/htroot/yacy/urls.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacy/urls.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -104,11 +104,11 @@
                 referrer = sb.getURL(entry.referrerHash());
                 // create RSS entry
                 comp = entry.comp();
-                prop.put("item_" + c + "_title", comp.title());
+                prop.put("item_" + c + "_title", comp.dc_title());
                 prop.putHTML("item_" + c + "_link", 
comp.url().toNormalform(true, false));
                 prop.putHTML("item_" + c + "_referrer", (referrer == null) ? 
"" : referrer.toNormalform(true, false));
-                prop.putHTML("item_" + c + "_description", comp.title());
-                prop.put("item_" + c + "_author", comp.author());
+                prop.putHTML("item_" + c + "_description", comp.dc_title());
+                prop.put("item_" + c + "_author", comp.dc_creator());
                 prop.put("item_" + c + "_pubDate", 
serverDate.formatShortSecond(entry.moddate()));
                 prop.put("item_" + c + "_guid", entry.hash());
                 c++;

Modified: trunk/htroot/yacysearch.html
===================================================================
--- trunk/htroot/yacysearch.html        2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearch.html        2008-01-30 21:58:30 UTC (rev 4420)
@@ -99,7 +99,7 @@
        <p>No Results. (length of search words must be at least 3 
characters)</p>
        ::
        <div id="results"></div>
-       <span id="resCounter" style="display: inline;"><strong 
id="resultsOffset">#[offset]#</strong>-<strong 
id="itemscount">#[linkcount]#</strong> results from a total number of <strong 
id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong 
id="globalcount">#[globalcount]#</strong> links from other YaCy 
peers.#(/globalresults)#</span>
+       <span id="resCounter" style="display: inline;"><strong 
id="resultsOffset">#[offset]#</strong>-<strong 
id="itemscount">#[itemscount]#</strong> results from a total number of <strong 
id="totalcount">#[totalcount]#</strong> known#(globalresults)#.:: (<strong 
id="localResourceSize">#[localResourceSize]#</strong> local, <strong 
id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong 
id="remoteIndexCount">#[remoteIndexCount]#</strong> links from <strong 
id="remotePeerCount">#[remotePeerCount]#</strong> other YaCy 
peers.#(/globalresults)#</span>
        <span id="resNav" style="display: inline;">#[resnav]#</span>
        ::
        <p>Searching the web with this peer is disabled for unauthorized users. 
Please <a href="Status.html?login=">log in</a> as administrator to use the 
search function</p>

Modified: trunk/htroot/yacysearch.java
===================================================================
--- trunk/htroot/yacysearch.java        2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearch.java        2008-01-30 21:58:30 UTC (rev 4420)
@@ -240,8 +240,8 @@
                         // create a news message
                         HashMap<String, String> map = new HashMap<String, 
String>();
                         map.put("url", comp.url().toNormalform(false, 
true).replace(',', '|'));
-                        map.put("title", comp.title().replace(',', ' '));
-                        map.put("description", ((document == null) ? 
comp.title() : document.dc_title()).replace(',', ' '));
+                        map.put("title", comp.dc_title().replace(',', ' '));
+                        map.put("description", ((document == null) ? 
comp.dc_title() : document.dc_title()).replace(',', ' '));
                         map.put("author", ((document == null) ? "" : 
document.dc_creator()));
                         map.put("tags", ((document == null) ? "" : 
document.dc_subject(' ')));
                         
yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_SURFTIPP_ADD,
 map));
@@ -306,12 +306,12 @@
 
             // log
             serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + 
theQuery.queryString + " - " +
-                    (theSearch.getLocalCount() + theSearch.getGlobalCount()) + 
" links found, " +
+                    (theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize()) + " links found, " +
                     ((System.currentTimeMillis() - timestamp) / 1000) + " 
seconds");
 
             // prepare search statistics
             Long trackerHandle = new Long(System.currentTimeMillis());
-            HashMap<String, Object> searchProfile = 
theQuery.resultProfile(theSearch.getLocalCount() + theSearch.getGlobalCount(), 
System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), 
theSearch.getSnippetComputationTime());
+            HashMap<String, Object> searchProfile = 
theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize(), 
System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), 
theSearch.getSnippetComputationTime());
             searchProfile.put("querystring", theQuery.queryString);
             searchProfile.put("time", trackerHandle);
             searchProfile.put("host", client);
@@ -323,13 +323,16 @@
             sb.localSearchTracker.put(client, handles);
         
             prop = new serverObjects();
-            prop.put("num-results_totalcount", 
yFormatter.number(theSearch.getLocalCount() + theSearch.getGlobalCount(), 
!rss));
-            prop.put("num-results_globalresults", "1");
-            prop.put("num-results_globalresults_globalcount", 
yFormatter.number(theSearch.getGlobalCount(), !rss));
             prop.put("num-results_offset", offset);
-            prop.put("num-results_linkcount", "0");
+            prop.put("num-results_itemscount", "0");
             prop.put("num-results_itemsPerPage", itemsPerPage);
-
+            prop.put("num-results_totalcount", 
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+            prop.put("num-results_globalresults", (globalsearch) ? "1" : "0");
+            prop.put("num-results_globalresults_localResourceSize", 
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
+            prop.put("num-results_globalresults_remoteResourceSize", 
yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+            prop.put("num-results_globalresults_remoteIndexCount", 
yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
+            prop.put("num-results_globalresults_remotePeerCount", 
yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
+            
             // compose page navigation
             StringBuffer resnav = new StringBuffer();
             int thispage = offset / theQuery.displayResults();
@@ -337,7 +340,7 @@
                 resnav.append(navurla(thispage - 1, display, theQuery));
                 resnav.append("<strong>&lt;</strong></a>&nbsp;");
             }
-            int numberofpages = Math.min(10, Math.min(thispage + 2, 
(theSearch.getGlobalCount() + theSearch.getLocalCount()) / 
theQuery.displayResults()));
+            int numberofpages = Math.min(10, Math.min(thispage + 2, 
(theSearch.getRankingResult().getRemoteResourceSize() + 
theSearch.getRankingResult().getLocalResourceSize()) / 
theQuery.displayResults()));
             for (int i = 0; i < numberofpages; i++) {
                 if (i == thispage) {
                     resnav.append("<strong>");

Modified: trunk/htroot/yacysearchitem.html
===================================================================
--- trunk/htroot/yacysearchitem.html    2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearchitem.html    2008-01-30 21:58:30 UTC (rev 4420)
@@ -62,7 +62,7 @@
 #(/rssreferences)#
 #(dynamic)#::
 <script type="text/javascript">
-statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#");
+statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#", 
"#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#", 
"#[remotePeerCount]#");
 progressbar.step(1);
 </script>
 #(/dynamic)#

Modified: trunk/htroot/yacysearchitem.java
===================================================================
--- trunk/htroot/yacysearchitem.java    2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearchitem.java    2008-01-30 21:58:30 UTC (rev 4420)
@@ -44,6 +44,7 @@
 import de.anomic.server.serverSwitch;
 import de.anomic.tools.crypt;
 import de.anomic.tools.nxTools;
+import de.anomic.tools.yFormatter;
 import de.anomic.yacy.yacyCore;
 import de.anomic.yacy.yacyNewsPool;
 import de.anomic.yacy.yacySeed;
@@ -85,9 +86,12 @@
         // dynamically update count values
         if (!rss) {
             prop.put("dynamic_offset", theQuery.neededResults() - 
theQuery.displayResults() + 1);
-            prop.put("dynamic_global", theSearch.getGlobalCount());
-            prop.put("dynamic_total", theSearch.getGlobalCount() + 
theSearch.getLocalCount());
-            prop.put("dynamic_items", (item < 0) ? theQuery.neededResults() : 
item + 1);
+            prop.put("dynamic_itemscount", (item < 0) ? 
theQuery.neededResults() : item + 1);
+            prop.put("dynamic_totalcount", 
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+            prop.put("dynamic_localResourceSize", 
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
+            prop.put("dynamic_remoteResourceSize", 
yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+            prop.put("dynamic_remoteIndexCount", 
yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
+            prop.put("dynamic_remotePeerCount", 
yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
             prop.put("dynamic", "1");
         }
         

Modified: trunk/source/de/anomic/index/indexURLEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexURLEntry.java     2008-01-30 00:15:43 UTC 
(rev 4419)
+++ trunk/source/de/anomic/index/indexURLEntry.java     2008-01-30 21:58:30 UTC 
(rev 4420)
@@ -120,9 +120,9 @@
     
     public indexURLEntry(
             yacyURL url,
-            String descr,
-            String author,
-            String tags,
+            String dc_title,
+            String dc_creator,
+            String dc_subject,
             String ETag,
             Date mod,
             Date load,
@@ -143,7 +143,7 @@
         // create new entry and store it into database
         this.entry = rowdef.newEntry();
         this.entry.setCol(col_hash, url.hash(), null);
-        this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, 
ETag));
+        this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator, 
dc_subject, ETag));
         encodeDate(col_mod, mod);
         encodeDate(col_load, load);
         encodeDate(col_fresh, fresh);
@@ -175,12 +175,12 @@
         return new Date(86400000 * this.entry.getColLong(col));
     }
     
-    public static byte[] encodeComp(yacyURL url, String descr, String author, 
String tags, String ETag) {
+    public static byte[] encodeComp(yacyURL url, String dc_title, String 
dc_creator, String dc_subject, String ETag) {
         serverCharBuffer s = new serverCharBuffer(200);
         s.append(url.toNormalform(false, true)).append(10);
-        s.append(descr).append(10);
-        s.append(author).append(10);
-        s.append(tags).append(10);
+        s.append(dc_title).append(10);
+        s.append(dc_creator).append(10);
+        s.append(dc_subject).append(10);
         s.append(ETag).append(10);
         return s.toString().getBytes();
     }
@@ -203,13 +203,13 @@
             url = null;
         }
         String descr = crypt.simpleDecode(prop.getProperty("descr", ""), 
null); if (descr == null) descr = "";
-        String author = crypt.simpleDecode(prop.getProperty("author", ""), 
null); if (author == null) author = "";
+        String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), 
null); if (dc_creator == null) dc_creator = "";
         String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); 
if (tags == null) tags = "";
         String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); 
if (ETag == null) ETag = "";
         
         this.entry = rowdef.newEntry();
         this.entry.setCol(col_hash, url.hash(), null);
-        this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, 
ETag));
+        this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, 
ETag));
         try {
             encodeDate(col_mod, 
serverDate.parseShortDay(prop.getProperty("mod", "20000101")));
         } catch (ParseException e) {
@@ -256,9 +256,9 @@
         try {
             s.append("hash=").append(hash());
             
s.append(",url=").append(crypt.simpleEncode(comp.url().toNormalform(false, 
true)));
-            s.append(",descr=").append(crypt.simpleEncode(comp.title()));
-            s.append(",author=").append(crypt.simpleEncode(comp.author()));
-            s.append(",tags=").append(crypt.simpleEncode(comp.tags()));
+            s.append(",descr=").append(crypt.simpleEncode(comp.dc_title()));
+            s.append(",author=").append(crypt.simpleEncode(comp.dc_creator()));
+            s.append(",tags=").append(crypt.simpleEncode(comp.dc_subject()));
             s.append(",ETag=").append(crypt.simpleEncode(comp.ETag()));
             s.append(",mod=").append(serverDate.formatShortDay(moddate()));
             s.append(",load=").append(serverDate.formatShortDay(loaddate()));
@@ -429,7 +429,7 @@
                 null, 
                 comp().url(), 
                 referrerHash(), 
-                comp().title(),
+                comp().dc_title(),
                 loaddate(), 
                 null,
                 0, 
@@ -455,7 +455,7 @@
 
     public class Components {
         private yacyURL url;
-        private String title, author, tags, ETag;
+        private String dc_title, dc_creator, dc_subject, ETag;
         
         public Components(String url, String urlhash, String title, String 
author, String tags, String ETag) {
             try {
@@ -463,22 +463,22 @@
             } catch (MalformedURLException e) {
                 this.url = null;
             }
-            this.title = title;
-            this.author = author;
-            this.tags = tags;
+            this.dc_title = title;
+            this.dc_creator = author;
+            this.dc_subject = tags;
             this.ETag = ETag;
         }
         public Components(yacyURL url, String descr, String author, String 
tags, String ETag) {
             this.url = url;
-            this.title = descr;
-            this.author = author;
-            this.tags = tags;
+            this.dc_title = descr;
+            this.dc_creator = author;
+            this.dc_subject = tags;
             this.ETag = ETag;
         }
         public yacyURL url()    { return this.url; }
-        public String  title()  { return this.title; }
-        public String  author() { return this.author; }
-        public String  tags()   { return this.tags; }
+        public String  dc_title()  { return this.dc_title; }
+        public String  dc_creator() { return this.dc_creator; }
+        public String  dc_subject()   { return this.dc_subject; }
         public String  ETag()   { return this.ETag; }
     }
     

Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-30 
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-30 
21:58:30 UTC (rev 4420)
@@ -343,6 +343,7 @@
         } else {
             // read old value
             kelondroRow.Entry v = table.get(i);
+            assert v != null;
             System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, 
rowdef.primaryKeyLength);
             System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, 
rowdef.objectsize - rowdef.primaryKeyLength);
             // write new value

Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java  2008-01-30 00:15:43 UTC 
(rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java  2008-01-30 21:58:30 UTC 
(rev 4420)
@@ -72,10 +72,10 @@
 import de.anomic.kelondro.kelondroCache;
 import de.anomic.kelondro.kelondroCloneableIterator;
 import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroSplitTable;
 import de.anomic.kelondro.kelondroIndex;
 import de.anomic.kelondro.kelondroRow;
 import de.anomic.kelondro.kelondroRowSet;
+import de.anomic.kelondro.kelondroSplitTable;
 import de.anomic.plasma.urlPattern.plasmaURLPattern;
 import de.anomic.server.serverCodings;
 import de.anomic.server.logging.serverLog;
@@ -623,14 +623,14 @@
                                        pw.println(url);
                                    }
                                    if (format == 1) {
-                                       pw.println("<a href=\"" + url + "\">" + 
htmlTools.encodeUnicode2html(comp.title(), true, true) + "</a><br>");
+                                       pw.println("<a href=\"" + url + "\">" + 
htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>");
                                    }
                                    if (format == 2) {
                                        pw.println("<item>");
-                                       pw.println("<title>" + 
htmlTools.encodeUnicode2html(comp.title(), true, true) + "</title>");
+                                       pw.println("<title>" + 
htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>");
                                        pw.println("<link>" + 
yacyURL.escape(url) + "</link>");
-                                       if (comp.author().length() > 0) 
pw.println("<author>" + htmlTools.encodeUnicode2html(comp.author(), true, true) 
+ "</author>");
-                                       if (comp.tags().length() > 0) 
pw.println("<description>" + htmlTools.encodeUnicode2html(comp.tags(), true, 
true) + "</description>");
+                                       if (comp.dc_creator().length() > 0) 
pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, 
true) + "</author>");
+                                       if (comp.dc_subject().length() > 0) 
pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(), 
true, true) + "</description>");
                                        pw.println("<pubDate>" + 
entry.moddate().toString() + "</pubDate>");
                                        pw.println("<guid 
isPermaLink=\"false\">" + entry.hash() + "</guid>");
                                        pw.println("</item>");

Modified: trunk/source/de/anomic/plasma/plasmaSearchAPI.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-01-30 00:15:43 UTC 
(rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-01-30 21:58:30 UTC 
(rev 4420)
@@ -88,10 +88,10 @@
         }
     }
 
-    public static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int 
sortorder, boolean fetchURLs) {
+    public static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int 
sortorder) {
         plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, 
sb.getRanking(), filter);
         plasmaSearchRankingProcess ranked = new 
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE);
-        ranked.execQuery(fetchURLs);
+        ranked.execQuery();
         
         if (ranked.filteredCount() == 0) {
             prop.put("searchresult", 2);

Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2008-01-30 
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2008-01-30 
21:58:30 UTC (rev 4420)
@@ -76,7 +76,6 @@
     public  TreeMap<String, String> IAResults;
     public  TreeMap<String, Integer> IACount;
     public  String IAmaxcounthash, IAneardhthash;
-    private int localcount;
     private resultWorker[] workerThreads;
     private ArrayList<ResultEntry> resultList;
     //private int resultListLock; // a pointer that shows that all elements 
below this pointer are fixed and may not be changed again
@@ -101,7 +100,6 @@
         this.IACount = new TreeMap<String, Integer>();
         this.IAmaxcounthash = null;
         this.IAneardhthash = null;
-        this.localcount = 0;
         this.urlRetrievalAllTime = 0;
         this.snippetComputationAllTime = 0;
         this.workerThreads = null;
@@ -157,8 +155,7 @@
         } else {
             // do a local search
             this.rankedCache = new plasmaSearchRankingProcess(wordIndex, 
query, 2, max_results_preparation);
-            this.rankedCache.execQuery(true);
-            this.localcount = this.rankedCache.filteredCount();
+            this.rankedCache.execQuery();
             //plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, 
false, 2, ranking, process);
             
             if (generateAbstracts) {
@@ -249,8 +246,7 @@
             // sort the local containers and truncate it to a limited count,
             // so following sortings together with the global results will be 
fast
             synchronized (rankedCache) {
-                rankedCache.execQuery(true);
-                localcount = rankedCache.filteredCount();
+                rankedCache.execQuery();
             }
         }
     }
@@ -291,13 +287,13 @@
         
         long startTime = System.currentTimeMillis();
         indexURLEntry.Components comp = page.comp();
-        String pagetitle = comp.title().toLowerCase();
+        String pagetitle = comp.dc_title().toLowerCase();
         if (comp.url() == null) {
             registerFailure(page.hash(), "url corrupted (null)");
             return null; // rare case where the url is corrupted
         }
         String pageurl = comp.url().toString().toLowerCase();
-        String pageauthor = comp.author().toLowerCase();
+        String pageauthor = comp.dc_creator().toLowerCase();
         long dbRetrievalTime = System.currentTimeMillis() - startTime;
         
         // check exclusion
@@ -315,7 +311,7 @@
         // check constraints
         if ((query.constraint != null) &&
             (query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
-            (!(comp.title().startsWith("Index of")))) {
+            (!(comp.dc_title().startsWith("Index of")))) {
             final Iterator<String> wi = query.queryHashes.iterator();
             while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), 
page.hash());
             registerFailure(page.hash(), "index-of constraint not fullfilled");
@@ -423,14 +419,10 @@
         return secondarySearchThreads;
     }
     
-    public int getLocalCount() {
-        return this.localcount;
+    public plasmaSearchRankingProcess getRankingResult() {
+        return this.rankedCache;
     }
     
-    public int getGlobalCount() {
-        return this.rankedCache.getGlobalCount();
-    }
-    
     public long getURLRetrievalTime() {
         return this.urlRetrievalAllTime;
     }
@@ -465,7 +457,7 @@
             if ((query.onlineSnippetFetch) &&
                 (!event.anyWorkerAlive()) &&
                 (event.resultList.size() < query.neededResults() + 10) &&
-                ((event.getLocalCount() + event.getGlobalCount()) > 
event.resultList.size())) {
+                ((event.getRankingResult().getLocalResourceSize() + 
event.getRankingResult().getRemoteResourceSize()) > event.resultList.size())) {
                 // set new timeout
                 event.eventTime = System.currentTimeMillis();
                 // start worker threads to fetch urls and snippets
@@ -764,7 +756,7 @@
                                 ("yacyshare " +
                                  filename.replace('?', ' ') +
                                  " " +
-                                 urlcomps.title()).getBytes(), 
"UTF-8").keySet(),
+                                 urlcomps.dc_title()).getBytes(), 
"UTF-8").keySet(),
                                  urlentry.hash());
                         wordIndex.loadedURL.remove(urlentry.hash()); // clean 
up
                         throw new RuntimeException("index void");
@@ -794,7 +786,7 @@
             return (alternative_urlname == null) ? 
urlcomps.url().toNormalform(false, true) : alternative_urlname;
         }
         public String title() {
-            return urlcomps.title();
+            return urlcomps.dc_title();
         }
         public plasmaSnippetCache.TextSnippet textSnippet() {
             return this.textSnippet;

Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java       
2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java       
2008-01-30 21:58:30 UTC (rev 4420)
@@ -57,9 +57,8 @@
     private HashMap<String, String> handover; // key = urlhash, value = 
urlstring; used for double-check of urls that had been handed over to search 
process
     private plasmaSearchQuery query;
     private int sortorder;
-    private int filteredCount;
     private int maxentries;
-    private int globalcount;
+    private int remote_peerCount, remote_indexCount, remote_resourceSize, 
local_resourceSize;
     private indexRWIEntryOrder order;
     private HashMap<String, Object> urlhashes; // map for double-check; 
String/Long relation, addresses ranking number (backreference for deletion)
     private kelondroMScoreCluster<String> ref;  // reference score computation 
for the commonSense heuristic
@@ -76,11 +75,13 @@
         this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
         this.doubleDomCache = new HashMap<String, TreeMap<Object, 
indexRWIRowEntry>>();
         this.handover = new HashMap<String, String>();
-        this.filteredCount = 0;
         this.order = null;
         this.query = query;
         this.maxentries = maxentries;
-        this.globalcount = 0;
+        this.remote_peerCount = 0;
+        this.remote_indexCount = 0;
+        this.remote_resourceSize = 0;
+        this.local_resourceSize = 0;
         this.urlhashes = new HashMap<String, Object>();
         this.ref = new kelondroMScoreCluster<String>();
         this.misses = new TreeSet<String>();
@@ -90,7 +91,7 @@
         for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
     }
     
-    public void execQuery(boolean fetchURLs) {
+    public void execQuery() {
         
         long timer = System.currentTimeMillis();
         this.localSearchContainerMaps = wordIndex.localSearchContainers(query, 
null);
@@ -113,16 +114,24 @@
         }
         
         if (sortorder == 2) {
-            insertRanked(index, true);
+            insertRanked(index, true, index.size());
         } else {            
-            insertNoOrder(index, fetchURLs);
+            insertNoOrder(index, true, index.size());
         }
     }
     
-    private void insertNoOrder(indexContainer index, boolean local) {
+    private void insertNoOrder(indexContainer index, boolean local, int 
fullResource) {
         final Iterator<indexRWIRowEntry> en = index.entries();
         // generate a new map where the urls are sorted (not by hash but by 
the url text)
         
+        if (local) {
+            this.local_resourceSize += fullResource;
+        } else {
+            this.remote_resourceSize += fullResource;
+            this.remote_peerCount++;
+            this.remote_indexCount += index.size();
+        }
+        
         indexRWIRowEntry ientry;
         indexURLEntry uentry;
         String u;
@@ -141,20 +150,14 @@
             if (sortorder == 0) {
                 this.sortedRWIEntries.put(ientry.urlHash(), ientry);
                 this.urlhashes.put(ientry.urlHash(), ientry.urlHash());
-                filteredCount++;
             } else {
-                if (local) {
-                    uentry = wordIndex.loadedURL.load(ientry.urlHash(), 
ientry, 0);
-                    if (uentry == null) {
-                        this.misses.add(ientry.urlHash());
-                    } else {
-                        u = uentry.comp().url().toNormalform(false, true);
-                        this.sortedRWIEntries.put(u, ientry);
-                        this.urlhashes.put(ientry.urlHash(), u);
-                        filteredCount++;
-                    }
+                uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
+                if (uentry == null) {
+                    this.misses.add(ientry.urlHash());
                 } else {
-                    filteredCount++;
+                    u = uentry.comp().url().toNormalform(false, true);
+                    this.sortedRWIEntries.put(u, ientry);
+                    this.urlhashes.put(ientry.urlHash(), u);
                 }
             }
             
@@ -163,12 +166,18 @@
         } // end loop
     }
     
-    public void insertRanked(indexContainer index, boolean local) {
+    public void insertRanked(indexContainer index, boolean local, int 
fullResource) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not 
terminate within the maxTime
 
         assert (index != null);
         if (index.size() == 0) return;
+        if (local) {
+            this.local_resourceSize += fullResource;
+        } else {
+            this.remote_resourceSize += fullResource;
+            this.remote_peerCount++;
+        }
         
         long timer = System.currentTimeMillis();
         if (this.order == null) {
@@ -224,11 +233,8 @@
             }
             
             // increase counter for statistics
-            if (!local) this.globalcount++;
+            if (!local) this.remote_indexCount++;
         }
-        this.filteredCount = sortedRWIEntries.size();
-        //long sc = Math.max(1, System.currentTimeMillis() - s0);
-        //System.out.println("###DEBUG### time to sort " + container.size() + 
" entries to " + this.filteredCount + ": " + sc + " milliseconds, " + 
(container.size() / sc) + " entries/millisecond, ranking = " + tc);
         
         //if ((query.neededResults() > 0) && (container.size() > 
query.neededResults())) remove(true, true);
         serverProfiling.update("SEARCH", new 
plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT, 
index.size(), System.currentTimeMillis() - timer));
@@ -350,14 +356,34 @@
        return flagcount;
     }
     
+    // "results from a total number of <remote_resourceSize + 
local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> 
remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
+    
     public int filteredCount() {
-        return this.filteredCount;
+        // the number of index entries that are considered as result set
+        return this.sortedRWIEntries.size();
     }
 
-    public int getGlobalCount() {
-        return this.globalcount;
+    public int getRemoteIndexCount() {
+        // the number of result contributions from all the remote peers
+        return this.remote_indexCount;
     }
     
+    public int getRemotePeerCount() {
+        // the number of remote peers that have contributed
+        return this.remote_peerCount;
+    }
+    
+    public int getRemoteResourceSize() {
+        // the number of all hits in all the remote peers
+        return this.remote_resourceSize;
+    }
+    
+    public int getLocalResourceSize() {
+        // the number of hits in the local peer (index size, size of the 
collection in the own index)
+        return this.local_resourceSize;
+    }
+    
+    
     public indexRWIEntry remove(String urlHash) {
         Object r = (Long) urlhashes.get(urlHash);
         if (r == null) return null;

Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSnippetCache.java       2008-01-30 
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java       2008-01-30 
21:58:30 UTC (rev 4420)
@@ -874,7 +874,7 @@
             plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID);
             assert plasmaSwitchboard.getSwitchboard() != null;
             assert plasmaSwitchboard.getSwitchboard().wordIndex != null;
-            assert event != null;
+            assert event != null : "eventID = " + eventID;
             assert event.getQuery() != null;
             
plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes,
 urlHash);
             event.remove(urlHash);

Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-01-30 
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-01-30 
21:58:30 UTC (rev 4420)
@@ -2205,7 +2205,7 @@
             /* 
=========================================================================
              * CREATE INDEX
              * 
========================================================================= */  
-            String docDescription = document.dc_title();
+            String dc_title = document.dc_title();
             yacyURL referrerURL = entry.referrerURL();
 
             String noIndexReason = 
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR;
@@ -2236,9 +2236,9 @@
                     long ldate = System.currentTimeMillis();
                     indexURLEntry newEntry = new indexURLEntry(
                             entry.url(),                               // URL
-                            docDescription,                            // 
document description
-                            document.dc_creator(),                      // 
author
-                            document.dc_subject(' '),                 // tags
+                            dc_title,                            // document 
description
+                            document.dc_creator(),                     // 
author
+                            document.dc_subject(' '),                  // tags
                             "",                                        // ETag
                             docDate,                                   // 
modification date
                             new Date(),                                // 
loaded date
@@ -2406,7 +2406,7 @@
                             // of string concatenation
                             log.logInfo("*Indexed " + words + " words in URL " 
+ entry.url() +
                                     " [" + entry.urlHash() + "]" +
-                                    "\n\tDescription:  " + docDescription +
+                                    "\n\tDescription:  " + dc_title +
                                     "\n\tMimeType: "  + document.dc_format() + 
" | Charset: " + document.getCharset() + " | " +
                                     "Size: " + document.getTextLength() + " 
bytes | " +
                                     "Anchors: " + ((document.getAnchors() == 
null) ? 0 : document.getAnchors().size()) +
@@ -2430,7 +2430,7 @@
                         }
                     } else {
                         log.logFine("Not Indexed Resource '" + 
entry.url().toNormalform(false, true) + "': process case=" + processCase);
-                        addURLtoErrorDB(entry.url(), referrerURL.hash(), 
initiatorPeerHash, docDescription, 
plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
+                        addURLtoErrorDB(entry.url(), referrerURL.hash(), 
initiatorPeerHash, dc_title, 
plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
                     }
                 } catch (Exception ee) {
                     if (ee instanceof InterruptedException) throw 
(InterruptedException)ee;
@@ -2443,7 +2443,7 @@
                         if (clusterhashes != null) 
initiatorPeer.setAlternativeAddress((String) 
clusterhashes.get(initiatorPeer.hash));
                         yacyClient.crawlReceipt(initiatorPeer, "crawl", 
"exception", ee.getMessage(), null, "");
                     }
-                    addURLtoErrorDB(entry.url(), (referrerURL == null) ? null 
: referrerURL.hash(), initiatorPeerHash, docDescription, 
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
+                    addURLtoErrorDB(entry.url(), (referrerURL == null) ? null 
: referrerURL.hash(), initiatorPeerHash, dc_title, 
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
                 }
                 
             } else {
@@ -2451,7 +2451,7 @@
                 checkInterruption();
                 
                 log.logInfo("Not indexed any word in URL " + entry.url() + "; 
cause: " + noIndexReason);
-                addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : 
referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new 
kelondroBitfield());
+                addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : 
referrerURL.hash(), initiatorPeerHash, dc_title, noIndexReason, new 
kelondroBitfield());
                 if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && 
(initiatorPeer != null)) {
                     if (clusterhashes != null) 
initiatorPeer.setAlternativeAddress((String) 
clusterhashes.get(initiatorPeer.hash));
                     yacyClient.crawlReceipt(initiatorPeer, "crawl", 
"rejected", noIndexReason, null, "");

Modified: trunk/source/de/anomic/yacy/yacyClient.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 00:15:43 UTC (rev 
4419)
+++ trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 21:58:30 UTC (rev 
4420)
@@ -477,9 +477,10 @@
                // now create a plasmaIndex out of this result
                // System.out.println("yacyClient: " + ((urlhashes.length() == 
0) ? "primary" : "secondary")+ " search result = " + result.toString()); // 
debug
                
-               int results = 0;
+               int results = 0, joincount = 0;
         try {
-            results = Integer.parseInt((String) result.get("count"));
+            results = Integer.parseInt(result.get("count"));
+            joincount = Integer.parseInt(result.get("joincount"));
         } catch (NumberFormatException e) {
             yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" + 
target.getName() + ", wrong output format");
             yacyCore.peerActions.peerDeparture(target, "search request to peer 
created number format exception");
@@ -557,7 +558,7 @@
         // store remote result to local result container
         synchronized (containerCache) {
             // insert one container into the search result buffer
-            containerCache.insertRanked(container[0], false); // one is enough
+            containerCache.insertRanked(container[0], false, joincount); // 
one is enough
             
             // integrate remote topwords
             String references = (String) result.get("references");

_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an