Author: orbiter Date: 2008-01-30 22:58:30 +0100 (Wed, 30 Jan 2008) New Revision: 4420
Modified: trunk/htroot/Bookmarks.java trunk/htroot/CrawlResults.java trunk/htroot/IndexControlRWIs_p.java trunk/htroot/IndexControlURLs_p.java trunk/htroot/ViewFile.java trunk/htroot/js/yacysearch.js trunk/htroot/yacy/search.java trunk/htroot/yacy/urls.java trunk/htroot/yacysearch.html trunk/htroot/yacysearch.java trunk/htroot/yacysearchitem.html trunk/htroot/yacysearchitem.java trunk/source/de/anomic/index/indexURLEntry.java trunk/source/de/anomic/kelondro/kelondroEcoTable.java trunk/source/de/anomic/plasma/plasmaCrawlLURL.java trunk/source/de/anomic/plasma/plasmaSearchAPI.java trunk/source/de/anomic/plasma/plasmaSearchEvent.java trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java trunk/source/de/anomic/plasma/plasmaSnippetCache.java trunk/source/de/anomic/plasma/plasmaSwitchboard.java trunk/source/de/anomic/yacy/yacyClient.java Log: - more dublin core naming of page metadata - better presentation of result counters in search results Modified: trunk/htroot/Bookmarks.java =================================================================== --- trunk/htroot/Bookmarks.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/Bookmarks.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -203,10 +203,10 @@ document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true); prop.put("mode_edit", "0"); // create mode prop.put("mode_url", comp.url().toNormalform(false, true)); - prop.putHTML("mode_title", comp.title()); - prop.putHTML("mode_description", (document == null) ? comp.title(): document.dc_title()); - prop.putHTML("mode_author", comp.author()); - prop.putHTML("mode_tags", (document == null) ? comp.tags() : document.dc_subject(',')); + prop.putHTML("mode_title", comp.dc_title()); + prop.putHTML("mode_description", (document == null) ? comp.dc_title(): document.dc_title()); + prop.putHTML("mode_author", comp.dc_creator()); + prop.putHTML("mode_tags", (document == null) ? comp.dc_subject() : document.dc_subject(',')); prop.putHTML("mode_path",""); prop.put("mode_public", "0"); prop.put("mode_feed", "0"); //TODO: check if it IS a feed Modified: trunk/htroot/CrawlResults.java =================================================================== --- trunk/htroot/CrawlResults.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/CrawlResults.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -217,11 +217,11 @@ } else { prop.put("table_indexed_" + cnt + "_showTitle_available", "1"); - if (comp.title() == null || comp.title().trim().length() == 0) + if (comp.dc_title() == null || comp.dc_title().trim().length() == 0) prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "0"); else prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "1"); - prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", comp.title()); + prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", comp.dc_title()); prop.put("table_indexed_" + cnt + "_showTitle_available_cachepath", cachepath); prop.putHTML("table_indexed_" + cnt + "_showTitle_available_urltitle", urlstr); Modified: trunk/htroot/IndexControlRWIs_p.java =================================================================== --- trunk/htroot/IndexControlRWIs_p.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/IndexControlRWIs_p.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -89,7 +89,7 @@ if (post.containsKey("keystringsearch")) { keyhash = plasmaCondenser.word2hash(keystring); prop.put("keyhash", keyhash); - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder); if (ranking.filteredCount() == 0) { prop.put("searchresult", 1); prop.put("searchresult_word", keystring); @@ -100,7 +100,7 @@ if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { prop.put("keystring", "<not possible to compute word from hash>"); } - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder); if (ranking.filteredCount() == 0) { prop.put("searchresult", 2); prop.put("searchresult_wordhash", keyhash); @@ -159,7 +159,7 @@ } kelondroBitfield flags = plasmaSearchAPI.compileFlags(post); int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1); - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder, true); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder); plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count, sortorder); } Modified: trunk/htroot/IndexControlURLs_p.java =================================================================== --- trunk/htroot/IndexControlURLs_p.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/IndexControlURLs_p.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -241,7 +241,7 @@ prop.put("genUrlProfile", "2"); prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true)); prop.put("genUrlProfile_urlhash", urlhash); - prop.put("genUrlProfile_urlDescr", comp.title()); + prop.put("genUrlProfile_urlDescr", comp.dc_title()); prop.put("genUrlProfile_moddate", entry.moddate().toString()); prop.put("genUrlProfile_loaddate", entry.loaddate().toString()); prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1); Modified: trunk/htroot/ViewFile.java =================================================================== --- trunk/htroot/ViewFile.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/ViewFile.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -124,7 +124,7 @@ return prop; } url = comp.url(); - descr = comp.title(); + descr = comp.dc_title(); urlEntry.wordCount(); size = urlEntry.size(); pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof); Modified: trunk/htroot/js/yacysearch.js =================================================================== --- trunk/htroot/js/yacysearch.js 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/js/yacysearch.js 2008-01-30 21:58:30 UTC (rev 4420) @@ -70,9 +70,12 @@ } } -function statistics(offset, items, global, total) { +function statistics(offset, itemscount, totalcount, localResourceSize, remoteResourceSize, remoteIndexCount, remotePeerCount) { document.getElementById("resultsOffset").firstChild.nodeValue = offset; - document.getElementById("itemscount").firstChild.nodeValue = items; - document.getElementById("globalcount").firstChild.nodeValue = global; - document.getElementById("totalcount").firstChild.nodeValue = total; + document.getElementById("itemscount").firstChild.nodeValue = itemscount; + document.getElementById("totalcount").firstChild.nodeValue = totalcount; + document.getElementById("localResourceSize").firstChild.nodeValue = localResourceSize; + document.getElementById("remoteResourceSize").firstChild.nodeValue = remoteResourceSize; + document.getElementById("remoteIndexCount").firstChild.nodeValue = remoteIndexCount; + document.getElementById("remotePeerCount").firstChild.nodeValue = remotePeerCount; } \ No newline at end of file Modified: trunk/htroot/yacy/search.java =================================================================== --- trunk/htroot/yacy/search.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacy/search.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -183,7 +183,7 @@ snippetComputationAllTime = theSearch.getSnippetComputationTime(); // set statistic details of search result and find best result index set - if (theSearch.getLocalCount() == 0) { + if (theSearch.getRankingResult().getLocalResourceSize() == 0) { prop.put("indexcount", ""); prop.put("joincount", "0"); } else { @@ -207,11 +207,11 @@ } prop.put("indexcount", indexcount.toString()); - if (theSearch.getLocalCount() == 0) { + if (theSearch.getRankingResult().getLocalResourceSize() == 0) { joincount = 0; prop.put("joincount", "0"); } else { - joincount = theSearch.getLocalCount(); + joincount = theSearch.getRankingResult().getLocalResourceSize(); prop.put("joincount", Integer.toString(joincount)); accu = theSearch.completeResults(duetime); } Modified: trunk/htroot/yacy/urls.java =================================================================== --- trunk/htroot/yacy/urls.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacy/urls.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -104,11 +104,11 @@ referrer = sb.getURL(entry.referrerHash()); // create RSS entry comp = entry.comp(); - prop.put("item_" + c + "_title", comp.title()); + prop.put("item_" + c + "_title", comp.dc_title()); prop.putHTML("item_" + c + "_link", comp.url().toNormalform(true, false)); prop.putHTML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false)); - prop.putHTML("item_" + c + "_description", comp.title()); - prop.put("item_" + c + "_author", comp.author()); + prop.putHTML("item_" + c + "_description", comp.dc_title()); + prop.put("item_" + c + "_author", comp.dc_creator()); prop.put("item_" + c + "_pubDate", serverDate.formatShortSecond(entry.moddate())); prop.put("item_" + c + "_guid", entry.hash()); c++; Modified: trunk/htroot/yacysearch.html =================================================================== --- trunk/htroot/yacysearch.html 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacysearch.html 2008-01-30 21:58:30 UTC (rev 4420) @@ -99,7 +99,7 @@ <p>No Results. (length of search words must be at least 3 characters)</p> :: <div id="results"></div> - <span id="resCounter" style="display: inline;"><strong id="resultsOffset">#[offset]#</strong>-<strong id="itemscount">#[linkcount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong id="globalcount">#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#</span> + <span id="resCounter" style="display: inline;"><strong id="resultsOffset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.:: (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> links from <strong id="remotePeerCount">#[remotePeerCount]#</strong> other YaCy peers.#(/globalresults)#</span> <span id="resNav" style="display: inline;">#[resnav]#</span> :: <p>Searching the web with this peer is disabled for unauthorized users. Please <a href="Status.html?login=">log in</a> as administrator to use the search function</p> Modified: trunk/htroot/yacysearch.java =================================================================== --- trunk/htroot/yacysearch.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacysearch.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -240,8 +240,8 @@ // create a news message HashMap<String, String> map = new HashMap<String, String>(); map.put("url", comp.url().toNormalform(false, true).replace(',', '|')); - map.put("title", comp.title().replace(',', ' ')); - map.put("description", ((document == null) ? comp.title() : document.dc_title()).replace(',', ' ')); + map.put("title", comp.dc_title().replace(',', ' ')); + map.put("description", ((document == null) ? comp.dc_title() : document.dc_title()).replace(',', ' ')); map.put("author", ((document == null) ? "" : document.dc_creator())); map.put("tags", ((document == null) ? "" : document.dc_subject(' '))); yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_SURFTIPP_ADD, map)); @@ -306,12 +306,12 @@ // log serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " + - (theSearch.getLocalCount() + theSearch.getGlobalCount()) + " links found, " + + (theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize()) + " links found, " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // prepare search statistics Long trackerHandle = new Long(System.currentTimeMillis()); - HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getLocalCount() + theSearch.getGlobalCount(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime()); + HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime()); searchProfile.put("querystring", theQuery.queryString); searchProfile.put("time", trackerHandle); searchProfile.put("host", client); @@ -323,13 +323,16 @@ sb.localSearchTracker.put(client, handles); prop = new serverObjects(); - prop.put("num-results_totalcount", yFormatter.number(theSearch.getLocalCount() + theSearch.getGlobalCount(), !rss)); - prop.put("num-results_globalresults", "1"); - prop.put("num-results_globalresults_globalcount", yFormatter.number(theSearch.getGlobalCount(), !rss)); prop.put("num-results_offset", offset); - prop.put("num-results_linkcount", "0"); + prop.put("num-results_itemscount", "0"); prop.put("num-results_itemsPerPage", itemsPerPage); - + prop.put("num-results_totalcount", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), !rss)); + prop.put("num-results_globalresults", (globalsearch) ? "1" : "0"); + prop.put("num-results_globalresults_localResourceSize", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss)); + prop.put("num-results_globalresults_remoteResourceSize", yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss)); + prop.put("num-results_globalresults_remoteIndexCount", yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss)); + prop.put("num-results_globalresults_remotePeerCount", yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss)); + // compose page navigation StringBuffer resnav = new StringBuffer(); int thispage = offset / theQuery.displayResults(); @@ -337,7 +340,7 @@ resnav.append(navurla(thispage - 1, display, theQuery)); resnav.append("<strong><</strong></a> "); } - int numberofpages = Math.min(10, Math.min(thispage + 2, (theSearch.getGlobalCount() + theSearch.getLocalCount()) / theQuery.displayResults())); + int numberofpages = Math.min(10, Math.min(thispage + 2, (theSearch.getRankingResult().getRemoteResourceSize() + theSearch.getRankingResult().getLocalResourceSize()) / theQuery.displayResults())); for (int i = 0; i < numberofpages; i++) { if (i == thispage) { resnav.append("<strong>"); Modified: trunk/htroot/yacysearchitem.html =================================================================== --- trunk/htroot/yacysearchitem.html 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacysearchitem.html 2008-01-30 21:58:30 UTC (rev 4420) @@ -62,7 +62,7 @@ #(/rssreferences)# #(dynamic)#:: <script type="text/javascript"> -statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#"); +statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#", "#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#", "#[remotePeerCount]#"); progressbar.step(1); </script> #(/dynamic)# Modified: trunk/htroot/yacysearchitem.java =================================================================== --- trunk/htroot/yacysearchitem.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/htroot/yacysearchitem.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -44,6 +44,7 @@ import de.anomic.server.serverSwitch; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; +import de.anomic.tools.yFormatter; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacySeed; @@ -85,9 +86,12 @@ // dynamically update count values if (!rss) { prop.put("dynamic_offset", theQuery.neededResults() - theQuery.displayResults() + 1); - prop.put("dynamic_global", theSearch.getGlobalCount()); - prop.put("dynamic_total", theSearch.getGlobalCount() + theSearch.getLocalCount()); - prop.put("dynamic_items", (item < 0) ? theQuery.neededResults() : item + 1); + prop.put("dynamic_itemscount", (item < 0) ? theQuery.neededResults() : item + 1); + prop.put("dynamic_totalcount", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), !rss)); + prop.put("dynamic_localResourceSize", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss)); + prop.put("dynamic_remoteResourceSize", yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss)); + prop.put("dynamic_remoteIndexCount", yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss)); + prop.put("dynamic_remotePeerCount", yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss)); prop.put("dynamic", "1"); } Modified: trunk/source/de/anomic/index/indexURLEntry.java =================================================================== --- trunk/source/de/anomic/index/indexURLEntry.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/index/indexURLEntry.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -120,9 +120,9 @@ public indexURLEntry( yacyURL url, - String descr, - String author, - String tags, + String dc_title, + String dc_creator, + String dc_subject, String ETag, Date mod, Date load, @@ -143,7 +143,7 @@ // create new entry and store it into database this.entry = rowdef.newEntry(); this.entry.setCol(col_hash, url.hash(), null); - this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); + this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator, dc_subject, ETag)); encodeDate(col_mod, mod); encodeDate(col_load, load); encodeDate(col_fresh, fresh); @@ -175,12 +175,12 @@ return new Date(86400000 * this.entry.getColLong(col)); } - public static byte[] encodeComp(yacyURL url, String descr, String author, String tags, String ETag) { + public static byte[] encodeComp(yacyURL url, String dc_title, String dc_creator, String dc_subject, String ETag) { serverCharBuffer s = new serverCharBuffer(200); s.append(url.toNormalform(false, true)).append(10); - s.append(descr).append(10); - s.append(author).append(10); - s.append(tags).append(10); + s.append(dc_title).append(10); + s.append(dc_creator).append(10); + s.append(dc_subject).append(10); s.append(ETag).append(10); return s.toString().getBytes(); } @@ -203,13 +203,13 @@ url = null; } String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = ""; - String author = crypt.simpleDecode(prop.getProperty("author", ""), null); if (author == null) author = ""; + String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = ""; String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = ""; String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); if (ETag == null) ETag = ""; this.entry = rowdef.newEntry(); this.entry.setCol(col_hash, url.hash(), null); - this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); + this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, ETag)); try { encodeDate(col_mod, serverDate.parseShortDay(prop.getProperty("mod", "20000101"))); } catch (ParseException e) { @@ -256,9 +256,9 @@ try { s.append("hash=").append(hash()); s.append(",url=").append(crypt.simpleEncode(comp.url().toNormalform(false, true))); - s.append(",descr=").append(crypt.simpleEncode(comp.title())); - s.append(",author=").append(crypt.simpleEncode(comp.author())); - s.append(",tags=").append(crypt.simpleEncode(comp.tags())); + s.append(",descr=").append(crypt.simpleEncode(comp.dc_title())); + s.append(",author=").append(crypt.simpleEncode(comp.dc_creator())); + s.append(",tags=").append(crypt.simpleEncode(comp.dc_subject())); s.append(",ETag=").append(crypt.simpleEncode(comp.ETag())); s.append(",mod=").append(serverDate.formatShortDay(moddate())); s.append(",load=").append(serverDate.formatShortDay(loaddate())); @@ -429,7 +429,7 @@ null, comp().url(), referrerHash(), - comp().title(), + comp().dc_title(), loaddate(), null, 0, @@ -455,7 +455,7 @@ public class Components { private yacyURL url; - private String title, author, tags, ETag; + private String dc_title, dc_creator, dc_subject, ETag; public Components(String url, String urlhash, String title, String author, String tags, String ETag) { try { @@ -463,22 +463,22 @@ } catch (MalformedURLException e) { this.url = null; } - this.title = title; - this.author = author; - this.tags = tags; + this.dc_title = title; + this.dc_creator = author; + this.dc_subject = tags; this.ETag = ETag; } public Components(yacyURL url, String descr, String author, String tags, String ETag) { this.url = url; - this.title = descr; - this.author = author; - this.tags = tags; + this.dc_title = descr; + this.dc_creator = author; + this.dc_subject = tags; this.ETag = ETag; } public yacyURL url() { return this.url; } - public String title() { return this.title; } - public String author() { return this.author; } - public String tags() { return this.tags; } + public String dc_title() { return this.dc_title; } + public String dc_creator() { return this.dc_creator; } + public String dc_subject() { return this.dc_subject; } public String ETag() { return this.ETag; } } Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java =================================================================== --- trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -343,6 +343,7 @@ } else { // read old value kelondroRow.Entry v = table.get(i); + assert v != null; System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, rowdef.primaryKeyLength); System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength); // write new value Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -72,10 +72,10 @@ import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroException; -import de.anomic.kelondro.kelondroSplitTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; +import de.anomic.kelondro.kelondroSplitTable; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverCodings; import de.anomic.server.logging.serverLog; @@ -623,14 +623,14 @@ pw.println(url); } if (format == 1) { - pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</a><br>"); + pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>"); } if (format == 2) { pw.println("<item>"); - pw.println("<title>" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</title>"); + pw.println("<title>" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>"); pw.println("<link>" + yacyURL.escape(url) + "</link>"); - if (comp.author().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.author(), true, true) + "</author>"); - if (comp.tags().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.tags(), true, true) + "</description>"); + if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, true) + "</author>"); + if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(), true, true) + "</description>"); pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>"); pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>"); pw.println("</item>"); Modified: trunk/source/de/anomic/plasma/plasmaSearchAPI.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -88,10 +88,10 @@ } } - public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder, boolean fetchURLs) { + public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder) { plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, sb.getRanking(), filter); plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE); - ranked.execQuery(fetchURLs); + ranked.execQuery(); if (ranked.filteredCount() == 0) { prop.put("searchresult", 2); Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -76,7 +76,6 @@ public TreeMap<String, String> IAResults; public TreeMap<String, Integer> IACount; public String IAmaxcounthash, IAneardhthash; - private int localcount; private resultWorker[] workerThreads; private ArrayList<ResultEntry> resultList; //private int resultListLock; // a pointer that shows that all elements below this pointer are fixed and may not be changed again @@ -101,7 +100,6 @@ this.IACount = new TreeMap<String, Integer>(); this.IAmaxcounthash = null; this.IAneardhthash = null; - this.localcount = 0; this.urlRetrievalAllTime = 0; this.snippetComputationAllTime = 0; this.workerThreads = null; @@ -157,8 +155,7 @@ } else { // do a local search this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation); - this.rankedCache.execQuery(true); - this.localcount = this.rankedCache.filteredCount(); + this.rankedCache.execQuery(); //plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process); if (generateAbstracts) { @@ -249,8 +246,7 @@ // sort the local containers and truncate it to a limited count, // so following sortings together with the global results will be fast synchronized (rankedCache) { - rankedCache.execQuery(true); - localcount = rankedCache.filteredCount(); + rankedCache.execQuery(); } } } @@ -291,13 +287,13 @@ long startTime = System.currentTimeMillis(); indexURLEntry.Components comp = page.comp(); - String pagetitle = comp.title().toLowerCase(); + String pagetitle = comp.dc_title().toLowerCase(); if (comp.url() == null) { registerFailure(page.hash(), "url corrupted (null)"); return null; // rare case where the url is corrupted } String pageurl = comp.url().toString().toLowerCase(); - String pageauthor = comp.author().toLowerCase(); + String pageauthor = comp.dc_creator().toLowerCase(); long dbRetrievalTime = System.currentTimeMillis() - startTime; // check exclusion @@ -315,7 +311,7 @@ // check constraints if ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof)) && - (!(comp.title().startsWith("Index of")))) { + (!(comp.dc_title().startsWith("Index of")))) { final Iterator<String> wi = query.queryHashes.iterator(); while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash()); registerFailure(page.hash(), "index-of constraint not fullfilled"); @@ -423,14 +419,10 @@ return secondarySearchThreads; } - public int getLocalCount() { - return this.localcount; + public plasmaSearchRankingProcess getRankingResult() { + return this.rankedCache; } - public int getGlobalCount() { - return this.rankedCache.getGlobalCount(); - } - public long getURLRetrievalTime() { return this.urlRetrievalAllTime; } @@ -465,7 +457,7 @@ if ((query.onlineSnippetFetch) && (!event.anyWorkerAlive()) && (event.resultList.size() < query.neededResults() + 10) && - ((event.getLocalCount() + event.getGlobalCount()) > event.resultList.size())) { + ((event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize()) > event.resultList.size())) { // set new timeout event.eventTime = System.currentTimeMillis(); // start worker threads to fetch urls and snippets @@ -764,7 +756,7 @@ ("yacyshare " + filename.replace('?', ' ') + " " + - urlcomps.title()).getBytes(), "UTF-8").keySet(), + urlcomps.dc_title()).getBytes(), "UTF-8").keySet(), urlentry.hash()); wordIndex.loadedURL.remove(urlentry.hash()); // clean up throw new RuntimeException("index void"); @@ -794,7 +786,7 @@ return (alternative_urlname == null) ? urlcomps.url().toNormalform(false, true) : alternative_urlname; } public String title() { - return urlcomps.title(); + return urlcomps.dc_title(); } public plasmaSnippetCache.TextSnippet textSnippet() { return this.textSnippet; Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -57,9 +57,8 @@ private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process private plasmaSearchQuery query; private int sortorder; - private int filteredCount; private int maxentries; - private int globalcount; + private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize; private indexRWIEntryOrder order; private HashMap<String, Object> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private kelondroMScoreCluster<String> ref; // reference score computation for the commonSense heuristic @@ -76,11 +75,13 @@ this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>(); this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>(); this.handover = new HashMap<String, String>(); - this.filteredCount = 0; this.order = null; this.query = query; this.maxentries = maxentries; - this.globalcount = 0; + this.remote_peerCount = 0; + this.remote_indexCount = 0; + this.remote_resourceSize = 0; + this.local_resourceSize = 0; this.urlhashes = new HashMap<String, Object>(); this.ref = new kelondroMScoreCluster<String>(); this.misses = new TreeSet<String>(); @@ -90,7 +91,7 @@ for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} } - public void execQuery(boolean fetchURLs) { + public void execQuery() { long timer = System.currentTimeMillis(); this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null); @@ -113,16 +114,24 @@ } if (sortorder == 2) { - insertRanked(index, true); + insertRanked(index, true, index.size()); } else { - insertNoOrder(index, fetchURLs); + insertNoOrder(index, true, index.size()); } } - private void insertNoOrder(indexContainer index, boolean local) { + private void insertNoOrder(indexContainer index, boolean local, int fullResource) { final Iterator<indexRWIRowEntry> en = index.entries(); // generate a new map where the urls are sorted (not by hash but by the url text) + if (local) { + this.local_resourceSize += fullResource; + } else { + this.remote_resourceSize += fullResource; + this.remote_peerCount++; + this.remote_indexCount += index.size(); + } + indexRWIRowEntry ientry; indexURLEntry uentry; String u; @@ -141,20 +150,14 @@ if (sortorder == 0) { this.sortedRWIEntries.put(ientry.urlHash(), ientry); this.urlhashes.put(ientry.urlHash(), ientry.urlHash()); - filteredCount++; } else { - if (local) { - uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0); - if (uentry == null) { - this.misses.add(ientry.urlHash()); - } else { - u = uentry.comp().url().toNormalform(false, true); - this.sortedRWIEntries.put(u, ientry); - this.urlhashes.put(ientry.urlHash(), u); - filteredCount++; - } + uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0); + if (uentry == null) { + this.misses.add(ientry.urlHash()); } else { - filteredCount++; + u = uentry.comp().url().toNormalform(false, true); + this.sortedRWIEntries.put(u, ientry); + this.urlhashes.put(ientry.urlHash(), u); } } @@ -163,12 +166,18 @@ } // end loop } - public void insertRanked(indexContainer index, boolean local) { + public void insertRanked(indexContainer index, boolean local, int fullResource) { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime assert (index != null); if (index.size() == 0) return; + if (local) { + this.local_resourceSize += fullResource; + } else { + this.remote_resourceSize += fullResource; + this.remote_peerCount++; + } long timer = System.currentTimeMillis(); if (this.order == null) { @@ -224,11 +233,8 @@ } // increase counter for statistics - if (!local) this.globalcount++; + if (!local) this.remote_indexCount++; } - this.filteredCount = sortedRWIEntries.size(); - //long sc = Math.max(1, System.currentTimeMillis() - s0); - //System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc); //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer)); @@ -350,14 +356,34 @@ return flagcount; } + // "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers." + public int filteredCount() { - return this.filteredCount; + // the number of index entries that are considered as result set + return this.sortedRWIEntries.size(); } - public int getGlobalCount() { - return this.globalcount; + public int getRemoteIndexCount() { + // the number of result contributions from all the remote peers + return this.remote_indexCount; } + public int getRemotePeerCount() { + // the number of remote peers that have contributed + return this.remote_peerCount; + } + + public int getRemoteResourceSize() { + // the number of all hits in all the remote peers + return this.remote_resourceSize; + } + + public int getLocalResourceSize() { + // the number of hits in the local peer (index size, size of the collection in the own index) + return this.local_resourceSize; + } + + public indexRWIEntry remove(String urlHash) { Object r = (Long) urlhashes.get(urlHash); if (r == null) return null; Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -874,7 +874,7 @@ plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID); assert plasmaSwitchboard.getSwitchboard() != null; assert plasmaSwitchboard.getSwitchboard().wordIndex != null; - assert event != null; + assert event != null : "eventID = " + eventID; assert event.getQuery() != null; plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes, urlHash); event.remove(urlHash); Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -2205,7 +2205,7 @@ /* ========================================================================= * CREATE INDEX * ========================================================================= */ - String docDescription = document.dc_title(); + String dc_title = document.dc_title(); yacyURL referrerURL = entry.referrerURL(); String noIndexReason = plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR; @@ -2236,9 +2236,9 @@ long ldate = System.currentTimeMillis(); indexURLEntry newEntry = new indexURLEntry( entry.url(), // URL - docDescription, // document description - document.dc_creator(), // author - document.dc_subject(' '), // tags + dc_title, // document description + document.dc_creator(), // author + document.dc_subject(' '), // tags "", // ETag docDate, // modification date new Date(), // loaded date @@ -2406,7 +2406,7 @@ // of string concatenation log.logInfo("*Indexed " + words + " words in URL " + entry.url() + " [" + entry.urlHash() + "]" + - "\n\tDescription: " + docDescription + + "\n\tDescription: " + dc_title + "\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " + "Size: " + document.getTextLength() + " bytes | " + "Anchors: " + ((document.getAnchors() == null) ? 0 : document.getAnchors().size()) + @@ -2430,7 +2430,7 @@ } } else { log.logFine("Not Indexed Resource '" + entry.url().toNormalform(false, true) + "': process case=" + processCase); - addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, dc_title, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); } } catch (Exception ee) { if (ee instanceof InterruptedException) throw (InterruptedException)ee; @@ -2443,7 +2443,7 @@ if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, ""); } - addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, dc_title, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); } } else { @@ -2451,7 +2451,7 @@ checkInterruption(); log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason); - addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, dc_title, noIndexReason, new kelondroBitfield()); if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) { if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, ""); Modified: trunk/source/de/anomic/yacy/yacyClient.java =================================================================== --- trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 00:15:43 UTC (rev 4419) +++ trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 21:58:30 UTC (rev 4420) @@ -477,9 +477,10 @@ // now create a plasmaIndex out of this result // System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug - int results = 0; + int results = 0, joincount = 0; try { - results = Integer.parseInt((String) result.get("count")); + results = Integer.parseInt(result.get("count")); + joincount = Integer.parseInt(result.get("joincount")); } catch (NumberFormatException e) { yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" + target.getName() + ", wrong output format"); yacyCore.peerActions.peerDeparture(target, "search request to peer created number format exception"); @@ -557,7 +558,7 @@ // store remote result to local result container synchronized (containerCache) { // insert one container into the search result buffer - containerCache.insertRanked(container[0], false); // one is enough + containerCache.insertRanked(container[0], false, joincount); // one is enough // integrate remote topwords String references = (String) result.get("references"); _______________________________________________ YaCy-svn mailing list YaCy-svn@lists.berlios.de https://lists.berlios.de/mailman/listinfo/yacy-svn