Author: orbiter
Date: 2008-01-30 22:58:30 +0100 (Wed, 30 Jan 2008)
New Revision: 4420
Modified:
trunk/htroot/Bookmarks.java
trunk/htroot/CrawlResults.java
trunk/htroot/IndexControlRWIs_p.java
trunk/htroot/IndexControlURLs_p.java
trunk/htroot/ViewFile.java
trunk/htroot/js/yacysearch.js
trunk/htroot/yacy/search.java
trunk/htroot/yacy/urls.java
trunk/htroot/yacysearch.html
trunk/htroot/yacysearch.java
trunk/htroot/yacysearchitem.html
trunk/htroot/yacysearchitem.java
trunk/source/de/anomic/index/indexURLEntry.java
trunk/source/de/anomic/kelondro/kelondroEcoTable.java
trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
trunk/source/de/anomic/plasma/plasmaSearchAPI.java
trunk/source/de/anomic/plasma/plasmaSearchEvent.java
trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
trunk/source/de/anomic/plasma/plasmaSnippetCache.java
trunk/source/de/anomic/plasma/plasmaSwitchboard.java
trunk/source/de/anomic/yacy/yacyClient.java
Log:
- more Dublin Core naming of page metadata (title -> dc_title, author -> dc_creator, tags -> dc_subject)
- better presentation of result counters in search results
Modified: trunk/htroot/Bookmarks.java
===================================================================
--- trunk/htroot/Bookmarks.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/Bookmarks.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -203,10 +203,10 @@
document =
plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url",
comp.url().toNormalform(false, true));
- prop.putHTML("mode_title", comp.title());
- prop.putHTML("mode_description", (document ==
null) ? comp.title(): document.dc_title());
- prop.putHTML("mode_author", comp.author());
- prop.putHTML("mode_tags", (document == null) ?
comp.tags() : document.dc_subject(','));
+ prop.putHTML("mode_title", comp.dc_title());
+ prop.putHTML("mode_description", (document ==
null) ? comp.dc_title(): document.dc_title());
+ prop.putHTML("mode_author", comp.dc_creator());
+ prop.putHTML("mode_tags", (document == null) ?
comp.dc_subject() : document.dc_subject(','));
prop.putHTML("mode_path","");
prop.put("mode_public", "0");
prop.put("mode_feed", "0"); //TODO: check if it IS
a feed
Modified: trunk/htroot/CrawlResults.java
===================================================================
--- trunk/htroot/CrawlResults.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/CrawlResults.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -217,11 +217,11 @@
} else {
prop.put("table_indexed_" + cnt +
"_showTitle_available", "1");
- if (comp.title() == null ||
comp.title().trim().length() == 0)
+ if (comp.dc_title() == null ||
comp.dc_title().trim().length() == 0)
prop.put("table_indexed_" + cnt +
"_showTitle_available_nodescr", "0");
else
prop.put("table_indexed_" + cnt +
"_showTitle_available_nodescr", "1");
- prop.putHTML("table_indexed_" + cnt +
"_showTitle_available_nodescr_urldescr", comp.title());
+ prop.putHTML("table_indexed_" + cnt +
"_showTitle_available_nodescr_urldescr", comp.dc_title());
prop.put("table_indexed_" + cnt +
"_showTitle_available_cachepath", cachepath);
prop.putHTML("table_indexed_" + cnt +
"_showTitle_available_urltitle", urlstr);
Modified: trunk/htroot/IndexControlRWIs_p.java
===================================================================
--- trunk/htroot/IndexControlRWIs_p.java 2008-01-30 00:15:43 UTC (rev
4419)
+++ trunk/htroot/IndexControlRWIs_p.java 2008-01-30 21:58:30 UTC (rev
4420)
@@ -89,7 +89,7 @@
if (post.containsKey("keystringsearch")) {
keyhash = plasmaCondenser.word2hash(keystring);
prop.put("keyhash", keyhash);
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 1);
prop.put("searchresult_word", keystring);
@@ -100,7 +100,7 @@
if (keystring.length() == 0 ||
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
prop.put("keystring", "<not possible to compute word
from hash>");
}
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 2);
prop.put("searchresult_wordhash", keyhash);
@@ -159,7 +159,7 @@
}
kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
int count = (post.get("lines", "all").equals("all")) ? -1 :
post.getInt("lines", -1);
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder, true);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking,
flags, count, sortorder);
}
Modified: trunk/htroot/IndexControlURLs_p.java
===================================================================
--- trunk/htroot/IndexControlURLs_p.java 2008-01-30 00:15:43 UTC (rev
4419)
+++ trunk/htroot/IndexControlURLs_p.java 2008-01-30 21:58:30 UTC (rev
4420)
@@ -241,7 +241,7 @@
prop.put("genUrlProfile", "2");
prop.putHTML("genUrlProfile_urlNormalform",
comp.url().toNormalform(false, true));
prop.put("genUrlProfile_urlhash", urlhash);
- prop.put("genUrlProfile_urlDescr", comp.title());
+ prop.put("genUrlProfile_urlDescr", comp.dc_title());
prop.put("genUrlProfile_moddate", entry.moddate().toString());
prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);
Modified: trunk/htroot/ViewFile.java
===================================================================
--- trunk/htroot/ViewFile.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/ViewFile.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -124,7 +124,7 @@
return prop;
}
url = comp.url();
- descr = comp.title();
+ descr = comp.dc_title();
urlEntry.wordCount();
size = urlEntry.size();
pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof);
Modified: trunk/htroot/js/yacysearch.js
===================================================================
--- trunk/htroot/js/yacysearch.js 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/js/yacysearch.js 2008-01-30 21:58:30 UTC (rev 4420)
@@ -70,9 +70,12 @@
}
}
-function statistics(offset, items, global, total) {
+function statistics(offset, itemscount, totalcount, localResourceSize,
remoteResourceSize, remoteIndexCount, remotePeerCount) {
document.getElementById("resultsOffset").firstChild.nodeValue = offset;
- document.getElementById("itemscount").firstChild.nodeValue = items;
- document.getElementById("globalcount").firstChild.nodeValue = global;
- document.getElementById("totalcount").firstChild.nodeValue = total;
+ document.getElementById("itemscount").firstChild.nodeValue = itemscount;
+ document.getElementById("totalcount").firstChild.nodeValue = totalcount;
+ document.getElementById("localResourceSize").firstChild.nodeValue =
localResourceSize;
+ document.getElementById("remoteResourceSize").firstChild.nodeValue =
remoteResourceSize;
+ document.getElementById("remoteIndexCount").firstChild.nodeValue =
remoteIndexCount;
+ document.getElementById("remotePeerCount").firstChild.nodeValue =
remotePeerCount;
}
\ No newline at end of file
Modified: trunk/htroot/yacy/search.java
===================================================================
--- trunk/htroot/yacy/search.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacy/search.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -183,7 +183,7 @@
snippetComputationAllTime = theSearch.getSnippetComputationTime();
// set statistic details of search result and find best result
index set
- if (theSearch.getLocalCount() == 0) {
+ if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
prop.put("indexcount", "");
prop.put("joincount", "0");
} else {
@@ -207,11 +207,11 @@
}
prop.put("indexcount", indexcount.toString());
- if (theSearch.getLocalCount() == 0) {
+ if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
joincount = 0;
prop.put("joincount", "0");
} else {
- joincount = theSearch.getLocalCount();
+ joincount =
theSearch.getRankingResult().getLocalResourceSize();
prop.put("joincount", Integer.toString(joincount));
accu = theSearch.completeResults(duetime);
}
Modified: trunk/htroot/yacy/urls.java
===================================================================
--- trunk/htroot/yacy/urls.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacy/urls.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -104,11 +104,11 @@
referrer = sb.getURL(entry.referrerHash());
// create RSS entry
comp = entry.comp();
- prop.put("item_" + c + "_title", comp.title());
+ prop.put("item_" + c + "_title", comp.dc_title());
prop.putHTML("item_" + c + "_link",
comp.url().toNormalform(true, false));
prop.putHTML("item_" + c + "_referrer", (referrer == null) ?
"" : referrer.toNormalform(true, false));
- prop.putHTML("item_" + c + "_description", comp.title());
- prop.put("item_" + c + "_author", comp.author());
+ prop.putHTML("item_" + c + "_description", comp.dc_title());
+ prop.put("item_" + c + "_author", comp.dc_creator());
prop.put("item_" + c + "_pubDate",
serverDate.formatShortSecond(entry.moddate()));
prop.put("item_" + c + "_guid", entry.hash());
c++;
Modified: trunk/htroot/yacysearch.html
===================================================================
--- trunk/htroot/yacysearch.html 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearch.html 2008-01-30 21:58:30 UTC (rev 4420)
@@ -99,7 +99,7 @@
<p>No Results. (length of search words must be at least 3
characters)</p>
::
<div id="results"></div>
- <span id="resCounter" style="display: inline;"><strong
id="resultsOffset">#[offset]#</strong>-<strong
id="itemscount">#[linkcount]#</strong> results from a total number of <strong
id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong
id="globalcount">#[globalcount]#</strong> links from other YaCy
peers.#(/globalresults)#</span>
+ <span id="resCounter" style="display: inline;"><strong
id="resultsOffset">#[offset]#</strong>-<strong
id="itemscount">#[itemscount]#</strong> results from a total number of <strong
id="totalcount">#[totalcount]#</strong> known#(globalresults)#.:: (<strong
id="localResourceSize">#[localResourceSize]#</strong> local, <strong
id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong
id="remoteIndexCount">#[remoteIndexCount]#</strong> links from <strong
id="remotePeerCount">#[remotePeerCount]#</strong> other YaCy
peers.#(/globalresults)#</span>
<span id="resNav" style="display: inline;">#[resnav]#</span>
::
<p>Searching the web with this peer is disabled for unauthorized users.
Please <a href="Status.html?login=">log in</a> as administrator to use the
search function</p>
Modified: trunk/htroot/yacysearch.java
===================================================================
--- trunk/htroot/yacysearch.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearch.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -240,8 +240,8 @@
// create a news message
HashMap<String, String> map = new HashMap<String,
String>();
map.put("url", comp.url().toNormalform(false,
true).replace(',', '|'));
- map.put("title", comp.title().replace(',', ' '));
- map.put("description", ((document == null) ?
comp.title() : document.dc_title()).replace(',', ' '));
+ map.put("title", comp.dc_title().replace(',', ' '));
+ map.put("description", ((document == null) ?
comp.dc_title() : document.dc_title()).replace(',', ' '));
map.put("author", ((document == null) ? "" :
document.dc_creator()));
map.put("tags", ((document == null) ? "" :
document.dc_subject(' ')));
yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_SURFTIPP_ADD,
map));
@@ -306,12 +306,12 @@
// log
serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " +
theQuery.queryString + " - " +
- (theSearch.getLocalCount() + theSearch.getGlobalCount()) +
" links found, " +
+ (theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize()) + " links found, " +
((System.currentTimeMillis() - timestamp) / 1000) + "
seconds");
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
- HashMap<String, Object> searchProfile =
theQuery.resultProfile(theSearch.getLocalCount() + theSearch.getGlobalCount(),
System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(),
theSearch.getSnippetComputationTime());
+ HashMap<String, Object> searchProfile =
theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize(),
System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(),
theSearch.getSnippetComputationTime());
searchProfile.put("querystring", theQuery.queryString);
searchProfile.put("time", trackerHandle);
searchProfile.put("host", client);
@@ -323,13 +323,16 @@
sb.localSearchTracker.put(client, handles);
prop = new serverObjects();
- prop.put("num-results_totalcount",
yFormatter.number(theSearch.getLocalCount() + theSearch.getGlobalCount(),
!rss));
- prop.put("num-results_globalresults", "1");
- prop.put("num-results_globalresults_globalcount",
yFormatter.number(theSearch.getGlobalCount(), !rss));
prop.put("num-results_offset", offset);
- prop.put("num-results_linkcount", "0");
+ prop.put("num-results_itemscount", "0");
prop.put("num-results_itemsPerPage", itemsPerPage);
-
+ prop.put("num-results_totalcount",
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+ prop.put("num-results_globalresults", (globalsearch) ? "1" : "0");
+ prop.put("num-results_globalresults_localResourceSize",
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
+ prop.put("num-results_globalresults_remoteResourceSize",
yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+ prop.put("num-results_globalresults_remoteIndexCount",
yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
+ prop.put("num-results_globalresults_remotePeerCount",
yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
+
// compose page navigation
StringBuffer resnav = new StringBuffer();
int thispage = offset / theQuery.displayResults();
@@ -337,7 +340,7 @@
resnav.append(navurla(thispage - 1, display, theQuery));
resnav.append("<strong><</strong></a> ");
}
- int numberofpages = Math.min(10, Math.min(thispage + 2,
(theSearch.getGlobalCount() + theSearch.getLocalCount()) /
theQuery.displayResults()));
+ int numberofpages = Math.min(10, Math.min(thispage + 2,
(theSearch.getRankingResult().getRemoteResourceSize() +
theSearch.getRankingResult().getLocalResourceSize()) /
theQuery.displayResults()));
for (int i = 0; i < numberofpages; i++) {
if (i == thispage) {
resnav.append("<strong>");
Modified: trunk/htroot/yacysearchitem.html
===================================================================
--- trunk/htroot/yacysearchitem.html 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearchitem.html 2008-01-30 21:58:30 UTC (rev 4420)
@@ -62,7 +62,7 @@
#(/rssreferences)#
#(dynamic)#::
<script type="text/javascript">
-statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#");
+statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#",
"#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#",
"#[remotePeerCount]#");
progressbar.step(1);
</script>
#(/dynamic)#
Modified: trunk/htroot/yacysearchitem.java
===================================================================
--- trunk/htroot/yacysearchitem.java 2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/htroot/yacysearchitem.java 2008-01-30 21:58:30 UTC (rev 4420)
@@ -44,6 +44,7 @@
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
+import de.anomic.tools.yFormatter;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacySeed;
@@ -85,9 +86,12 @@
// dynamically update count values
if (!rss) {
prop.put("dynamic_offset", theQuery.neededResults() -
theQuery.displayResults() + 1);
- prop.put("dynamic_global", theSearch.getGlobalCount());
- prop.put("dynamic_total", theSearch.getGlobalCount() +
theSearch.getLocalCount());
- prop.put("dynamic_items", (item < 0) ? theQuery.neededResults() :
item + 1);
+ prop.put("dynamic_itemscount", (item < 0) ?
theQuery.neededResults() : item + 1);
+ prop.put("dynamic_totalcount",
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+ prop.put("dynamic_localResourceSize",
yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
+ prop.put("dynamic_remoteResourceSize",
yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
+ prop.put("dynamic_remoteIndexCount",
yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
+ prop.put("dynamic_remotePeerCount",
yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
prop.put("dynamic", "1");
}
Modified: trunk/source/de/anomic/index/indexURLEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexURLEntry.java 2008-01-30 00:15:43 UTC
(rev 4419)
+++ trunk/source/de/anomic/index/indexURLEntry.java 2008-01-30 21:58:30 UTC
(rev 4420)
@@ -120,9 +120,9 @@
public indexURLEntry(
yacyURL url,
- String descr,
- String author,
- String tags,
+ String dc_title,
+ String dc_creator,
+ String dc_subject,
String ETag,
Date mod,
Date load,
@@ -143,7 +143,7 @@
// create new entry and store it into database
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash(), null);
- this.entry.setCol(col_comp, encodeComp(url, descr, author, tags,
ETag));
+ this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator,
dc_subject, ETag));
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
@@ -175,12 +175,12 @@
return new Date(86400000 * this.entry.getColLong(col));
}
- public static byte[] encodeComp(yacyURL url, String descr, String author,
String tags, String ETag) {
+ public static byte[] encodeComp(yacyURL url, String dc_title, String
dc_creator, String dc_subject, String ETag) {
serverCharBuffer s = new serverCharBuffer(200);
s.append(url.toNormalform(false, true)).append(10);
- s.append(descr).append(10);
- s.append(author).append(10);
- s.append(tags).append(10);
+ s.append(dc_title).append(10);
+ s.append(dc_creator).append(10);
+ s.append(dc_subject).append(10);
s.append(ETag).append(10);
return s.toString().getBytes();
}
@@ -203,13 +203,13 @@
url = null;
}
String descr = crypt.simpleDecode(prop.getProperty("descr", ""),
null); if (descr == null) descr = "";
- String author = crypt.simpleDecode(prop.getProperty("author", ""),
null); if (author == null) author = "";
+ String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""),
null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null);
if (tags == null) tags = "";
String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null);
if (ETag == null) ETag = "";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash(), null);
- this.entry.setCol(col_comp, encodeComp(url, descr, author, tags,
ETag));
+ this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags,
ETag));
try {
encodeDate(col_mod,
serverDate.parseShortDay(prop.getProperty("mod", "20000101")));
} catch (ParseException e) {
@@ -256,9 +256,9 @@
try {
s.append("hash=").append(hash());
s.append(",url=").append(crypt.simpleEncode(comp.url().toNormalform(false,
true)));
- s.append(",descr=").append(crypt.simpleEncode(comp.title()));
- s.append(",author=").append(crypt.simpleEncode(comp.author()));
- s.append(",tags=").append(crypt.simpleEncode(comp.tags()));
+ s.append(",descr=").append(crypt.simpleEncode(comp.dc_title()));
+ s.append(",author=").append(crypt.simpleEncode(comp.dc_creator()));
+ s.append(",tags=").append(crypt.simpleEncode(comp.dc_subject()));
s.append(",ETag=").append(crypt.simpleEncode(comp.ETag()));
s.append(",mod=").append(serverDate.formatShortDay(moddate()));
s.append(",load=").append(serverDate.formatShortDay(loaddate()));
@@ -429,7 +429,7 @@
null,
comp().url(),
referrerHash(),
- comp().title(),
+ comp().dc_title(),
loaddate(),
null,
0,
@@ -455,7 +455,7 @@
public class Components {
private yacyURL url;
- private String title, author, tags, ETag;
+ private String dc_title, dc_creator, dc_subject, ETag;
public Components(String url, String urlhash, String title, String
author, String tags, String ETag) {
try {
@@ -463,22 +463,22 @@
} catch (MalformedURLException e) {
this.url = null;
}
- this.title = title;
- this.author = author;
- this.tags = tags;
+ this.dc_title = title;
+ this.dc_creator = author;
+ this.dc_subject = tags;
this.ETag = ETag;
}
public Components(yacyURL url, String descr, String author, String
tags, String ETag) {
this.url = url;
- this.title = descr;
- this.author = author;
- this.tags = tags;
+ this.dc_title = descr;
+ this.dc_creator = author;
+ this.dc_subject = tags;
this.ETag = ETag;
}
public yacyURL url() { return this.url; }
- public String title() { return this.title; }
- public String author() { return this.author; }
- public String tags() { return this.tags; }
+ public String dc_title() { return this.dc_title; }
+ public String dc_creator() { return this.dc_creator; }
+ public String dc_subject() { return this.dc_subject; }
public String ETag() { return this.ETag; }
}
Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-30
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-30
21:58:30 UTC (rev 4420)
@@ -343,6 +343,7 @@
} else {
// read old value
kelondroRow.Entry v = table.get(i);
+ assert v != null;
System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0,
rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength,
rowdef.objectsize - rowdef.primaryKeyLength);
// write new value
Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-30 00:15:43 UTC
(rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-30 21:58:30 UTC
(rev 4420)
@@ -72,10 +72,10 @@
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
+import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.logging.serverLog;
@@ -623,14 +623,14 @@
pw.println(url);
}
if (format == 1) {
- pw.println("<a href=\"" + url + "\">" +
htmlTools.encodeUnicode2html(comp.title(), true, true) + "</a><br>");
+ pw.println("<a href=\"" + url + "\">" +
htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>");
}
if (format == 2) {
pw.println("<item>");
- pw.println("<title>" +
htmlTools.encodeUnicode2html(comp.title(), true, true) + "</title>");
+ pw.println("<title>" +
htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>");
pw.println("<link>" +
yacyURL.escape(url) + "</link>");
- if (comp.author().length() > 0)
pw.println("<author>" + htmlTools.encodeUnicode2html(comp.author(), true, true)
+ "</author>");
- if (comp.tags().length() > 0)
pw.println("<description>" + htmlTools.encodeUnicode2html(comp.tags(), true,
true) + "</description>");
+ if (comp.dc_creator().length() > 0)
pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true,
true) + "</author>");
+ if (comp.dc_subject().length() > 0)
pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(),
true, true) + "</description>");
pw.println("<pubDate>" +
entry.moddate().toString() + "</pubDate>");
pw.println("<guid
isPermaLink=\"false\">" + entry.hash() + "</guid>");
pw.println("</item>");
Modified: trunk/source/de/anomic/plasma/plasmaSearchAPI.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-01-30 00:15:43 UTC
(rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-01-30 21:58:30 UTC
(rev 4420)
@@ -88,10 +88,10 @@
}
}
- public static plasmaSearchRankingProcess genSearchresult(serverObjects
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int
sortorder, boolean fetchURLs) {
+ public static plasmaSearchRankingProcess genSearchresult(serverObjects
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int
sortorder) {
plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1,
sb.getRanking(), filter);
plasmaSearchRankingProcess ranked = new
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE);
- ranked.execQuery(fetchURLs);
+ ranked.execQuery();
if (ranked.filteredCount() == 0) {
prop.put("searchresult", 2);
Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-30
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-30
21:58:30 UTC (rev 4420)
@@ -76,7 +76,6 @@
public TreeMap<String, String> IAResults;
public TreeMap<String, Integer> IACount;
public String IAmaxcounthash, IAneardhthash;
- private int localcount;
private resultWorker[] workerThreads;
private ArrayList<ResultEntry> resultList;
//private int resultListLock; // a pointer that shows that all elements
below this pointer are fixed and may not be changed again
@@ -101,7 +100,6 @@
this.IACount = new TreeMap<String, Integer>();
this.IAmaxcounthash = null;
this.IAneardhthash = null;
- this.localcount = 0;
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.workerThreads = null;
@@ -157,8 +155,7 @@
} else {
// do a local search
this.rankedCache = new plasmaSearchRankingProcess(wordIndex,
query, 2, max_results_preparation);
- this.rankedCache.execQuery(true);
- this.localcount = this.rankedCache.filteredCount();
+ this.rankedCache.execQuery();
//plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query,
false, 2, ranking, process);
if (generateAbstracts) {
@@ -249,8 +246,7 @@
// sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be
fast
synchronized (rankedCache) {
- rankedCache.execQuery(true);
- localcount = rankedCache.filteredCount();
+ rankedCache.execQuery();
}
}
}
@@ -291,13 +287,13 @@
long startTime = System.currentTimeMillis();
indexURLEntry.Components comp = page.comp();
- String pagetitle = comp.title().toLowerCase();
+ String pagetitle = comp.dc_title().toLowerCase();
if (comp.url() == null) {
registerFailure(page.hash(), "url corrupted (null)");
return null; // rare case where the url is corrupted
}
String pageurl = comp.url().toString().toLowerCase();
- String pageauthor = comp.author().toLowerCase();
+ String pageauthor = comp.dc_creator().toLowerCase();
long dbRetrievalTime = System.currentTimeMillis() - startTime;
// check exclusion
@@ -315,7 +311,7 @@
// check constraints
if ((query.constraint != null) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
- (!(comp.title().startsWith("Index of")))) {
+ (!(comp.dc_title().startsWith("Index of")))) {
final Iterator<String> wi = query.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(),
page.hash());
registerFailure(page.hash(), "index-of constraint not fullfilled");
@@ -423,14 +419,10 @@
return secondarySearchThreads;
}
- public int getLocalCount() {
- return this.localcount;
+ public plasmaSearchRankingProcess getRankingResult() {
+ return this.rankedCache;
}
- public int getGlobalCount() {
- return this.rankedCache.getGlobalCount();
- }
-
public long getURLRetrievalTime() {
return this.urlRetrievalAllTime;
}
@@ -465,7 +457,7 @@
if ((query.onlineSnippetFetch) &&
(!event.anyWorkerAlive()) &&
(event.resultList.size() < query.neededResults() + 10) &&
- ((event.getLocalCount() + event.getGlobalCount()) >
event.resultList.size())) {
+ ((event.getRankingResult().getLocalResourceSize() +
event.getRankingResult().getRemoteResourceSize()) > event.resultList.size())) {
// set new timeout
event.eventTime = System.currentTimeMillis();
// start worker threads to fetch urls and snippets
@@ -764,7 +756,7 @@
("yacyshare " +
filename.replace('?', ' ') +
" " +
- urlcomps.title()).getBytes(),
"UTF-8").keySet(),
+ urlcomps.dc_title()).getBytes(),
"UTF-8").keySet(),
urlentry.hash());
wordIndex.loadedURL.remove(urlentry.hash()); // clean
up
throw new RuntimeException("index void");
@@ -794,7 +786,7 @@
return (alternative_urlname == null) ?
urlcomps.url().toNormalform(false, true) : alternative_urlname;
}
public String title() {
- return urlcomps.title();
+ return urlcomps.dc_title();
}
public plasmaSnippetCache.TextSnippet textSnippet() {
return this.textSnippet;
Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-01-30 00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-01-30 21:58:30 UTC (rev 4420)
@@ -57,9 +57,8 @@
private HashMap<String, String> handover; // key = urlhash, value =
urlstring; used for double-check of urls that had been handed over to search
process
private plasmaSearchQuery query;
private int sortorder;
- private int filteredCount;
private int maxentries;
- private int globalcount;
+ private int remote_peerCount, remote_indexCount, remote_resourceSize,
local_resourceSize;
private indexRWIEntryOrder order;
private HashMap<String, Object> urlhashes; // map for double-check;
String/Long relation, addresses ranking number (backreference for deletion)
private kelondroMScoreCluster<String> ref; // reference score computation
for the commonSense heuristic
@@ -76,11 +75,13 @@
this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
this.doubleDomCache = new HashMap<String, TreeMap<Object,
indexRWIRowEntry>>();
this.handover = new HashMap<String, String>();
- this.filteredCount = 0;
this.order = null;
this.query = query;
this.maxentries = maxentries;
- this.globalcount = 0;
+ this.remote_peerCount = 0;
+ this.remote_indexCount = 0;
+ this.remote_resourceSize = 0;
+ this.local_resourceSize = 0;
this.urlhashes = new HashMap<String, Object>();
this.ref = new kelondroMScoreCluster<String>();
this.misses = new TreeSet<String>();
@@ -90,7 +91,7 @@
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
}
- public void execQuery(boolean fetchURLs) {
+ public void execQuery() {
long timer = System.currentTimeMillis();
this.localSearchContainerMaps = wordIndex.localSearchContainers(query,
null);
@@ -113,16 +114,24 @@
}
if (sortorder == 2) {
- insertRanked(index, true);
+ insertRanked(index, true, index.size());
} else {
- insertNoOrder(index, fetchURLs);
+ insertNoOrder(index, true, index.size());
}
}
- private void insertNoOrder(indexContainer index, boolean local) {
+ private void insertNoOrder(indexContainer index, boolean local, int
fullResource) {
final Iterator<indexRWIRowEntry> en = index.entries();
// generate a new map where the urls are sorted (not by hash but by
the url text)
+ if (local) {
+ this.local_resourceSize += fullResource;
+ } else {
+ this.remote_resourceSize += fullResource;
+ this.remote_peerCount++;
+ this.remote_indexCount += index.size();
+ }
+
indexRWIRowEntry ientry;
indexURLEntry uentry;
String u;
@@ -141,20 +150,14 @@
if (sortorder == 0) {
this.sortedRWIEntries.put(ientry.urlHash(), ientry);
this.urlhashes.put(ientry.urlHash(), ientry.urlHash());
- filteredCount++;
} else {
- if (local) {
- uentry = wordIndex.loadedURL.load(ientry.urlHash(),
ientry, 0);
- if (uentry == null) {
- this.misses.add(ientry.urlHash());
- } else {
- u = uentry.comp().url().toNormalform(false, true);
- this.sortedRWIEntries.put(u, ientry);
- this.urlhashes.put(ientry.urlHash(), u);
- filteredCount++;
- }
+ uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
+ if (uentry == null) {
+ this.misses.add(ientry.urlHash());
} else {
- filteredCount++;
+ u = uentry.comp().url().toNormalform(false, true);
+ this.sortedRWIEntries.put(u, ientry);
+ this.urlhashes.put(ientry.urlHash(), u);
}
}
@@ -163,12 +166,18 @@
} // end loop
}
- public void insertRanked(indexContainer index, boolean local) {
+ public void insertRanked(indexContainer index, boolean local, int
fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not
terminate within the maxTime
assert (index != null);
if (index.size() == 0) return;
+ if (local) {
+ this.local_resourceSize += fullResource;
+ } else {
+ this.remote_resourceSize += fullResource;
+ this.remote_peerCount++;
+ }
long timer = System.currentTimeMillis();
if (this.order == null) {
@@ -224,11 +233,8 @@
}
// increase counter for statistics
- if (!local) this.globalcount++;
+ if (!local) this.remote_indexCount++;
}
- this.filteredCount = sortedRWIEntries.size();
- //long sc = Math.max(1, System.currentTimeMillis() - s0);
- //System.out.println("###DEBUG### time to sort " + container.size() +
" entries to " + this.filteredCount + ": " + sc + " milliseconds, " +
(container.size() / sc) + " entries/millisecond, ranking = " + tc);
//if ((query.neededResults() > 0) && (container.size() >
query.neededResults())) remove(true, true);
serverProfiling.update("SEARCH", new
plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT,
index.size(), System.currentTimeMillis() - timer));
@@ -350,14 +356,34 @@
return flagcount;
}
+ // "results from a total number of <remote_resourceSize +
local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize>
remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
+
public int filteredCount() {
- return this.filteredCount;
+ // the number of index entries that are considered as result set
+ return this.sortedRWIEntries.size();
}
- public int getGlobalCount() {
- return this.globalcount;
+ public int getRemoteIndexCount() {
+ // the number of result contributions from all the remote peers
+ return this.remote_indexCount;
}
+ public int getRemotePeerCount() {
+ // the number of remote peers that have contributed
+ return this.remote_peerCount;
+ }
+
+ public int getRemoteResourceSize() {
+ // the number of all hits in all the remote peers
+ return this.remote_resourceSize;
+ }
+
+ public int getLocalResourceSize() {
+ // the number of hits in the local peer (index size, size of the
collection in the own index)
+ return this.local_resourceSize;
+ }
+
+
public indexRWIEntry remove(String urlHash) {
Object r = (Long) urlhashes.get(urlHash);
if (r == null) return null;
Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-30
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-30
21:58:30 UTC (rev 4420)
@@ -874,7 +874,7 @@
plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID);
assert plasmaSwitchboard.getSwitchboard() != null;
assert plasmaSwitchboard.getSwitchboard().wordIndex != null;
- assert event != null;
+ assert event != null : "eventID = " + eventID;
assert event.getQuery() != null;
plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes,
urlHash);
event.remove(urlHash);
Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-01-30
00:15:43 UTC (rev 4419)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-01-30
21:58:30 UTC (rev 4420)
@@ -2205,7 +2205,7 @@
/*
=========================================================================
* CREATE INDEX
*
========================================================================= */
- String docDescription = document.dc_title();
+ String dc_title = document.dc_title();
yacyURL referrerURL = entry.referrerURL();
String noIndexReason =
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR;
@@ -2236,9 +2236,9 @@
long ldate = System.currentTimeMillis();
indexURLEntry newEntry = new indexURLEntry(
entry.url(), // URL
- docDescription, //
document description
- document.dc_creator(), //
author
- document.dc_subject(' '), // tags
+ dc_title, // document
description
+ document.dc_creator(), //
author
+ document.dc_subject(' '), // tags
"", // ETag
docDate, //
modification date
new Date(), //
loaded date
@@ -2406,7 +2406,7 @@
// of string concatenation
log.logInfo("*Indexed " + words + " words in URL "
+ entry.url() +
" [" + entry.urlHash() + "]" +
- "\n\tDescription: " + docDescription +
+ "\n\tDescription: " + dc_title +
"\n\tMimeType: " + document.dc_format() +
" | Charset: " + document.getCharset() + " | " +
"Size: " + document.getTextLength() + "
bytes | " +
"Anchors: " + ((document.getAnchors() ==
null) ? 0 : document.getAnchors().size()) +
@@ -2430,7 +2430,7 @@
}
} else {
log.logFine("Not Indexed Resource '" +
entry.url().toNormalform(false, true) + "': process case=" + processCase);
- addURLtoErrorDB(entry.url(), referrerURL.hash(),
initiatorPeerHash, docDescription,
plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
+ addURLtoErrorDB(entry.url(), referrerURL.hash(),
initiatorPeerHash, dc_title,
plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
}
} catch (Exception ee) {
if (ee instanceof InterruptedException) throw
(InterruptedException)ee;
@@ -2443,7 +2443,7 @@
if (clusterhashes != null)
initiatorPeer.setAlternativeAddress((String)
clusterhashes.get(initiatorPeer.hash));
yacyClient.crawlReceipt(initiatorPeer, "crawl",
"exception", ee.getMessage(), null, "");
}
- addURLtoErrorDB(entry.url(), (referrerURL == null) ? null
: referrerURL.hash(), initiatorPeerHash, docDescription,
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
+ addURLtoErrorDB(entry.url(), (referrerURL == null) ? null
: referrerURL.hash(), initiatorPeerHash, dc_title,
plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
}
} else {
@@ -2451,7 +2451,7 @@
checkInterruption();
log.logInfo("Not indexed any word in URL " + entry.url() + ";
cause: " + noIndexReason);
- addURLtoErrorDB(entry.url(), (referrerURL == null) ? null :
referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new
kelondroBitfield());
+ addURLtoErrorDB(entry.url(), (referrerURL == null) ? null :
referrerURL.hash(), initiatorPeerHash, dc_title, noIndexReason, new
kelondroBitfield());
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) &&
(initiatorPeer != null)) {
if (clusterhashes != null)
initiatorPeer.setAlternativeAddress((String)
clusterhashes.get(initiatorPeer.hash));
yacyClient.crawlReceipt(initiatorPeer, "crawl",
"rejected", noIndexReason, null, "");
Modified: trunk/source/de/anomic/yacy/yacyClient.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 00:15:43 UTC (rev
4419)
+++ trunk/source/de/anomic/yacy/yacyClient.java 2008-01-30 21:58:30 UTC (rev
4420)
@@ -477,9 +477,10 @@
// now create a plasmaIndex out of this result
// System.out.println("yacyClient: " + ((urlhashes.length() ==
0) ? "primary" : "secondary")+ " search result = " + result.toString()); //
debug
- int results = 0;
+ int results = 0, joincount = 0;
try {
- results = Integer.parseInt((String) result.get("count"));
+ results = Integer.parseInt(result.get("count"));
+ joincount = Integer.parseInt(result.get("joincount"));
} catch (NumberFormatException e) {
yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" +
target.getName() + ", wrong output format");
yacyCore.peerActions.peerDeparture(target, "search request to peer
created number format exception");
@@ -557,7 +558,7 @@
// store remote result to local result container
synchronized (containerCache) {
// insert one container into the search result buffer
- containerCache.insertRanked(container[0], false); // one is enough
+ containerCache.insertRanked(container[0], false, joincount); //
one is enough
// integrate remote topwords
String references = (String) result.get("references");
_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn