Author: orbiter
Date: 2008-03-05 22:46:55 +0100 (Wed, 05 Mar 2008)
New Revision: 4529
Added:
trunk/htroot/yacy/user/sidebar_history.html
trunk/htroot/yacy/user/sidebar_history.java
trunk/htroot/yacy/user/sidebar_navigation.html
trunk/htroot/yacy/user/sidebar_navigation.java
Modified:
trunk/htroot/IndexControlRWIs_p.html
trunk/htroot/IndexControlRWIs_p.java
trunk/htroot/yacy/user/ysearch.html
trunk/htroot/yacy/user/ysearchitem.html
trunk/htroot/yacy/user/ysearchitem.java
trunk/source/de/anomic/plasma/plasmaSearchAPI.java
trunk/source/de/anomic/plasma/plasmaSearchEvent.java
trunk/source/de/anomic/plasma/plasmaSearchQuery.java
trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
trunk/source/de/anomic/plasma/plasmaSwitchboard.java
trunk/source/de/anomic/server/serverProcessor.java
trunk/source/de/anomic/yacy/yacyURL.java
Log:
- some refactoring in search process
- separated sidebars in new search interface and placed them in their own files
which can be put into the search page like plug-ins
Modified: trunk/htroot/IndexControlRWIs_p.html
===================================================================
--- trunk/htroot/IndexControlRWIs_p.html 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/IndexControlRWIs_p.html 2008-03-05 21:46:55 UTC (rev
4529)
@@ -92,11 +92,6 @@
<input type="radio" name="lines" value="100" checked="checked"
/>100
<input type="radio" name="lines" value="1000" />1000
</dd>
- <dt class="TableCellDark">Ordering of list:</dt>
- <dd><input type="radio" name="ordering" value="2" checked="checked"
/>by Ranking
- <input type="radio" name="ordering" value="0"/>by URL
- <input type="radio" name="ordering" value="1" />by URL
Hash
- </dd>
<dt class="TableCellLight"></dt>
<dd><input type="submit" name="urllist" value="List Selected URLs" />
</dd>
Modified: trunk/htroot/IndexControlRWIs_p.java
===================================================================
--- trunk/htroot/IndexControlRWIs_p.java 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/IndexControlRWIs_p.java 2008-03-05 21:46:55 UTC (rev
4529)
@@ -77,7 +77,6 @@
// default values
String keystring = post.get("keystring", "").trim();
String keyhash = post.get("keyhash", "").trim();
- int sortorder = post.getInt("ordering", 0);
prop.putHTML("keystring", keystring);
prop.put("keyhash", keyhash);
@@ -89,7 +88,7 @@
if (post.containsKey("keystringsearch")) {
keyhash = plasmaCondenser.word2hash(keystring);
prop.put("keyhash", keyhash);
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 1);
prop.put("searchresult_word", keystring);
@@ -100,7 +99,7 @@
if (keystring.length() == 0 ||
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
prop.put("keystring", "<not possible to compute word
from hash>");
}
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 2);
prop.put("searchresult_wordhash", keyhash);
@@ -159,8 +158,8 @@
}
kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
int count = (post.get("lines", "all").equals("all")) ? -1 :
post.getInt("lines", -1);
- final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
- plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking,
flags, count, sortorder);
+ final plasmaSearchRankingProcess ranking =
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags);
+ plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking,
flags, count);
}
// transfer to other peer
Added: trunk/htroot/yacy/user/sidebar_history.html
===================================================================
--- trunk/htroot/yacy/user/sidebar_history.html 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/yacy/user/sidebar_history.html 2008-03-05 21:46:55 UTC (rev
4529)
@@ -0,0 +1,14 @@
+#(history)#::
+ <div id="partners" class="boxed">
+ <h2 class="title">Recent Searches</h2>
+ <div class="content">
+ <ul>
+ #{list}#
+ <li><a
href="/yacy/user/ysearch.html?search=#[querystring]#&resource=#[searchdom]#&contentdom=#[contentdom]#">#[querystring]#</a></li>
+ #{/list}#
+ </ul>
+ <p>The search history is only visible for users from host #[host]#</p>
+ </div>
+ </div>
+#(/history)#
+
Added: trunk/htroot/yacy/user/sidebar_history.java
===================================================================
--- trunk/htroot/yacy/user/sidebar_history.java 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/yacy/user/sidebar_history.java 2008-03-05 21:46:55 UTC (rev
4529)
@@ -0,0 +1,68 @@
+// sidebar_history.java
+// (C) 2008 by Michael Peter Christen; [EMAIL PROTECTED], Frankfurt a. M.,
Germany
+// first published 03.03.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.util.HashSet;
+import java.util.Iterator;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaSearchQuery;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class sidebar_history {
+
+ public static serverObjects respond(httpHeader header, serverObjects post,
serverSwitch env) {
+ final plasmaSwitchboard sb = (plasmaSwitchboard) env;
+ final serverObjects prop = new serverObjects();
+
+ // list search history
+ Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
+ String client = (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
+ plasmaSearchQuery query;
+ int c = 0;
+ HashSet<String> visibleQueries = new HashSet<String>();
+ while (i.hasNext()) {
+ query = i.next();
+ if (query.resultcount == 0) continue;
+ if (query.offset != 0) continue;
+ if (!query.host.equals(client)) continue; // the search history
should only be visible from the user who initiated the search
+ if (visibleQueries.contains(query.queryString)) continue; // avoid
doubles
+ visibleQueries.add(query.queryString);
+ prop.put("history_list_" + c + "_querystring", query.queryString);
+ prop.put("history_list_" + c + "_searchdom", query.searchdom());
+ prop.put("history_list_" + c + "_contentdom", query.contentdom());
+ c++;
+ if (c >= 10) break;
+ }
+ prop.put("history_list", c);
+ prop.put("history_host", client);
+ if (c == 0) prop.put("history", 0); else prop.put("history", 1); //
switch on if there is anything to see
+
+ return prop;
+ }
+
+}
Added: trunk/htroot/yacy/user/sidebar_navigation.html
===================================================================
--- trunk/htroot/yacy/user/sidebar_navigation.html 2008-03-05 18:56:01 UTC
(rev 4528)
+++ trunk/htroot/yacy/user/sidebar_navigation.html 2008-03-05 21:46:55 UTC
(rev 4529)
@@ -0,0 +1,37 @@
+#(rssreferences)#::
+<yacyTopwords:topwords>
+#{words}#
+<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
+#{/words}#
+</yacyTopwords:topwords>
+#(/rssreferences)#
+#(navigation)#::
+ <div id="navigate" class="boxed">
+ <h2 class="title">Navigate</h2>
+ <div class="content">
+ <p><Strong>Page</strong>:
+ #[resnav]#
+ </p>
+ #(topwords)#::
+ <p><strong>Category</strong>:
+ <select
onchange="window.location.href=this.options[this.selectedIndex].value">
+ <option selected="selected">-select-</option>
+ #{words}#
+ <option
value="/yacy/user/ysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&contentdom=#[contentdom]#&zone=#[zonecode]#">#[word]#</option>
+ #{/words}#
+ </select>
+ </p>
+ #(/topwords)#
+ #(languagezone)#::
+ <p><strong>Language Zone</strong>:
+ <select
onchange="window.location.href=this.options[this.selectedIndex].value">
+ <option selected="selected">-select-</option>
+ #{zones}#
+ <option
value="/yacy/user/ysearch.html?search=#[search]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&contentdom=#[contentdom]#&zone=#[zonecode]#">#[zone]#</option>
+ #{/zones}#
+ </select>
+ </p>
+ #(/languagezone)#
+ </div>
+ </div>
+#(/navigation)#
Added: trunk/htroot/yacy/user/sidebar_navigation.java
===================================================================
--- trunk/htroot/yacy/user/sidebar_navigation.java 2008-03-05 18:56:01 UTC
(rev 4528)
+++ trunk/htroot/yacy/user/sidebar_navigation.java 2008-03-05 21:46:55 UTC
(rev 4529)
@@ -0,0 +1,173 @@
+// sidebar_navigation.java
+// (C) 2008 by Michael Peter Christen; [EMAIL PROTECTED], Frankfurt a. M.,
Germany
+// first published 03.03.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import de.anomic.http.httpHeader;
+import de.anomic.kelondro.kelondroMSetTools;
+import de.anomic.kelondro.kelondroNaturalOrder;
+import de.anomic.plasma.plasmaSearchEvent;
+import de.anomic.plasma.plasmaSearchQuery;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class sidebar_navigation {
+
+ private static final int MAX_TOPWORDS = 24;
+
+ public static serverObjects respond(httpHeader header, serverObjects post,
serverSwitch env) {
+ final serverObjects prop = new serverObjects();
+
+ String eventID = post.get("eventID", "");
+ boolean rss = post.get("rss", "false").equals("true");
+
+ // default settings for blank item
+ prop.put("navigation", "0");
+ prop.put("rssreferences", "0");
+
+ // find search event
+ plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(eventID);
+ if (theSearch == null) {
+ // the event does not exist, show empty page
+ return prop;
+ }
+ plasmaSearchQuery theQuery = theSearch.getQuery();
+ int offset = theQuery.neededResults() - theQuery.displayResults();
+ int totalcount = theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize();
+
+ // attach the bottom line with search references (topwords)
+ final Set<String> references = theSearch.references(20);
+ if (references.size() > 0) {
+ // get the topwords
+ final TreeSet<String> topwords = new
TreeSet<String>(kelondroNaturalOrder.naturalComparator);
+ String tmp = "";
+ Iterator<String> i = references.iterator();
+ while (i.hasNext()) {
+ tmp = i.next();
+ if (tmp.matches("[a-z]+")) {
+ topwords.add(tmp);
+ }
+ }
+
+ // filter out the badwords
+ final TreeSet<String> filteredtopwords =
kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
+ if (filteredtopwords.size() > 0) {
+ kelondroMSetTools.excludeDestructive(topwords,
plasmaSwitchboard.badwords);
+ }
+
+ // avoid stopwords being topwords
+ if (env.getConfig("filterOutStopwordsFromTopwords",
"true").equals("true")) {
+ if ((plasmaSwitchboard.stopwords != null) &&
(plasmaSwitchboard.stopwords.size() > 0)) {
+ kelondroMSetTools.excludeDestructive(topwords,
plasmaSwitchboard.stopwords);
+ }
+ }
+
+ if (rss) {
+ String word;
+ int hintcount = 0;
+ final Iterator<String> iter = topwords.iterator();
+ while (iter.hasNext()) {
+ word = (String) iter.next();
+ if (word != null) {
+ prop.putHTML("rssreferences_words_" + hintcount +
"_word", word);
+ }
+ prop.put("rssreferences_words", hintcount);
+ if (hintcount++ > MAX_TOPWORDS) {
+ break;
+ }
+ }
+ prop.put("rssreferences", "1");
+ } else {
+ String word;
+ int hintcount = 0;
+ final Iterator<String> iter = topwords.iterator();
+ while (iter.hasNext()) {
+ word = (String) iter.next();
+ if ((theQuery == null) || (theQuery.queryString == null))
break;
+ if (word != null) {
+ prop.putHTML("navigation_topwords_words_" + hintcount
+ "_word", word);
+ prop.putHTML("navigation_topwords_words_" + hintcount
+ "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
+ prop.put("navigation_topwords_words_" + hintcount +
"_count", theQuery.displayResults());
+ prop.put("navigation_topwords_words_" + hintcount +
"_offset", "0");
+ prop.put("navigation_topwords_words_" + hintcount +
"_contentdom", theQuery.contentdom());
+ prop.put("navigation_topwords_words_" + hintcount +
"_resource", theQuery.searchdom());
+ prop.put("navigation_topwords_words_" + hintcount +
"_zonecode", theQuery.zonecode);
+ }
+ prop.put("navigation_topwords_words", hintcount);
+ if (hintcount++ > MAX_TOPWORDS) {
+ break;
+ }
+ }
+ prop.put("navigation_topwords", "1");
+ }
+ }
+
+ // compose page navigation
+ StringBuffer resnav = new StringBuffer();
+ int thispage = offset / theQuery.displayResults();
+ if (thispage == 0) resnav.append("< "); else {
+ resnav.append(navurla(thispage - 1, theQuery));
+ resnav.append("<strong><</strong></a> ");
+ }
+ int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount /
theQuery.displayResults()));
+ for (int j = 0; j < numberofpages; j++) {
+ if (j == thispage) {
+ resnav.append("<strong>");
+ resnav.append(j + 1);
+ resnav.append("</strong> ");
+ } else {
+ resnav.append(navurla(j, theQuery));
+ resnav.append(j + 1);
+ resnav.append("</a> ");
+ }
+ }
+ if (thispage >= numberofpages) resnav.append(">"); else {
+ resnav.append(navurla(thispage + 1, theQuery));
+ resnav.append("<strong>></strong></a>");
+ }
+ prop.put("navigation_resnav", resnav.toString());
+ prop.put("navigation", "1");
+
+ return prop;
+ }
+
+ private static String navurla(int page, plasmaSearchQuery theQuery) {
+ return
+ "<a href=\"ysearch.html?search=" + theQuery.queryString() +
+ "&count="+ theQuery.displayResults() +
+ "&offset=" + (page * theQuery.displayResults()) +
+ "&resource=" + theQuery.searchdom() +
+ "&urlmaskfilter=" + theQuery.urlMask +
+ "&prefermaskfilter=" + theQuery.prefer +
+ "&cat=href&constraint=" + ((theQuery.constraint == null) ? ""
: theQuery.constraint.exportB64()) +
+ "&contentdom=" + theQuery.contentdom() +
+ "&former=" + theQuery.queryString() + "\">";
+ }
+
+}
Modified: trunk/htroot/yacy/user/ysearch.html
===================================================================
--- trunk/htroot/yacy/user/ysearch.html 2008-03-05 18:56:01 UTC (rev 4528)
+++ trunk/htroot/yacy/user/ysearch.html 2008-03-05 21:46:55 UTC (rev 4529)
@@ -141,7 +141,8 @@
<div id="sidebar">
<!-- attach the bottomline -->
-<!--#include
virtual="/yacy/user/ysearchitem.html?bottomline=true&eventID=#[eventID]#" -->
+<!--#include virtual="/yacy/user/sidebar_navigation.html?eventID=#[eventID]#"
-->
+<!--#include virtual="/yacy/user/sidebar_history.html?eventID=#[eventID]#" -->
</div>
</div>
Modified: trunk/htroot/yacy/user/ysearchitem.html
===================================================================
--- trunk/htroot/yacy/user/ysearchitem.html 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/yacy/user/ysearchitem.html 2008-03-05 21:46:55 UTC (rev
4529)
@@ -38,50 +38,9 @@
<guid isPermaLink="false">#[urlhash]#</guid>
</item>
#(/rss)#
-#(rssreferences)#::
-<yacyTopwords:topwords>
-#{words}#
-<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
-#{/words}#
-</yacyTopwords:topwords>
-#(/rssreferences)#
#(dynamic)#::
<script type="text/javascript">
statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#",
"#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#",
"#[remotePeerCount]#");
progressbar.step(1);
</script>
#(/dynamic)#
-#(navigation)#::
- <div id="navigate" class="boxed">
- <h2 class="title">Navigate</h2>
- <div class="content">
- <p><Strong>Page</strong>:
- #[resnav]#
- </p>
- #(topwords)#::
- <p><strong>Category</strong>:
- <select
onchange="window.location.href=this.options[this.selectedIndex].value">
- <option selected="selected">-select-</option>
- #{words}#
- <option
value="/yacy/user/ysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&contentdom=#[contentdom]#">#[word]#</option>
- #{/words}#
- </select>
- </p>
- #(/topwords)#
- </div>
- </div>
-#(/navigation)#
-#(history)#::
- <div id="partners" class="boxed">
- <h2 class="title">Recent Searches</h2>
- <div class="content">
- <ul>
- #{list}#
- <li><a
href="/yacy/user/ysearch.html?search=#[querystring]#&resource=#[searchdom]#&contentdom=#[contentdom]#">#[querystring]#</a></li>
- #{/list}#
- </ul>
- <p>The search history is only visible for users from host #[host]#</p>
- </div>
- </div>
-#(/history)#
-
Modified: trunk/htroot/yacy/user/ysearchitem.java
===================================================================
--- trunk/htroot/yacy/user/ysearchitem.java 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/htroot/yacy/user/ysearchitem.java 2008-03-05 21:46:55 UTC (rev
4529)
@@ -28,14 +28,9 @@
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
-import de.anomic.kelondro.kelondroMSetTools;
-import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProcess;
@@ -53,14 +48,12 @@
private static boolean col = true;
private static final int namelength = 60;
private static final int urllength = 120;
- private static final int MAX_TOPWORDS = 24;
public static serverObjects respond(httpHeader header, serverObjects post,
serverSwitch env) {
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
final serverObjects prop = new serverObjects();
String eventID = post.get("eventID", "");
- boolean bottomline = post.get("bottomline", "false").equals("true");
boolean rss = post.get("rss", "false").equals("true");
int item = post.getInt("item", -1);
boolean auth = ((String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") ||
sb.verifyAuthentication(header, true);
@@ -69,8 +62,6 @@
prop.put("content", "0");
prop.put("rss", "0");
prop.put("references", "0");
- prop.put("rssreferences", "0");
- prop.put("navigation", "0");
prop.put("dynamic", "0");
// find search event
@@ -81,7 +72,6 @@
}
plasmaSearchQuery theQuery = theSearch.getQuery();
int offset = theQuery.neededResults() - theQuery.displayResults();
- int totalcount = theSearch.getRankingResult().getLocalResourceSize() +
theSearch.getRankingResult().getRemoteResourceSize();
// dynamically update count values
if (!rss) {
@@ -95,129 +85,7 @@
prop.put("dynamic_resnav", "");
prop.put("dynamic", "1");
}
-
- if (bottomline) {
- // attach the bottom line with search references (topwords)
- final Set<String> references = theSearch.references(20);
- if (references.size() > 0) {
- // get the topwords
- final TreeSet<String> topwords = new
TreeSet<String>(kelondroNaturalOrder.naturalComparator);
- String tmp = "";
- Iterator<String> i = references.iterator();
- while (i.hasNext()) {
- tmp = i.next();
- if (tmp.matches("[a-z]+")) {
- topwords.add(tmp);
- }
- }
- // filter out the badwords
- final TreeSet<String> filteredtopwords =
kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
- if (filteredtopwords.size() > 0) {
- kelondroMSetTools.excludeDestructive(topwords,
plasmaSwitchboard.badwords);
- }
-
- // avoid stopwords being topwords
- if (env.getConfig("filterOutStopwordsFromTopwords",
"true").equals("true")) {
- if ((plasmaSwitchboard.stopwords != null) &&
(plasmaSwitchboard.stopwords.size() > 0)) {
- kelondroMSetTools.excludeDestructive(topwords,
plasmaSwitchboard.stopwords);
- }
- }
-
- if (rss) {
- String word;
- int hintcount = 0;
- final Iterator<String> iter = topwords.iterator();
- while (iter.hasNext()) {
- word = (String) iter.next();
- if (word != null) {
- prop.putHTML("rssreferences_words_" + hintcount +
"_word", word);
- }
- prop.put("rssreferences_words", hintcount);
- if (hintcount++ > MAX_TOPWORDS) {
- break;
- }
- }
- prop.put("rssreferences", "1");
- } else {
- String word;
- int hintcount = 0;
- final Iterator<String> iter = topwords.iterator();
- while (iter.hasNext()) {
- word = (String) iter.next();
- if ((theQuery == null) || (theQuery.queryString ==
null)) break;
- if (word != null) {
- prop.putHTML("navigation_topwords_words_" +
hintcount + "_word", word);
- prop.putHTML("navigation_topwords_words_" +
hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
- prop.put("navigation_topwords_words_" + hintcount
+ "_count", theQuery.displayResults());
- prop.put("navigation_topwords_words_" + hintcount
+ "_offset", "0");
- prop.put("navigation_topwords_words_" + hintcount
+ "_contentdom", theQuery.contentdom());
- prop.put("navigation_topwords_words_" + hintcount
+ "_resource", theQuery.searchdom());
- }
- prop.put("navigation_topwords_words", hintcount);
- if (hintcount++ > MAX_TOPWORDS) {
- break;
- }
- }
- prop.put("navigation_topwords", "1");
- }
-
- }
-
- // compose page navigation
- StringBuffer resnav = new StringBuffer();
- int thispage = offset / theQuery.displayResults();
- if (thispage == 0) resnav.append("< "); else {
- resnav.append(navurla(thispage - 1, theQuery));
- resnav.append("<strong><</strong></a> ");
- }
- int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount
/ theQuery.displayResults()));
- for (int j = 0; j < numberofpages; j++) {
- if (j == thispage) {
- resnav.append("<strong>");
- resnav.append(j + 1);
- resnav.append("</strong> ");
- } else {
- resnav.append(navurla(j, theQuery));
- resnav.append(j + 1);
- resnav.append("</a> ");
- }
- }
- if (thispage >= numberofpages) resnav.append(">"); else {
- resnav.append(navurla(thispage + 1, theQuery));
- resnav.append("<strong>></strong></a>");
- }
- prop.put("navigation_resnav", resnav.toString());
- prop.put("navigation", "1");
-
- // list search history
- Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
- String client = (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
- plasmaSearchQuery query;
- int c = 0;
- HashSet<String> visibleQueries = new HashSet<String>();
- while (i.hasNext()) {
- query = i.next();
- if (query.resultcount == 0) continue;
- if (query.offset != 0) continue;
- if (!query.host.equals(client)) continue; // the search
history should only be visible from the user who initiated the search
- if (visibleQueries.contains(query.queryString)) continue; //
avoid doubles
- visibleQueries.add(query.queryString);
- prop.put("history_list_" + c + "_querystring",
query.queryString);
- prop.put("history_list_" + c + "_searchdom",
query.searchdom());
- prop.put("history_list_" + c + "_contentdom",
query.contentdom());
- c++;
- if (c >= 10) break;
- }
- prop.put("history_list", c);
- prop.put("history_host", client);
- if (c == 0) prop.put("history", 0); else prop.put("history", 1);
// switch on if there is anything to see
-
- return prop;
- }
-
- prop.put("rss", "0");
-
if (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
// text search
@@ -331,18 +199,4 @@
return s.substring(0, length - (s.length() - p) - 3) + "..." +
s.substring(p);
}
-
- private static String navurla(int page, plasmaSearchQuery theQuery) {
- return
- "<a href=\"ysearch.html?search=" + theQuery.queryString() +
- "&count="+ theQuery.displayResults() +
- "&offset=" + (page * theQuery.displayResults()) +
- "&resource=" + theQuery.searchdom() +
- "&urlmaskfilter=" + theQuery.urlMask +
- "&prefermaskfilter=" + theQuery.prefer +
- "&cat=href&constraint=" + ((theQuery.constraint == null) ? ""
: theQuery.constraint.exportB64()) +
- "&contentdom=" + theQuery.contentdom() +
- "&former=" + theQuery.queryString() + "\">";
- }
-
}
Modified: trunk/source/de/anomic/plasma/plasmaSearchAPI.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-03-05 18:56:01 UTC
(rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java 2008-03-05 21:46:55 UTC
(rev 4529)
@@ -88,9 +88,9 @@
}
}
- public static plasmaSearchRankingProcess genSearchresult(serverObjects
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int
sortorder) {
+ public static plasmaSearchRankingProcess genSearchresult(serverObjects
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter) {
plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1,
sb.getRanking(), filter);
- plasmaSearchRankingProcess ranked = new
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE,
1);
+ plasmaSearchRankingProcess ranked = new
plasmaSearchRankingProcess(sb.wordIndex, query, Integer.MAX_VALUE, 1);
ranked.execQuery();
if (ranked.filteredCount() == 0) {
@@ -114,7 +114,7 @@
return ranked;
}
- public static void genURLList(serverObjects prop, String keyhash, String
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int
maxlines, int ordering) {
+ public static void genURLList(serverObjects prop, String keyhash, String
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int
maxlines) {
// search for a word hash and generate a list of url links
prop.put("genUrlList_keyHash", keyhash);
@@ -127,7 +127,6 @@
prop.put("searchresult", 3);
prop.put("genUrlList_flags", (flags == null) ? "" :
flags.exportB64());
prop.put("genUrlList_lines", maxlines);
- prop.put("genUrlList_ordering", ordering);
int i = 0;
yacyURL url;
indexURLEntry entry;
Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-03-05
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-03-05
21:46:55 UTC (rev 4529)
@@ -123,7 +123,7 @@
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
// do a global search
- this.rankedCache = new plasmaSearchRankingProcess(wordIndex,
query, 2, max_results_preparation, 16);
+ this.rankedCache = new plasmaSearchRankingProcess(wordIndex,
query, max_results_preparation, 16);
int fetchpeers = 30;
@@ -156,7 +156,7 @@
serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER
GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " +
((System.currentTimeMillis() - start) / 1000) + " seconds");
} else {
// do a local search
- this.rankedCache = new plasmaSearchRankingProcess(wordIndex,
query, 2, max_results_preparation, 2);
+ this.rankedCache = new plasmaSearchRankingProcess(wordIndex,
query, max_results_preparation, 2);
this.rankedCache.execQuery();
//plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query,
false, 2, ranking, process);
Modified: trunk/source/de/anomic/plasma/plasmaSearchQuery.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchQuery.java 2008-03-05
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchQuery.java 2008-03-05
21:46:55 UTC (rev 4529)
@@ -54,6 +54,7 @@
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
+import de.anomic.yacy.yacyURL;
public final class plasmaSearchQuery {
@@ -80,7 +81,7 @@
public int contentdom;
public String urlMask;
public int domType;
- public String domGroupName;
+ public int zonecode;
public int domMaxTargets;
public int maxDistance;
public kelondroBitfield constraint;
@@ -117,7 +118,7 @@
this.offset = 0;
this.urlMask = ".*";
this.domType = SEARCHDOM_LOCAL;
- this.domGroupName = "";
+ this.zonecode = yacyURL.language_domain_any_zone;
this.domMaxTargets = 0;
this.constraint = constraint;
this.allofconstraint = false;
@@ -148,7 +149,7 @@
//this.maximumTime = Math.min(6000, maximumTime);
this.urlMask = urlMask;
this.domType = domType;
- this.domGroupName = domGroupName;
+ this.zonecode = yacyURL.language_domain_any_zone;
this.domMaxTargets = domMaxTargets;
this.constraint = constraint;
this.allofconstraint = allofconstraint;
Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-03-05 18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-03-05 21:46:55 UTC (rev 4529)
@@ -40,7 +40,6 @@
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryOrder;
-import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexRWIVarEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBinSearch;
@@ -49,6 +48,7 @@
import de.anomic.server.serverCodings;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverProfiling;
+import de.anomic.yacy.yacyURL;
public final class plasmaSearchRankingProcess {
@@ -59,7 +59,6 @@
private HashMap<String, kelondroSortStack<indexRWIVarEntry>>
doubleDomCache; // key = domhash (6 bytes); value = like stack
private HashMap<String, String> handover; // key = urlhash, value =
urlstring; used for double-check of urls that had been handed over to search
process
private plasmaSearchQuery query;
- private int sortorder;
private int maxentries;
private int remote_peerCount, remote_indexCount, remote_resourceSize,
local_resourceSize;
private indexRWIEntryOrder order;
@@ -70,7 +69,7 @@
private plasmaWordIndex wordIndex;
private HashMap<String, indexContainer>[] localSearchContainerMaps;
- public plasmaSearchRankingProcess(plasmaWordIndex wordIndex,
plasmaSearchQuery query, int sortorder, int maxentries, int concurrency) {
+ public plasmaSearchRankingProcess(plasmaWordIndex wordIndex,
plasmaSearchQuery query, int maxentries, int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not
terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
@@ -89,7 +88,6 @@
this.ref = new kelondroMScoreCluster<String>();
this.misses = new TreeSet<String>();
this.wordIndex = wordIndex;
- this.sortorder = sortorder;
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
}
@@ -120,59 +118,9 @@
return;
}
- if (sortorder == 2) {
- insertRanked(index, true, index.size());
- } else {
- insertNoOrder(index, true, index.size());
- }
+ insertRanked(index, true, index.size());
}
- private void insertNoOrder(indexContainer index, boolean local, int
fullResource) {
- final Iterator<indexRWIRowEntry> en = index.entries();
- // generate a new map where the urls are sorted (not by hash but by
the url text)
-
- if (local) {
- this.local_resourceSize += fullResource;
- } else {
- this.remote_resourceSize += fullResource;
- this.remote_peerCount++;
- this.remote_indexCount += index.size();
- }
-
- indexRWIVarEntry ientry;
- indexURLEntry uentry;
- String u;
- loop: while (en.hasNext()) {
- ientry = new indexRWIVarEntry(en.next());
-
- // check constraints
- if (!testFlags(ientry)) continue loop;
-
- // increase flag counts
- for (int i = 0; i < 32; i++) {
- if (ientry.flags().get(i)) {flagcount[i]++;}
- }
-
- // load url
- if (sortorder == 0) {
- this.stack.push(ientry, new Long(ientry.urlHash().hashCode()));
- this.urlhashes.put(ientry.urlHash(), new
Integer(ientry.urlHash().hashCode()));
- } else {
- uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
- if (uentry == null) {
- this.misses.add(ientry.urlHash());
- } else {
- u = uentry.comp().url().toNormalform(false, true);
- this.stack.push(ientry, new Long(u.hashCode()));
- this.urlhashes.put(ientry.urlHash(), new
Integer(u.hashCode()));
- }
- }
-
- // interrupt if we have enough
- if ((query.neededResults() > 0) && (this.misses.size() +
this.stack.size() > query.neededResults())) break loop;
- } // end loop
- }
-
public void insertRanked(indexContainer index, boolean local, int
fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not
terminate within the maxTime
@@ -222,6 +170,12 @@
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP )
&& (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasapp )))) continue;
}
+ // check tld domain
+ if (!yacyURL.matchesAnyDomDomain(iEntry.urlHash(),
this.query.zonecode)) {
+ // filter out all tld that do not match with wanted tld domain
+ continue;
+ }
+
// insert
if ((maxentries < 0) || (stack.size() < maxentries)) {
// in case that we don't have enough yet, accept any new entry
Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-03-05
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-03-05
21:46:55 UTC (rev 4529)
@@ -1445,6 +1445,7 @@
if (hostAddress == null) return ((this.remoteProxyConfig != null) &&
(this.remoteProxyConfig.useProxy()));
// check if this is a local address and we are allowed to index local
pages:
boolean local = hostAddress.isSiteLocalAddress() ||
hostAddress.isLoopbackAddress();
+ //assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove
the dnsResolve above!
return (this.acceptGlobalURLs && !local) || (this.acceptLocalURLs &&
local);
}
Modified: trunk/source/de/anomic/server/serverProcessor.java
===================================================================
--- trunk/source/de/anomic/server/serverProcessor.java 2008-03-05 18:56:01 UTC
(rev 4528)
+++ trunk/source/de/anomic/server/serverProcessor.java 2008-03-05 21:46:55 UTC
(rev 4529)
@@ -24,10 +24,29 @@
package de.anomic.server;
+import java.util.concurrent.LinkedBlockingQueue;
+
public class serverProcessor {
public static final int availableCPU =
Runtime.getRuntime().availableProcessors();
public static int useCPU = availableCPU;
+
+ public static class queue<I, O> { // bundles one serverProcess<I, O> with a nickname, a priority and an input and an output queue
+ String nickname; // label supplied by the creator of this queue
+ int priority; // stored exactly as passed in; this class does not interpret it
+ serverProcess<I, O> implementation; // the process object this queue belongs to
+ LinkedBlockingQueue<I> inputQueue; // elements of the input type I
+ LinkedBlockingQueue<O> outputQueue; // elements of the output type O
+
+ public queue(String nickname, int priority, serverProcess<I, O>
implementation) {
+ this.nickname = nickname;
+ this.priority = priority;
+ this.implementation = implementation;
+ this.inputQueue = new LinkedBlockingQueue<I>(); // no-argument constructor: starts empty, no explicit capacity given
+ this.outputQueue = new LinkedBlockingQueue<O>(); // no-argument constructor: starts empty, no explicit capacity given
+ }
+ }
+
}
Modified: trunk/source/de/anomic/yacy/yacyURL.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyURL.java 2008-03-05 18:56:01 UTC (rev
4528)
+++ trunk/source/de/anomic/yacy/yacyURL.java 2008-03-05 21:46:55 UTC (rev
4529)
@@ -139,7 +139,7 @@
"UY=Uruguay",
"VE=Venezuela"
};
- private static final String[] TLD_EuropaRussia = {
+ private static final String[] TLD_EuropeRussia = {
// includes also countries that are mainly french- dutch- speaking
// and culturally close to europe
"AD=Andorra",
@@ -373,20 +373,30 @@
}
}
}
-
+ // zone codes are bit sets over the TLD group ids assigned with insertTLDProps below: bit n set = id n belongs to the zone; bit 7 (128, local addresses) is part of every zone
+ public static final int language_domain_europe_zone = 128 + 1;
//{0, 7};
+ public static final int language_domain_english_zone = 128 + 16 + 64;
//{4, 6, 7};
+ public static final int language_domain_spanish_zone = 128 + 2;
//{1, 7};
+ public static final int language_domain_asia_zone = 128 + 4;
//{2, 7};
+ public static final int language_domain_middleeast_zone = 128 + 8;
//{3, 7};
+ public static final int language_domain_africa_zone = 128 + 32;
//{5, 7};
+ public static final int language_domain_any_zone = 255;
+
+ public static final String[] regions = {"europe", "english", "spanish",
"asia", "middleeast", "africa"};
+
static {
// create a dummy hash
dummyHash = "";
for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-";
// assign TLD-ids and names
- insertTLDProps(TLD_EuropaRussia, 0);
- insertTLDProps(TLD_MiddleSouthAmerica, 1);
- insertTLDProps(TLD_SouthEastAsia, 2);
- insertTLDProps(TLD_MiddleEastWestAsia, 3);
- insertTLDProps(TLD_NorthAmericaOceania, 4);
- insertTLDProps(TLD_Africa, 5);
- insertTLDProps(TLD_Generic, 6);
+ insertTLDProps(TLD_EuropeRussia, 0); // European languages but
no english
+ insertTLDProps(TLD_MiddleSouthAmerica, 1); // mainly spanish-speaking
countries
+ insertTLDProps(TLD_SouthEastAsia, 2); // asia
+ insertTLDProps(TLD_MiddleEastWestAsia, 3); // middle east
+ insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking
countries
+ insertTLDProps(TLD_Africa, 5); // africa
+ insertTLDProps(TLD_Generic, 6); // anything else, mixed
languages, mainly english
// the id=7 is used to flag local addresses
}
@@ -971,7 +981,7 @@
tld = host.substring(p + 1);
dom = host.substring(0, p);
}
- Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer)
TLDID.get(tld); // identify local addresses
+ Integer ID = (serverDomains.isLocal(tld)) ? null : TLDID.get(tld); //
identify local addresses
int id = (ID == null) ? 7 : ID.intValue(); // local addresses are
flagged with id=7
boolean isHTTP = this.protocol.equals("http");
p = dom.lastIndexOf('.'); // locate subdomain
@@ -1083,14 +1093,22 @@
// returns the ID of the domain of the domain
assert (urlHash != null);
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
- int flagbyte =
kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11));
- return (flagbyte & 12) >> 2;
+ return
(kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)) & 12) >> 2;
}
- public static boolean isGlobalDomain(String urlhash) {
- return domDomain(urlhash) != 7;
+ public static boolean isLocalDomain(String urlhash) {
+ return domDomain(urlhash) == 7;
}
+ public static boolean isDomDomain(String urlHash, int id) {
+ return domDomain(urlHash) == id;
+ }
+
+ public static boolean matchesAnyDomDomain(String urlHash, int idset) {
+ // idset is a bit set of domDomain ids (bit n = id n, see language_domain_*_zone); true if the hash's id is in the set. NOTE(review): domDomain masks with 12 and so only yields 0..3 - confirm the mask
+ return ((1 << domDomain(urlHash)) & idset) != 0;
+ }
+
// checks for local/global IP range and local IP
public boolean isLocal() {
return serverDomains.isLocal(this.host);
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn