Author: orbiter
Date: 2008-03-05 22:46:55 +0100 (Wed, 05 Mar 2008)
New Revision: 4529

Added:
   trunk/htroot/yacy/user/sidebar_history.html
   trunk/htroot/yacy/user/sidebar_history.java
   trunk/htroot/yacy/user/sidebar_navigation.html
   trunk/htroot/yacy/user/sidebar_navigation.java
Modified:
   trunk/htroot/IndexControlRWIs_p.html
   trunk/htroot/IndexControlRWIs_p.java
   trunk/htroot/yacy/user/ysearch.html
   trunk/htroot/yacy/user/ysearchitem.html
   trunk/htroot/yacy/user/ysearchitem.java
   trunk/source/de/anomic/plasma/plasmaSearchAPI.java
   trunk/source/de/anomic/plasma/plasmaSearchEvent.java
   trunk/source/de/anomic/plasma/plasmaSearchQuery.java
   trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
   trunk/source/de/anomic/plasma/plasmaSwitchboard.java
   trunk/source/de/anomic/server/serverProcessor.java
   trunk/source/de/anomic/yacy/yacyURL.java
Log:
- some refactoring in search process
- separated sidebars in new search interface and placed them in their own files
  which can be put into the search page like plug-ins

Modified: trunk/htroot/IndexControlRWIs_p.html
===================================================================
--- trunk/htroot/IndexControlRWIs_p.html        2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/IndexControlRWIs_p.html        2008-03-05 21:46:55 UTC (rev 
4529)
@@ -92,11 +92,6 @@
             <input type="radio" name="lines" value="100" checked="checked" 
/>100&nbsp;&nbsp;
             <input type="radio" name="lines" value="1000" />1000&nbsp;&nbsp;
         </dd>
-        <dt class="TableCellDark">Ordering of list:</dt>
-        <dd><input type="radio" name="ordering" value="2" checked="checked"  
/>by Ranking&nbsp;&nbsp;
-            <input type="radio" name="ordering" value="0"/>by URL&nbsp;&nbsp;
-            <input type="radio" name="ordering" value="1" />by URL 
Hash&nbsp;&nbsp;
-        </dd>
         <dt class="TableCellLight"></dt>
         <dd><input type="submit" name="urllist" value="List Selected URLs" />
         </dd>

Modified: trunk/htroot/IndexControlRWIs_p.java
===================================================================
--- trunk/htroot/IndexControlRWIs_p.java        2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/IndexControlRWIs_p.java        2008-03-05 21:46:55 UTC (rev 
4529)
@@ -77,7 +77,6 @@
             // default values
             String keystring = post.get("keystring", "").trim();
             String keyhash = post.get("keyhash", "").trim();
-            int sortorder = post.getInt("ordering", 0);
             prop.putHTML("keystring", keystring);
             prop.put("keyhash", keyhash);
 
@@ -89,7 +88,7 @@
             if (post.containsKey("keystringsearch")) {
                 keyhash = plasmaCondenser.word2hash(keystring);
                 prop.put("keyhash", keyhash);
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 1);
                     prop.put("searchresult_word", keystring);
@@ -100,7 +99,7 @@
                 if (keystring.length() == 0 || 
!plasmaCondenser.word2hash(keystring).equals(keyhash)) {
                     prop.put("keystring", "&lt;not possible to compute word 
from hash&gt;");
                 }
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
                 if (ranking.filteredCount() == 0) {
                     prop.put("searchresult", 2);
                     prop.put("searchresult_wordhash", keyhash);
@@ -159,8 +158,8 @@
                 }
                 kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
                 int count = (post.get("lines", "all").equals("all")) ? -1 : 
post.getInt("lines", -1);
-                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
-                plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, 
flags, count, sortorder);
+                final plasmaSearchRankingProcess ranking = 
plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags);
+                plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, 
flags, count);
             }
 
             // transfer to other peer

Added: trunk/htroot/yacy/user/sidebar_history.html
===================================================================
--- trunk/htroot/yacy/user/sidebar_history.html 2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/yacy/user/sidebar_history.html 2008-03-05 21:46:55 UTC (rev 
4529)
@@ -0,0 +1,14 @@
+#(history)#::
+    <div id="partners" class="boxed">
+      <h2 class="title">Recent Searches</h2>
+      <div class="content">
+        <ul>
+        #{list}#
+          <li><a 
href="/yacy/user/ysearch.html?search=#[querystring]#&amp;resource=#[searchdom]#&amp;contentdom=#[contentdom]#">#[querystring]#</a></li>
+        #{/list}#
+        </ul>
+        <p>The search history is only visible for users from host #[host]#</p>
+      </div>
+    </div>
+#(/history)#
+

Added: trunk/htroot/yacy/user/sidebar_history.java
===================================================================
--- trunk/htroot/yacy/user/sidebar_history.java 2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/yacy/user/sidebar_history.java 2008-03-05 21:46:55 UTC (rev 
4529)
@@ -0,0 +1,68 @@
+// sidebar_history.java
+// (C) 2008 by Michael Peter Christen; [EMAIL PROTECTED], Frankfurt a. M., 
Germany
+// first published 03.03.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import java.util.HashSet;
+import java.util.Iterator;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaSearchQuery;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class sidebar_history {
+
+    public static serverObjects respond(httpHeader header, serverObjects post, 
serverSwitch env) {
+        final plasmaSwitchboard sb = (plasmaSwitchboard) env;
+        final serverObjects prop = new serverObjects();
+    
+        // list search history
+        Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
+        String client = (String) 
header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
+        plasmaSearchQuery query;
+        int c = 0;
+        HashSet<String> visibleQueries = new HashSet<String>();
+        while (i.hasNext()) {
+            query = i.next();
+            if (query.resultcount == 0) continue;
+            if (query.offset != 0) continue;
+            if (!query.host.equals(client)) continue; // the search history 
should only be visible from the user who initiated the search
+            if (visibleQueries.contains(query.queryString)) continue; // avoid 
doubles
+            visibleQueries.add(query.queryString);
+            prop.put("history_list_" + c + "_querystring", query.queryString);
+            prop.put("history_list_" + c + "_searchdom", query.searchdom());
+            prop.put("history_list_" + c + "_contentdom", query.contentdom());
+            c++;
+            if (c >= 10) break;
+        }
+        prop.put("history_list", c);
+        prop.put("history_host", client);
+        if (c == 0) prop.put("history", 0); else prop.put("history", 1); // 
switch on if there is anything to see
+        
+        return prop;
+    }
+
+}

Added: trunk/htroot/yacy/user/sidebar_navigation.html
===================================================================
--- trunk/htroot/yacy/user/sidebar_navigation.html      2008-03-05 18:56:01 UTC 
(rev 4528)
+++ trunk/htroot/yacy/user/sidebar_navigation.html      2008-03-05 21:46:55 UTC 
(rev 4529)
@@ -0,0 +1,37 @@
+#(rssreferences)#::
+<yacyTopwords:topwords>
+#{words}#
+<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
+#{/words}#
+</yacyTopwords:topwords>
+#(/rssreferences)#
+#(navigation)#::
+    <div id="navigate" class="boxed">
+      <h2 class="title">Navigate</h2>
+      <div class="content">
+        <p><Strong>Page</strong>:
+        #[resnav]#
+        </p>
+        #(topwords)#::
+        <p><strong>Category</strong>:
+        <select 
onchange="window.location.href=this.options[this.selectedIndex].value">
+          <option selected="selected">-select-</option>
+        #{words}#
+          <option 
value="/yacy/user/ysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=#[zonecode]#">#[word]#</option>
+        #{/words}#
+        </select>
+        </p>
+        #(/topwords)#
+        #(languagezone)#::
+        <p><strong>Language Zone</strong>:
+        <select 
onchange="window.location.href=this.options[this.selectedIndex].value">
+          <option selected="selected">-select-</option>
+        #{zones}#
+          <option 
value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=#[zonecode]#">#[zone]#</option>
+        #{/zones}#
+        </select>
+        </p>
+        #(/languagezone)#
+      </div>
+    </div>
+#(/navigation)#

Added: trunk/htroot/yacy/user/sidebar_navigation.java
===================================================================
--- trunk/htroot/yacy/user/sidebar_navigation.java      2008-03-05 18:56:01 UTC 
(rev 4528)
+++ trunk/htroot/yacy/user/sidebar_navigation.java      2008-03-05 21:46:55 UTC 
(rev 4529)
@@ -0,0 +1,173 @@
+// sidebar_navigation.java
+// (C) 2008 by Michael Peter Christen; [EMAIL PROTECTED], Frankfurt a. M., 
Germany
+// first published 03.03.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import de.anomic.http.httpHeader;
+import de.anomic.kelondro.kelondroMSetTools;
+import de.anomic.kelondro.kelondroNaturalOrder;
+import de.anomic.plasma.plasmaSearchEvent;
+import de.anomic.plasma.plasmaSearchQuery;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class sidebar_navigation {
+
+    private static final int MAX_TOPWORDS = 24;
+    
+    public static serverObjects respond(httpHeader header, serverObjects post, 
serverSwitch env) {
+        final serverObjects prop = new serverObjects();
+        
+        String eventID = post.get("eventID", "");
+        boolean rss = post.get("rss", "false").equals("true");
+        
+        // default settings for blank item
+        prop.put("navigation", "0");
+        prop.put("rssreferences", "0");
+        
+        // find search event
+        plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(eventID);
+        if (theSearch == null) {
+            // the event does not exist, show empty page
+            return prop;
+        }
+        plasmaSearchQuery theQuery = theSearch.getQuery();
+        int offset = theQuery.neededResults() - theQuery.displayResults();
+        int totalcount = theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize();
+    
+        // attach the bottom line with search references (topwords)
+        final Set<String> references = theSearch.references(20);
+        if (references.size() > 0) {
+            // get the topwords
+            final TreeSet<String> topwords = new 
TreeSet<String>(kelondroNaturalOrder.naturalComparator);
+            String tmp = "";
+            Iterator<String> i = references.iterator();
+            while (i.hasNext()) {
+                tmp = i.next();
+                if (tmp.matches("[a-z]+")) {
+                    topwords.add(tmp);
+                }
+            }
+
+            // filter out the badwords
+            final TreeSet<String> filteredtopwords = 
kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
+            if (filteredtopwords.size() > 0) {
+                kelondroMSetTools.excludeDestructive(topwords, 
plasmaSwitchboard.badwords);
+            }
+
+            // avoid stopwords being topwords
+            if (env.getConfig("filterOutStopwordsFromTopwords", 
"true").equals("true")) {
+                if ((plasmaSwitchboard.stopwords != null) && 
(plasmaSwitchboard.stopwords.size() > 0)) {
+                    kelondroMSetTools.excludeDestructive(topwords, 
plasmaSwitchboard.stopwords);
+                }
+            }
+            
+            if (rss) {
+                String word;
+                int hintcount = 0;
+                final Iterator<String> iter = topwords.iterator();
+                while (iter.hasNext()) {
+                    word = (String) iter.next();
+                    if (word != null) {
+                        prop.putHTML("rssreferences_words_" + hintcount + 
"_word", word);
+                    }
+                    prop.put("rssreferences_words", hintcount);
+                    if (hintcount++ > MAX_TOPWORDS) {
+                        break;
+                    }
+                }
+                prop.put("rssreferences", "1");
+            } else {
+                String word;
+                int hintcount = 0;
+                final Iterator<String> iter = topwords.iterator();
+                while (iter.hasNext()) {
+                    word = (String) iter.next();
+                    if ((theQuery == null) || (theQuery.queryString == null)) 
break;
+                    if (word != null) {
+                        prop.putHTML("navigation_topwords_words_" + hintcount 
+ "_word", word);
+                        prop.putHTML("navigation_topwords_words_" + hintcount 
+ "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
+                        prop.put("navigation_topwords_words_" + hintcount + 
"_count", theQuery.displayResults());
+                        prop.put("navigation_topwords_words_" + hintcount + 
"_offset", "0");
+                        prop.put("navigation_topwords_words_" + hintcount + 
"_contentdom", theQuery.contentdom());
+                        prop.put("navigation_topwords_words_" + hintcount + 
"_resource", theQuery.searchdom());
+                        prop.put("navigation_topwords_words_" + hintcount + 
"_zonecode", theQuery.zonecode);
+                    }
+                    prop.put("navigation_topwords_words", hintcount);
+                    if (hintcount++ > MAX_TOPWORDS) {
+                        break;
+                    }
+                }
+                prop.put("navigation_topwords", "1");
+            }
+        }
+        
+        // compose page navigation
+        StringBuffer resnav = new StringBuffer();
+        int thispage = offset / theQuery.displayResults();
+        if (thispage == 0) resnav.append("&lt;&nbsp;"); else {
+            resnav.append(navurla(thispage - 1, theQuery));
+            resnav.append("<strong>&lt;</strong></a>&nbsp;");
+        }
+        int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / 
theQuery.displayResults()));
+        for (int j = 0; j < numberofpages; j++) {
+            if (j == thispage) {
+                resnav.append("<strong>");
+                resnav.append(j + 1);
+                resnav.append("</strong>&nbsp;");
+            } else {
+                resnav.append(navurla(j, theQuery));
+                resnav.append(j + 1);
+                resnav.append("</a>&nbsp;");
+            }
+        }
+        if (thispage >= numberofpages) resnav.append("&gt;"); else {
+            resnav.append(navurla(thispage + 1, theQuery));
+            resnav.append("<strong>&gt;</strong></a>");
+        }
+        prop.put("navigation_resnav", resnav.toString());
+        prop.put("navigation", "1");
+
+        return prop;
+    }
+    
+    private static String navurla(int page, plasmaSearchQuery theQuery) {
+        return
+        "<a href=\"ysearch.html?search=" + theQuery.queryString() +
+        "&amp;count="+ theQuery.displayResults() +
+        "&amp;offset=" + (page * theQuery.displayResults()) +
+        "&amp;resource=" + theQuery.searchdom() +
+        "&amp;urlmaskfilter=" + theQuery.urlMask +
+        "&amp;prefermaskfilter=" + theQuery.prefer +
+        "&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" 
: theQuery.constraint.exportB64()) +
+        "&amp;contentdom=" + theQuery.contentdom() +
+        "&amp;former=" + theQuery.queryString() + "\">";
+    }
+    
+}

Modified: trunk/htroot/yacy/user/ysearch.html
===================================================================
--- trunk/htroot/yacy/user/ysearch.html 2008-03-05 18:56:01 UTC (rev 4528)
+++ trunk/htroot/yacy/user/ysearch.html 2008-03-05 21:46:55 UTC (rev 4529)
@@ -141,7 +141,8 @@
   
   <div id="sidebar">
 <!-- attach the bottomline -->
-<!--#include 
virtual="/yacy/user/ysearchitem.html?bottomline=true&eventID=#[eventID]#" -->
+<!--#include virtual="/yacy/user/sidebar_navigation.html?eventID=#[eventID]#" 
-->
+<!--#include virtual="/yacy/user/sidebar_history.html?eventID=#[eventID]#" -->
   </div>
 </div>
 

Modified: trunk/htroot/yacy/user/ysearchitem.html
===================================================================
--- trunk/htroot/yacy/user/ysearchitem.html     2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/yacy/user/ysearchitem.html     2008-03-05 21:46:55 UTC (rev 
4529)
@@ -38,50 +38,9 @@
 <guid isPermaLink="false">#[urlhash]#</guid>
 </item>
 #(/rss)#
-#(rssreferences)#::
-<yacyTopwords:topwords>
-#{words}#
-<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
-#{/words}#
-</yacyTopwords:topwords>
-#(/rssreferences)#
 #(dynamic)#::
 <script type="text/javascript">
 statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#", 
"#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#", 
"#[remotePeerCount]#");
 progressbar.step(1);
 </script>
 #(/dynamic)#
-#(navigation)#::
-    <div id="navigate" class="boxed">
-      <h2 class="title">Navigate</h2>
-      <div class="content">
-        <p><Strong>Page</strong>:
-        #[resnav]#
-        </p>
-        #(topwords)#::
-        <p><strong>Category</strong>:
-        <select 
onchange="window.location.href=this.options[this.selectedIndex].value">
-          <option selected="selected">-select-</option>
-        #{words}#
-          <option 
value="/yacy/user/ysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#">#[word]#</option>
-        #{/words}#
-        </select>
-        </p>
-        #(/topwords)#
-      </div>
-    </div>
-#(/navigation)#
-#(history)#::
-    <div id="partners" class="boxed">
-      <h2 class="title">Recent Searches</h2>
-      <div class="content">
-        <ul>
-        #{list}#
-          <li><a 
href="/yacy/user/ysearch.html?search=#[querystring]#&amp;resource=#[searchdom]#&amp;contentdom=#[contentdom]#">#[querystring]#</a></li>
-        #{/list}#
-        </ul>
-        <p>The search history is only visible for users from host #[host]#</p>
-      </div>
-    </div>
-#(/history)#
-

Modified: trunk/htroot/yacy/user/ysearchitem.java
===================================================================
--- trunk/htroot/yacy/user/ysearchitem.java     2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/htroot/yacy/user/ysearchitem.java     2008-03-05 21:46:55 UTC (rev 
4529)
@@ -28,14 +28,9 @@
 import java.net.MalformedURLException;
 import java.net.URLEncoder;
 import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
 import java.util.TreeSet;
 
 import de.anomic.http.httpHeader;
-import de.anomic.kelondro.kelondroMSetTools;
-import de.anomic.kelondro.kelondroNaturalOrder;
 import de.anomic.plasma.plasmaSearchEvent;
 import de.anomic.plasma.plasmaSearchQuery;
 import de.anomic.plasma.plasmaSearchRankingProcess;
@@ -53,14 +48,12 @@
     private static boolean col = true;
     private static final int namelength = 60;
     private static final int urllength = 120;
-    private static final int MAX_TOPWORDS = 24;
     
     public static serverObjects respond(httpHeader header, serverObjects post, 
serverSwitch env) {
         final plasmaSwitchboard sb = (plasmaSwitchboard) env;
         final serverObjects prop = new serverObjects();
         
         String eventID = post.get("eventID", "");
-        boolean bottomline = post.get("bottomline", "false").equals("true");
         boolean rss = post.get("rss", "false").equals("true");
         int item = post.getInt("item", -1);
         boolean auth = ((String) 
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || 
sb.verifyAuthentication(header, true);
@@ -69,8 +62,6 @@
         prop.put("content", "0");
         prop.put("rss", "0");
         prop.put("references", "0");
-        prop.put("rssreferences", "0");
-        prop.put("navigation", "0");
         prop.put("dynamic", "0");
         
         // find search event
@@ -81,7 +72,6 @@
         }
         plasmaSearchQuery theQuery = theSearch.getQuery();
         int offset = theQuery.neededResults() - theQuery.displayResults();
-        int totalcount = theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize();
         
         // dynamically update count values
         if (!rss) {
@@ -95,129 +85,7 @@
             prop.put("dynamic_resnav", "");
             prop.put("dynamic", "1");
         }
-        
-        if (bottomline) {
-            // attach the bottom line with search references (topwords)
-            final Set<String> references = theSearch.references(20);
-            if (references.size() > 0) {
-                // get the topwords
-                final TreeSet<String> topwords = new 
TreeSet<String>(kelondroNaturalOrder.naturalComparator);
-                String tmp = "";
-                Iterator<String> i = references.iterator();
-                while (i.hasNext()) {
-                    tmp = i.next();
-                    if (tmp.matches("[a-z]+")) {
-                        topwords.add(tmp);
-                    }
-                }
 
-                // filter out the badwords
-                final TreeSet<String> filteredtopwords = 
kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
-                if (filteredtopwords.size() > 0) {
-                    kelondroMSetTools.excludeDestructive(topwords, 
plasmaSwitchboard.badwords);
-                }
-
-                // avoid stopwords being topwords
-                if (env.getConfig("filterOutStopwordsFromTopwords", 
"true").equals("true")) {
-                    if ((plasmaSwitchboard.stopwords != null) && 
(plasmaSwitchboard.stopwords.size() > 0)) {
-                        kelondroMSetTools.excludeDestructive(topwords, 
plasmaSwitchboard.stopwords);
-                    }
-                }
-                
-                if (rss) {
-                    String word;
-                    int hintcount = 0;
-                    final Iterator<String> iter = topwords.iterator();
-                    while (iter.hasNext()) {
-                        word = (String) iter.next();
-                        if (word != null) {
-                            prop.putHTML("rssreferences_words_" + hintcount + 
"_word", word);
-                        }
-                        prop.put("rssreferences_words", hintcount);
-                        if (hintcount++ > MAX_TOPWORDS) {
-                            break;
-                        }
-                    }
-                    prop.put("rssreferences", "1");
-                } else {
-                    String word;
-                    int hintcount = 0;
-                    final Iterator<String> iter = topwords.iterator();
-                    while (iter.hasNext()) {
-                        word = (String) iter.next();
-                        if ((theQuery == null) || (theQuery.queryString == 
null)) break;
-                        if (word != null) {
-                            prop.putHTML("navigation_topwords_words_" + 
hintcount + "_word", word);
-                            prop.putHTML("navigation_topwords_words_" + 
hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
-                            prop.put("navigation_topwords_words_" + hintcount 
+ "_count", theQuery.displayResults());
-                            prop.put("navigation_topwords_words_" + hintcount 
+ "_offset", "0");
-                            prop.put("navigation_topwords_words_" + hintcount 
+ "_contentdom", theQuery.contentdom());
-                            prop.put("navigation_topwords_words_" + hintcount 
+ "_resource", theQuery.searchdom());
-                        }
-                        prop.put("navigation_topwords_words", hintcount);
-                        if (hintcount++ > MAX_TOPWORDS) {
-                            break;
-                        }
-                    }
-                    prop.put("navigation_topwords", "1");
-                }
-                
-            }
-            
-            // compose page navigation
-            StringBuffer resnav = new StringBuffer();
-            int thispage = offset / theQuery.displayResults();
-            if (thispage == 0) resnav.append("&lt;&nbsp;"); else {
-                resnav.append(navurla(thispage - 1, theQuery));
-                resnav.append("<strong>&lt;</strong></a>&nbsp;");
-            }
-            int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount 
/ theQuery.displayResults()));
-            for (int j = 0; j < numberofpages; j++) {
-                if (j == thispage) {
-                    resnav.append("<strong>");
-                    resnav.append(j + 1);
-                    resnav.append("</strong>&nbsp;");
-                } else {
-                    resnav.append(navurla(j, theQuery));
-                    resnav.append(j + 1);
-                    resnav.append("</a>&nbsp;");
-                }
-            }
-            if (thispage >= numberofpages) resnav.append("&gt;"); else {
-                resnav.append(navurla(thispage + 1, theQuery));
-                resnav.append("<strong>&gt;</strong></a>");
-            }
-            prop.put("navigation_resnav", resnav.toString());
-            prop.put("navigation", "1");
-            
-            // list search history
-            Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
-            String client = (String) 
header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
-            plasmaSearchQuery query;
-            int c = 0;
-            HashSet<String> visibleQueries = new HashSet<String>();
-            while (i.hasNext()) {
-                query = i.next();
-                if (query.resultcount == 0) continue;
-                if (query.offset != 0) continue;
-                if (!query.host.equals(client)) continue; // the search 
history should only be visible from the user who initiated the search
-                if (visibleQueries.contains(query.queryString)) continue; // 
avoid doubles
-                visibleQueries.add(query.queryString);
-                prop.put("history_list_" + c + "_querystring", 
query.queryString);
-                prop.put("history_list_" + c + "_searchdom", 
query.searchdom());
-                prop.put("history_list_" + c + "_contentdom", 
query.contentdom());
-                c++;
-                if (c >= 10) break;
-            }
-            prop.put("history_list", c);
-            prop.put("history_host", client);
-            if (c == 0) prop.put("history", 0); else prop.put("history", 1); 
// switch on if there is anything to see
-            
-            return prop;
-        }
-
-        prop.put("rss", "0");
-        
         if (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
             // text search
 
@@ -331,18 +199,4 @@
         return s.substring(0, length - (s.length() - p) - 3) + "..." + 
s.substring(p);
     }
 
-
-    private static String navurla(int page, plasmaSearchQuery theQuery) {
-        return
-        "<a href=\"ysearch.html?search=" + theQuery.queryString() +
-        "&amp;count="+ theQuery.displayResults() +
-        "&amp;offset=" + (page * theQuery.displayResults()) +
-        "&amp;resource=" + theQuery.searchdom() +
-        "&amp;urlmaskfilter=" + theQuery.urlMask +
-        "&amp;prefermaskfilter=" + theQuery.prefer +
-        "&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" 
: theQuery.constraint.exportB64()) +
-        "&amp;contentdom=" + theQuery.contentdom() +
-        "&amp;former=" + theQuery.queryString() + "\">";
-    }
-    
 }

Modified: trunk/source/de/anomic/plasma/plasmaSearchAPI.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-03-05 18:56:01 UTC 
(rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchAPI.java  2008-03-05 21:46:55 UTC 
(rev 4529)
@@ -88,9 +88,9 @@
         }
     }
 
-    public static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int 
sortorder) {
+    public static plasmaSearchRankingProcess genSearchresult(serverObjects 
prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter) {
         plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, 
sb.getRanking(), filter);
-        plasmaSearchRankingProcess ranked = new 
plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE, 
1);
+        plasmaSearchRankingProcess ranked = new 
plasmaSearchRankingProcess(sb.wordIndex, query, Integer.MAX_VALUE, 1);
         ranked.execQuery();
         
         if (ranked.filteredCount() == 0) {
@@ -114,7 +114,7 @@
         return ranked;
     }
     
-    public static void genURLList(serverObjects prop, String keyhash, String 
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int 
maxlines, int ordering) {
+    public static void genURLList(serverObjects prop, String keyhash, String 
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int 
maxlines) {
         // search for a word hash and generate a list of url links
         prop.put("genUrlList_keyHash", keyhash);
         
@@ -127,7 +127,6 @@
             prop.put("searchresult", 3);
             prop.put("genUrlList_flags", (flags == null) ? "" : 
flags.exportB64());
             prop.put("genUrlList_lines", maxlines);
-            prop.put("genUrlList_ordering", ordering);
             int i = 0;
             yacyURL url;
             indexURLEntry entry;

Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2008-03-05 
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java        2008-03-05 
21:46:55 UTC (rev 4529)
@@ -123,7 +123,7 @@
         if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
             (query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
             // do a global search
-            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, 
query, 2, max_results_preparation, 16);
+            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, 
query, max_results_preparation, 16);
             
             int fetchpeers = 30;
 
@@ -156,7 +156,7 @@
             serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER 
GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + 
((System.currentTimeMillis() - start) / 1000) + " seconds");
         } else {
             // do a local search
-            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, 
query, 2, max_results_preparation, 2);
+            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, 
query, max_results_preparation, 2);
             this.rankedCache.execQuery();
             //plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, 
false, 2, ranking, process);
             

Modified: trunk/source/de/anomic/plasma/plasmaSearchQuery.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchQuery.java        2008-03-05 
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchQuery.java        2008-03-05 
21:46:55 UTC (rev 4529)
@@ -54,6 +54,7 @@
 import de.anomic.server.serverCharBuffer;
 import de.anomic.yacy.yacySeed;
 import de.anomic.yacy.yacySeedDB;
+import de.anomic.yacy.yacyURL;
 
 public final class plasmaSearchQuery {
     
@@ -80,7 +81,7 @@
     public int contentdom;
     public String urlMask;
     public int domType;
-    public String domGroupName;
+    public int zonecode;
     public int domMaxTargets;
     public int maxDistance;
     public kelondroBitfield constraint;
@@ -117,7 +118,7 @@
         this.offset = 0;
         this.urlMask = ".*";
         this.domType = SEARCHDOM_LOCAL;
-        this.domGroupName = "";
+        this.zonecode = yacyURL.language_domain_any_zone;
         this.domMaxTargets = 0;
         this.constraint = constraint;
         this.allofconstraint = false;
@@ -148,7 +149,7 @@
                //this.maximumTime = Math.min(6000, maximumTime);
                this.urlMask = urlMask;
                this.domType = domType;
-               this.domGroupName = domGroupName;
+        this.zonecode = yacyURL.language_domain_any_zone;
                this.domMaxTargets = domMaxTargets;
                this.constraint = constraint;
                this.allofconstraint = allofconstraint;

Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java       
2008-03-05 18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java       
2008-03-05 21:46:55 UTC (rev 4529)
@@ -40,7 +40,6 @@
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexRWIEntry;
 import de.anomic.index.indexRWIEntryOrder;
-import de.anomic.index.indexRWIRowEntry;
 import de.anomic.index.indexRWIVarEntry;
 import de.anomic.index.indexURLEntry;
 import de.anomic.kelondro.kelondroBinSearch;
@@ -49,6 +48,7 @@
 import de.anomic.server.serverCodings;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverProfiling;
+import de.anomic.yacy.yacyURL;
 
 public final class plasmaSearchRankingProcess {
     
@@ -59,7 +59,6 @@
     private HashMap<String, kelondroSortStack<indexRWIVarEntry>> 
doubleDomCache; // key = domhash (6 bytes); value = like stack
     private HashMap<String, String> handover; // key = urlhash, value = 
urlstring; used for double-check of urls that had been handed over to search 
process
     private plasmaSearchQuery query;
-    private int sortorder;
     private int maxentries;
     private int remote_peerCount, remote_indexCount, remote_resourceSize, 
local_resourceSize;
     private indexRWIEntryOrder order;
@@ -70,7 +69,7 @@
     private plasmaWordIndex wordIndex;
     private HashMap<String, indexContainer>[] localSearchContainerMaps;
     
-    public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, 
plasmaSearchQuery query, int sortorder, int maxentries, int concurrency) {
+    public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, 
plasmaSearchQuery query, int maxentries, int concurrency) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not 
terminate within the maxTime
         // sortorder: 0 = hash, 1 = url, 2 = ranking
@@ -89,7 +88,6 @@
         this.ref = new kelondroMScoreCluster<String>();
         this.misses = new TreeSet<String>();
         this.wordIndex = wordIndex;
-        this.sortorder = sortorder;
         this.flagcount = new int[32];
         for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
     }
@@ -120,59 +118,9 @@
             return;
         }
         
-        if (sortorder == 2) {
-            insertRanked(index, true, index.size());
-        } else {            
-            insertNoOrder(index, true, index.size());
-        }
+        insertRanked(index, true, index.size());
     }
     
-    private void insertNoOrder(indexContainer index, boolean local, int 
fullResource) {
-        final Iterator<indexRWIRowEntry> en = index.entries();
-        // generate a new map where the urls are sorted (not by hash but by 
the url text)
-        
-        if (local) {
-            this.local_resourceSize += fullResource;
-        } else {
-            this.remote_resourceSize += fullResource;
-            this.remote_peerCount++;
-            this.remote_indexCount += index.size();
-        }
-        
-        indexRWIVarEntry ientry;
-        indexURLEntry uentry;
-        String u;
-        loop: while (en.hasNext()) {
-            ientry = new indexRWIVarEntry(en.next());
-
-            // check constraints
-            if (!testFlags(ientry)) continue loop;
-            
-            // increase flag counts
-            for (int i = 0; i < 32; i++) {
-                if (ientry.flags().get(i)) {flagcount[i]++;}
-            }
-            
-            // load url
-            if (sortorder == 0) {
-                this.stack.push(ientry, new Long(ientry.urlHash().hashCode()));
-                this.urlhashes.put(ientry.urlHash(), new 
Integer(ientry.urlHash().hashCode()));
-            } else {
-                uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
-                if (uentry == null) {
-                    this.misses.add(ientry.urlHash());
-                } else {
-                    u = uentry.comp().url().toNormalform(false, true);
-                    this.stack.push(ientry, new Long(u.hashCode()));
-                    this.urlhashes.put(ientry.urlHash(), new 
Integer(u.hashCode()));
-                }
-            }
-            
-            // interrupt if we have enough
-            if ((query.neededResults() > 0) && (this.misses.size() + 
this.stack.size() > query.neededResults())) break loop;
-        } // end loop
-    }
-    
     public void insertRanked(indexContainer index, boolean local, int 
fullResource) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not 
terminate within the maxTime
@@ -222,6 +170,12 @@
                 if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP  ) 
&& (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasapp  )))) continue;
             }
 
+            // check tld domain
+            if (!yacyURL.matchesAnyDomDomain(iEntry.urlHash(), 
this.query.zonecode)) {
+                // filter out all tld that do not match with wanted tld domain
+                continue;
+            }
+            
             // insert
             if ((maxentries < 0) || (stack.size() < maxentries)) {
                 // in case that we don't have enough yet, accept any new entry

Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-03-05 
18:56:01 UTC (rev 4528)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java        2008-03-05 
21:46:55 UTC (rev 4529)
@@ -1445,6 +1445,7 @@
         if (hostAddress == null) return ((this.remoteProxyConfig != null) && 
(this.remoteProxyConfig.useProxy()));
         // check if this is a local address and we are allowed to index local 
pages:
         boolean local = hostAddress.isSiteLocalAddress() || 
hostAddress.isLoopbackAddress();
+        //assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove 
the dnsResolve above!
         return (this.acceptGlobalURLs && !local) || (this.acceptLocalURLs && 
local);
     }
     

Modified: trunk/source/de/anomic/server/serverProcessor.java
===================================================================
--- trunk/source/de/anomic/server/serverProcessor.java  2008-03-05 18:56:01 UTC 
(rev 4528)
+++ trunk/source/de/anomic/server/serverProcessor.java  2008-03-05 21:46:55 UTC 
(rev 4529)
@@ -24,10 +24,29 @@
 
 package de.anomic.server;
 
+import java.util.concurrent.LinkedBlockingQueue;
 
+
 public class serverProcessor {
 
     public static final int availableCPU = 
Runtime.getRuntime().availableProcessors();
     public static int       useCPU = availableCPU;
+  
+    public static class queue<I, O> { // holder: one serverProcess<I, O> together with an input queue of I and an output queue of O
+        String nickname; // stored as passed to the constructor; not read inside this class
+        int priority; // stored as passed to the constructor; presumably a scheduling hint -- TODO confirm with the code that uses it
+        serverProcess<I, O> implementation; // the process object this queue pair belongs to
+        LinkedBlockingQueue<I> inputQueue; // created empty, without a capacity argument, by the constructor
+        LinkedBlockingQueue<O> outputQueue; // created empty, without a capacity argument, by the constructor
+        
+        public queue(String nickname, int priority, serverProcess<I, O> 
implementation) { // only stores the arguments and allocates both queues
+            this.nickname = nickname;
+            this.priority = priority;
+            this.implementation = implementation;
+            this.inputQueue = new LinkedBlockingQueue<I>();
+            this.outputQueue = new LinkedBlockingQueue<O>();
+        }
+    }
     
+    
 }

Modified: trunk/source/de/anomic/yacy/yacyURL.java
===================================================================
--- trunk/source/de/anomic/yacy/yacyURL.java    2008-03-05 18:56:01 UTC (rev 
4528)
+++ trunk/source/de/anomic/yacy/yacyURL.java    2008-03-05 21:46:55 UTC (rev 
4529)
@@ -139,7 +139,7 @@
          "UY=Uruguay",
          "VE=Venezuela"
      };
-     private static final String[] TLD_EuropaRussia = {
+     private static final String[] TLD_EuropeRussia = {
         // includes also countries that are mainly french- dutch- speaking
         // and culturally close to europe
          "AD=Andorra",
@@ -373,20 +373,30 @@
             }
         }
     }
-
+    
+    public static final int language_domain_europe_zone     = 128 + 1;       
//{0, 7};
+    public static final int language_domain_english_zone    = 128 + 16 + 64; 
//{4, 6, 7};
+    public static final int language_domain_spanish_zone    = 128 + 2;       
//{1, 7};
+    public static final int language_domain_asia_zone       = 128 + 4;       
//{2, 7};
+    public static final int language_domain_middleeast_zone = 128 + 8;       
//{3, 7};
+    public static final int language_domain_africa_zone     = 128 + 32;      
//{5, 7};
+    public static final int language_domain_any_zone        = 255;
+    
+    public static final String[] regions = {"europe", "english", "spanish", 
"asia", "middleeast", "africa"};
+    
     static {
         // create a dummy hash
         dummyHash = "";
         for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-";
 
         // assign TLD-ids and names
-        insertTLDProps(TLD_EuropaRussia, 0);
-        insertTLDProps(TLD_MiddleSouthAmerica, 1);
-        insertTLDProps(TLD_SouthEastAsia, 2);
-        insertTLDProps(TLD_MiddleEastWestAsia, 3);
-        insertTLDProps(TLD_NorthAmericaOceania, 4);
-        insertTLDProps(TLD_Africa, 5);
-        insertTLDProps(TLD_Generic, 6);
+        insertTLDProps(TLD_EuropeRussia,        0); // European languages but 
no english
+        insertTLDProps(TLD_MiddleSouthAmerica,  1); // mainly spanish-speaking 
countries
+        insertTLDProps(TLD_SouthEastAsia,       2); // asia
+        insertTLDProps(TLD_MiddleEastWestAsia,  3); // middle east
+        insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking 
countries
+        insertTLDProps(TLD_Africa,              5); // africa
+        insertTLDProps(TLD_Generic,             6); // anything else, mixed 
languages, mainly english
         // the id=7 is used to flag local addresses
     }
     
@@ -971,7 +981,7 @@
             tld = host.substring(p + 1);
             dom = host.substring(0, p);
         }
-        Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) 
TLDID.get(tld); // identify local addresses
+        Integer ID = (serverDomains.isLocal(tld)) ? null : TLDID.get(tld); // 
identify local addresses
         int id = (ID == null) ? 7 : ID.intValue(); // local addresses are 
flagged with id=7
         boolean isHTTP = this.protocol.equals("http");
         p = dom.lastIndexOf('.'); // locate subdomain
@@ -1083,14 +1093,22 @@
         // returns the ID of the domain of the domain
         assert (urlHash != null);
         assert (urlHash.length() == 12) : "urlhash = " + urlHash;
-        int flagbyte = 
kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11));
-        return (flagbyte & 12) >> 2;
+        return 
(kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)) & 12) >> 2;
     }
 
-    public static boolean isGlobalDomain(String urlhash) {
-        return domDomain(urlhash) != 7;
+    public static boolean isLocalDomain(String urlhash) {
+        return domDomain(urlhash) == 7;
     }
 
+    public static boolean isDomDomain(String urlHash, int id) { // true if domDomain(urlHash) equals id; id is one single id, not a set of ids
+        return domDomain(urlHash) == id;
+    }
+    
+    public static boolean matchesAnyDomDomain(String urlHash, int idset) {
+        // idset is a bit set of domDomain ids (bit n set = id n accepted, e.g. 128 + 1 = {0, 7}); test the bit of this hash's id
+        return ((1 << domDomain(urlHash)) & idset) != 0;
+    }
+    
     // checks for local/global IP range and local IP
     public boolean isLocal() {
         return serverDomains.isLocal(this.host);

_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an