Author: swatig0
Date: 2007-07-13 11:31:40 +0000 (Fri, 13 Jul 2007)
New Revision: 14062

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
Exporting XmlSpider to external plugin format

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2007-07-13 11:07:16 UTC (rev 14061)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2007-07-13 11:31:40 UTC (rev 14062)
@@ -74,7 +74,12 @@
 import freenet.support.api.HTTPRequest;

 /**
- * Spider. Produces an index.
+ * XMLSpider. Produces an index for searching words.
+ * If the size of the index grows beyond a specific threshold, the index is split into several subindices.
+ * The indexing key is the md5 hash of the word.
+ * 
+ *  @author swati goyal
+ *  
  */
 public class XMLSpider implements FredPlugin, FredPluginHTTP, 
FredPluginThreadless,  FredPluginHTTPAdvanced,HttpPlugin, ClientCallback, 
FoundURICallback ,USKCallback{

@@ -94,16 +99,24 @@
        private int match;
        private Vector list;
        private boolean indexing ;
+       
        private static final int minTimeBetweenEachIndexRewriting = 10;
-       //private static final String indexFilename = "index.xml";
+/**
+ * DEFAULT_INDEX_DIR is the directory where the generated indices are stored.
+ * Needs to be created before it can be used
+ */
        private static final String DEFAULT_INDEX_DIR = "myindex3/";
        public Set allowedMIMETypes;
        private static final int MAX_ENTRIES = 5;
        private static final String pluginName = "XML spider";
+       /**
+        * The maximum fraction of total time that may be spent on generating indices.
+        * max value = 1; min value > 0
+        */
        private static final double MAX_TIME_SPENT_INDEXING = 0.5;
-       //MAX_TIME_SPENT_INDEXING is the fraction of the total time  allowed to 
be spent on indexing(max value = 1)
-       private static final String indexTitle= "This is an index";
-       private static final String indexOwner = "Another anonymous";
+       
+       private static final String indexTitle= "XMLSpider index";
+       private static final String indexOwner = "Freenet";
        private static final String indexOwnerEmail = null;
        private final HashMap sizeOfURIs = new HashMap(); /* String (URI) -> 
Long */
        private final HashMap mimeOfURIs = new HashMap(); /* String (URI) -> 
String */
@@ -112,7 +125,7 @@

        // Can have many; this limit only exists to save memory.
        private static final int maxParallelRequests = 100;
-       private int maxShownURIs = 50;
+       private int maxShownURIs = 15;
        private HashMap urisToNumbers;
        private NodeClientCore core;
        private FetchContext ctx;
@@ -541,7 +554,15 @@


        }
-
+/**
+ * Generates the main index file, which the librarian can use to search the list of
+ * subindices.
+ *
+ * Takes no parameters.
+ * @author swati 
+ * @throws IOException
+ * @throws NoSuchAlgorithmException
+ */
        private synchronized void produceIndex2() throws 
IOException,NoSuchAlgorithmException {
                // Produce the main index file.

@@ -670,6 +691,13 @@


        }
+       /**
+        * Generates the subindices.
+        * Each index has fewer than {@code MAX_ENTRIES} words.
+        * The original treemap is split into several sublists indexed by the common substring
+        * of the hash codes of the words.
+        * @throws Exception
+        */
        private synchronized void generateIndex2() throws Exception{
                // now we the tree map and we need to use the sorted (md5s) to 
generate the xml indices
                if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
@@ -706,17 +734,7 @@
                generateSubIndex(prefix,list);
                str = key;
                list = new Vector();
-//             int count = list.size();
-//             if(count > MAX_ENTRIES){
-//                     //the index has to be split up
-//                     generateSubIndex(prefix,list);                  
-//             }
-//             else generateXML(list,prefix);
-//             str = key;
-//             list = new Vector();
                }
-                       //
-               // this variable will keep the number of digits to be used 
                }

                generateSubIndex(prefix,list);
@@ -809,23 +827,7 @@
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);

-               /* -> owner */
-               subHeaderElement = xmlDoc.createElement("owner");
-               subHeaderText = xmlDoc.createTextNode(indexOwner);
-               
-               subHeaderElement.appendChild(subHeaderText);
-               headerElement.appendChild(subHeaderElement);
-               
-       
-               /* -> owner email */
-               if(indexOwnerEmail != null) {
-                       subHeaderElement = xmlDoc.createElement("email");
-                       subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);

-                       subHeaderElement.appendChild(subHeaderText);
-                       headerElement.appendChild(subHeaderElement);
-               }
-

                Element filesElement = xmlDoc.createElement("files"); /* 
filesElement != fileElement */

@@ -874,15 +876,6 @@
                                
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
                                int l;
                                wordElement.appendChild(uriElement);
-//                     for(l = 
0;l<filesElement.getChildNodes().getLength();l++)
-//                             { Element file = (Element) 
filesElement.getChildNodes().item(l);
-//                             if(file.getAttribute("id").equals(x.toString()))
-//                             
-//                             break;
-//                             }
-                               
-//                             if(l>=filesElement.getChildNodes().getLength())
-//                             filesElement.appendChild(fileElement);
                                if(!fileid.contains(x.toString()))
                                {
                                        fileid.add(x.toString());
@@ -892,9 +885,8 @@

                        //Element keywordsElement = (Element) 
root.getElementsByTagName("keywords").item(0);
                        keywordsElement.appendChild(wordElement);
-//                             
+                               
                }
-//     

                rootElement.appendChild(EntriesElement);
                rootElement.appendChild(headerElement);
@@ -1213,9 +1205,6 @@
                }
        }

-       /**
-        * @see 
freenet.oldplugins.plugin.HttpPlugin#handlePost(freenet.clients.http.HTTPRequestImpl,
 freenet.clients.http.ToadletContext)
-        */
        public void handlePost(HTTPRequest request, ToadletContext context) 
throws IOException {
        }

@@ -1515,7 +1504,14 @@
        // need to produce pretty html
        //later fredpluginhttpadvanced will give the interface
        //this brings us to the page from visit
-
+       String listname = request.getParam("list");
+       if(listname.length() != 0)
+       {
+               appendDefaultHeader(out,null);
+               out.append("<p><h4>"+listname+" URIs</h4></p>");
+               appendList(listname,out,null);
+               return out.toString();
+       }
        appendDefaultPageStart(out,null);
        String uriParam = request.getParam("adduri");
        if(uriParam != null && uriParam.length() != 0)
@@ -1583,8 +1579,22 @@

        return out.toString();
 }
+private void appendList(String listname, StringBuffer out, String stylesheet)
+{
+       Iterator it = (runningFetchesByURI.keySet()).iterator();
+       if(listname.equals("running"))
+               it = (runningFetchesByURI.keySet()).iterator();
+       if(listname.equals("visited"))
+               it = (new HashSet(visitedURIs)).iterator();
+       if(listname.equals("queued"))
+               it = (new ArrayList(queuedURIList)).iterator();
+       if(listname.equals("failed"))
+               it = (new HashSet(failedURIs)).iterator();
+       while(it.hasNext())
+               out.append("<code>"+it.next().toString()+"</code><br/>");
+}
 private void appendDefaultPageStart(StringBuffer out, String stylesheet) {
-       count ++;
+       
        out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
        if(stylesheet != null)
                out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
@@ -1597,16 +1607,60 @@
        out.append("<p><h3>Running Fetches</h3></p>");
        Set visited = new HashSet(visitedURIs);
        List queued = new ArrayList(queuedURIList);
+       
        Set failed = new HashSet(failedURIs);
        Iterator it=queued.iterator();
        out.append("<br/>Size :"+runningFetches.size());
+       appendList(runningFetches,out,stylesheet);
+       out.append("<p><a href=\"?list="+"running"+"\">Showall 
running</a><br/></p>");
        out.append("<br/>Size :"+queued.size());
+       int i = 0;
+       while(it.hasNext()){
+               if(i<=maxShownURIs){
+               out.append("<code>"+it.next().toString()+"</code><br/>");
+               }
+               else break;
+               i++;
+       }
+       out.append("<p><a href=\"?list="+"queued"+"\">Showall 
queued</a><br/></p>");
        out.append("<br/>Size :"+visited.size());
+       appendList(visited,out,stylesheet);
+       out.append("<p><a href=\"?list="+"visited"+"\">Showall 
visited</a><br/></p>");
        out.append("<br/>Size :"+failed.size());
-       out.append("<br/>Count : "+count);
+       appendList(failed,out,stylesheet);
+       out.append("<p><a href=\"?list="+"failed"+"\">Showall 
failed</a><br/></p>");
+       
+       
+}
+private void appendDefaultHeader(StringBuffer out, String stylesheet){
+       out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+       if(stylesheet != null)
+               out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
+       out.append("</HEAD><BODY>\n");
+       out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
+       out.append("Add uri:");
+       out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" 
/><br/><br/>");
+       out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+}
+private void appendList(Set  list,StringBuffer out, String stylesheet){
+       Iterator it = list.iterator();
+       int i = 0;
        while(it.hasNext()){
-               out.append("<code>"+(it.next()).toString()+"</code><br>");
+               if(i<=maxShownURIs){
+               out.append("<code>"+it.next().toString()+"</code><br/>");
+               }
+               else{
+                       //out.append("<form method=\"GET\"><input 
type=\"submit\" name=\"Showall\" />");
+//                     if(listname.equals("visited"))
+//                     out.append("<p><a href=\"?list="+listname+">Showall 
visited</a><br/></p>");
+//                     if(listname.equals("failed"))
+//                             out.append("<p><a 
href=\"?list="+listname+">Showall failed</a><br/></p>");
+                       break;
+               }
+               i++;
+               
        }
+       
 }
 public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
        return null;


Reply via email to