XMLSpider

[email protected] Thu, 16 Aug 2007 20:24:53 +0000 (UTC)

Author: swatig0
Date: 2007-08-16 20:24:52 +0000 (Thu, 16 Aug 2007)
New Revision: 14725


Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
URI-id transformation

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2007-08-16 19:03:40 UTC (rev 
14724)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2007-08-16 20:24:52 UTC (rev 
14725)
@@ -4,7 +4,6 @@
 package plugins.XMLSpider;

 import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
@@ -13,14 +12,11 @@
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.Vector;
@@ -33,11 +29,10 @@
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;

-import org.w3c.dom.Attr;
+
 import org.w3c.dom.DOMImplementation;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;

 import freenet.client.ClientMetadata;
@@ -49,7 +44,6 @@
 import freenet.client.async.ClientCallback;
 import freenet.client.async.ClientGetter;
 import freenet.client.async.USKCallback;
-import freenet.clients.http.PageMaker;
 import freenet.clients.http.ToadletContext;
 import freenet.clients.http.ToadletContextClosedException;
 import freenet.clients.http.filter.ContentFilter;
@@ -67,15 +61,13 @@
 import freenet.pluginmanager.FredPluginThreadless;
 import freenet.pluginmanager.PluginHTTPException;
 import freenet.pluginmanager.PluginRespirator;
-import freenet.support.HTMLNode;
 import freenet.support.Logger;
-import freenet.support.MultiValueTable;
 import freenet.support.api.Bucket;
 import freenet.support.api.HTTPRequest;

 /**
- * XMLSpider. Produces index for searching words. 
- * In case the size of the index grows up a specific threshold the index is 
split into several subindices
+ * XMLSpider. Produces xml index for searching words. 
+ * If the size of the index grows beyond a specific threshold, the index is 
split into several subindices.
  * The indexing key is the md5 hash of the word.
  * 
  *  @author swati goyal
@@ -84,45 +76,71 @@
 public class XMLSpider implements FredPlugin, FredPluginHTTP, 
FredPluginThreadless,  FredPluginHTTPAdvanced,HttpPlugin, ClientCallback, 
USKCallback{

        long tProducedIndex;
-       private TreeMap tMap = new TreeMap();
+       /**
+        * Stores the found words along with md5
+        */
+       public TreeMap tMap = new TreeMap();
        int count;
        // URIs visited, or fetching, or queued. Added once then forgotten 
about.
-       private final HashSet visitedURIs = new HashSet();
-       private final HashSet urisWithWords = new HashSet();
+       /**
+        * 
+        * Lists the uris that have been visited by the spider
+        */
+       public final HashSet visitedURIs = new HashSet();
        private final HashSet idsWithWords = new HashSet();
-       private final HashSet failedURIs = new HashSet();
+       /**
+        * 
+        * Lists the uris that were visited but failed.
+        */
+       public final HashSet failedURIs = new HashSet();
+
        private final HashSet queuedURISet = new HashSet();
-       private final LinkedList queuedURIList = new LinkedList();
+       /**
+        * 
+        * Lists the uris that are still queued.
+        */
+       public final LinkedList queuedURIList = new LinkedList();
        private final HashMap runningFetchesByURI = new HashMap();
-       private final HashMap urisByWord = new HashMap();
+
        private final HashMap idsByWord = new HashMap();
-       private final HashMap titlesOfURIs = new HashMap();
+
        private final HashMap titlesOfIds = new HashMap();
        private final HashMap uriIds = new HashMap();
        private final HashMap idUris = new HashMap();
-       private final HashMap outlinks = new HashMap();
-       private final HashMap inlinks = new HashMap();
+       /**
+        * Lists the outlinks from a particular page, 
+        * <br> indexed by the id of the page uri
+        */
+       public final HashMap outlinks = new HashMap();
+       /**
+        * Lists the inlinks to a particular page,
+        *  indexed by the id of page uri.
+        */
+       public final HashMap inlinks = new HashMap();
        private Vector indices;
        private int match;
        private int id;
-       private Vector list;
+
        private boolean indexing ;
-       
+
        private static final int minTimeBetweenEachIndexRewriting = 10;
-/**
- * DEFAULT_INDEX_DIR is the directory where the generated indices are stored.
- * Needs to be created before it can be used
- */
-       private static final String DEFAULT_INDEX_DIR = "myindex4/";
+       /**
+        * directory where the generated indices are stored. 
+        * Needs to be created before it can be used
+        */
+       public static final String DEFAULT_INDEX_DIR = "myindex4/";
+       /**
+        * Lists the allowed mime types of the fetched page. 
+        */
        public Set allowedMIMETypes;
        private static final int MAX_ENTRIES = 10;
        private static final String pluginName = "XML spider";
        /**
-        * This gives the allowed fraction of total time spent on generating 
indices
-        * max value = 1; min value > 0 
+        * Gives the allowed fraction of total time spent on generating indices 
with
+        * maximum value = 1; minimum value = 0. 
         */
-       private static final double MAX_TIME_SPENT_INDEXING = 0.5;
-       
+       public static final double MAX_TIME_SPENT_INDEXING = 0.5;
+
        private static final String indexTitle= "XMLSpider index";
        private static final String indexOwner = "Freenet";
        private static final String indexOwnerEmail = null;
@@ -135,47 +153,46 @@
        // Can have many; this limit only exists to save memory.
        private static final int maxParallelRequests = 100;
        private int maxShownURIs = 15;
-       private HashMap urisToNumbers;
+
        private NodeClientCore core;
        private FetchContext ctx;
        private final short PRIORITY_CLASS = 
RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS;
        private boolean stopped = true;
        PluginRespirator pr;
-       

-       private synchronized void queueURI(FreenetURI uri) {
-               //not adding the html condition
+       /**
+        * Adds the found uri to the list of to-be-retrieved uris. <p>Every USK 
uri is added as an SSK.
+        * @param uri the new uri that needs to be fetched for further indexing
+        */
+       public synchronized void queueURI(FreenetURI uri) {
                if((uri.getKeyType()).equals("USK")){
                        if(uri.getSuggestedEdition() < 0)
                                uri = uri.setSuggestedEdition((-1)* 
uri.getSuggestedEdition());
                        try{
-                       uri = ((USK.create(uri)).getSSK()).getURI();
-                       //all uris are added as ssk
-                       (ctx.uskManager).subscribe(USK.create(uri),this, false, 
this);  
+                               uri = ((USK.create(uri)).getSSK()).getURI();
+                               
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);  
                        }
                        catch(Exception e){}
                }
-               
+
                if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
                        queuedURIList.addLast(uri);
                        visitedURIs.add(uri);
                        uriIds.put(uri, id);
                        idUris.put(id, uri);
                        id++;
-                       
-                       //the page object of the client will contain the uri of 
the current page
                }
        }

        private void startSomeRequests() {

-               
+
                FreenetURI[] initialURIs = 
core.bookmarkManager.getBookmarkURIs();
                for (int i = 0; i < initialURIs.length; i++)
                {
-               queueURI(initialURIs[i]);
+                       queueURI(initialURIs[i]);
                }
-                                       
+
                ArrayList toStart = null;
                synchronized (this) {
                        if (stopped) {
@@ -183,59 +200,44 @@
                        }
                        int running = runningFetchesByURI.size();
                        int queued = queuedURIList.size();
-                       
+
                        if ((running >= maxParallelRequests) || (queued == 0))
                                return;
-                       
+
                        toStart = new ArrayList(Math.min(maxParallelRequests - 
running, queued));
-                       
+
                        for (int i = running; i < maxParallelRequests; i++) {
                                if (queuedURIList.isEmpty())
                                        break;
                                FreenetURI uri = (FreenetURI) 
queuedURIList.removeFirst();
                                queuedURISet.remove(uri);
-//                             if((uri.getKeyType()).equals("USK")){
-//                             if(uri.getSuggestedEdition() < 0)
-//                                     uri = uri.setSuggestedEdition((-1)* 
uri.getSuggestedEdition());
-//                             try{
-//                                     
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);  
-//                             }catch(Exception e){
-//                                     
-//                             }
-                               
-       //                      }
                                ClientGetter getter = makeGetter(uri);
                                toStart.add(getter);
-                               }
+                       }
                }
-                       for (int i = 0; i < toStart.size(); i++) {
-                               
+               for (int i = 0; i < toStart.size(); i++) {
+
                        ClientGetter g = (ClientGetter) toStart.get(i);
                        try {
                                runningFetchesByURI.put(g.getURI(), g);
                                g.start();
-                               FileWriter outp = new 
FileWriter("logfile2",true);
-                               outp.write("URI "+g.getURI().toString()+'\n');
-                               
-                               outp.close();
-                               } catch (FetchException e) {
-                                       onFailure(e, g);
-                               }
-                               catch (IOException e){
-                                       Logger.error(this, "the logfile can not 
be written"+e.toString(), e);
-                               }
-               
+                       } catch (FetchException e) {
+                               onFailure(e, g);
                        }
-               //}
-                               
+               }
        }
-       

+
        private ClientGetter makeGetter(FreenetURI uri) {
                ClientGetter g = new ClientGetter(this, 
core.requestStarters.chkFetchScheduler, core.requestStarters.sskFetchScheduler, 
uri, ctx, PRIORITY_CLASS, this, null, null);
                return g;
        }
-
+       /**
+        * Processes the successfully fetched uri for further outlinks.
+        * 
+        * @param result
+        * @param state
+        */
        public void onSuccess(FetchResult result, ClientGetter state) {
                FreenetURI uri = state.getURI();

@@ -246,22 +248,16 @@
                ClientMetadata cm = result.getMetadata();
                Bucket data = result.asBucket();
                String mimeType = cm.getMIMEType();
-               
+
                sizeOfURIs.put(uri.toString(), new Long(data.size()));
                mimeOfURIs.put(uri.toString(), mimeType);
                PageCallBack page = new PageCallBack();
                page.id = (Integer) uriIds.get(uri);
                inlinks.put(page.id, new Vector());
                outlinks.put(page.id, new Vector());
-               
-               try{
-           FileWriter output = new FileWriter("logfile",true);
-           output.write(uri.toString()+" page " + page.id +"\n");
-           output.close();
-               }
-               catch(Exception e){
-                       Logger.error(this, "The uri could not be removed from 
running "+e.toString(), e);
-               }
+
+               //instead of passing the current object, the pagecallback 
object for every page is passed to the content filter
+               // this is to allow inlinks and outlinks to be indexed by specific 
pages
                try {
                        ContentFilter.filter(data, ctx.bucketFactory, mimeType, 
uri.toURI("http://127.0.0.1:8888/";), page);
                } catch (UnsafeContentTypeException e) {
@@ -274,28 +270,18 @@
                        data.free();
                }
        }
-       
+
        public void onFailure(FetchException e, ClientGetter state) {
                FreenetURI uri = state.getURI();
-//             try{
-//                     FileWriter outp = new FileWriter("failed",true);
-//                     outp.write("failed "+e.toString()+" for "+uri+'\n');
-//                     outp.close();
-//                     
-//             }catch(Exception e2){
-//                     
-//             }
+
                synchronized (this) {
                        runningFetchesByURI.remove(uri);
                        failedURIs.add(uri);
                }
                if (e.newURI != null)
                        queueURI(e.newURI);
-//             else
-//                     queueURI(uri);
+
                startSomeRequests();
-               
-               
        }

        public void onSuccess(BaseClientPutter state) {
@@ -311,29 +297,22 @@
        }

        /**
- * generates the main index file that can be used by librarian for searching 
in the list of
- * subindices
- *  
- * @param void
- * @author swati 
- * @throws IOException
- * @throws NoSuchAlgorithmException
- */
+        * generates the main index file that can be used by librarian for 
searching in the list of
+        * subindices
+        *  
+        * @param void
+        * @author swati 
+        * @throws IOException
+        * @throws NoSuchAlgorithmException
+        */
        private synchronized void produceIndex2() throws 
IOException,NoSuchAlgorithmException {
                // Produce the main index file.
-               
-               //the number of bits to consider for matching 
-               
-       
-//             if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
-//                     System.out.println("No URIs with words");
-//                     return;
-//             }
-               
+
                if (idsByWord.isEmpty() || idsWithWords.isEmpty()) {
                        System.out.println("No URIs with words");
                        return;
                }
+               //the main index file 
                File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
                StreamResult resultStream;
                resultStream = new StreamResult(outputFile);
@@ -351,7 +330,7 @@
                try {
                        xmlBuilder = xmlFactory.newDocumentBuilder();
                } catch(javax.xml.parsers.ParserConfigurationException e) {
-                       /* Will (should ?) never happen */
+
                        Logger.error(this, "Spider: Error while initializing 
XML generator: "+e.toString());
                        return;
                }
@@ -367,57 +346,42 @@
                /* -> title */
                Element subHeaderElement = xmlDoc.createElement("title");
                Text subHeaderText = xmlDoc.createTextNode(indexTitle);
-               
+
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);

                /* -> owner */
                subHeaderElement = xmlDoc.createElement("owner");
                subHeaderText = xmlDoc.createTextNode(indexOwner);
-               
+
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);
-               
+
                /* -> owner email */
                if(indexOwnerEmail != null) {
                        subHeaderElement = xmlDoc.createElement("email");
                        subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
-                       
+
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
                }
-
-               
-               //String[] words = (String[]) urisByWord.keySet().toArray(new 
String[urisByWord.size()]);
-               //Arrays.sort(words);
-               
+               /*
+                * the max number of digits in md5 to be used for matching with 
the search query is stored in the xml
+                */
                Element prefixElement = xmlDoc.createElement("prefix");
-               //prefixElement.setAttribute("value",match+"");
-               //this match will be set after processing the TreeMap
-       
-
-               
-               //all index files are ready
                /* Adding word index */
                Element keywordsElement = xmlDoc.createElement("keywords");
                for(int i = 0;i<indices.size();i++){
-                       
//generateSubIndex(DEFAULT_INDEX_DIR+"index_"+Integer.toHexString(i)+".xml");
+
                        Element subIndexElement = 
xmlDoc.createElement("subIndex");
-//                     if(i<=9)
-//                     subIndexElement.setAttribute("key",i+"");
-//                     else
-//                             
subIndexElement.setAttribute("key",Integer.toHexString(i));
                        subIndexElement.setAttribute("key", (String) 
indices.elementAt(i));
                        //the subindex element key will contain the bits used 
for matching in that subindex
                        keywordsElement.appendChild(subIndexElement);
                }
-               
+
                prefixElement.setAttribute("value",match+"");
-               // make sure that prefix is the first child of root Element
                rootElement.appendChild(prefixElement);
                rootElement.appendChild(headerElement);
-               
-               //rootElement.appendChild(filesElement);
                rootElement.appendChild(keywordsElement);

                /* Serialization */
@@ -434,7 +398,7 @@

                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-               
+
                /* final step */
                try {
                        serializer.transform(domSource, resultStream);
@@ -445,12 +409,11 @@

                if(Logger.shouldLog(Logger.MINOR, this))
                        Logger.minor(this, "Spider: indexes regenerated.");
-       
-       //the main xml file is generated 
-       //now as each word is generated enter it into the respective subindex
-       //now the parsing will start and nodes will be added as needed 
-               

+               //The main xml file is generated 
+               //As each word is generated enter it into the respective 
subindex
+               //The parsing will start and nodes will be added as needed 
+
        }
        /**
         * Generates the subindices. 
@@ -460,48 +423,36 @@
         * @throws Exception
         */
        private synchronized void generateIndex2() throws Exception{
-               // now we the tree map and we need to use the sorted (md5s) to 
generate the xml indices
-       
-               
+               //using the tMap generate the xml indices
                if (idsByWord.isEmpty() || idsWithWords.isEmpty()) {
                        System.out.println("No URIs with words");
                        return;
                }
-               
-       //      FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new 
FreenetURI[urisWithWords.size()]);
-               Integer[] ids = (Integer[]) idsWithWords.toArray(new 
Integer[idsWithWords.size()]);
-//             urisToNumbers = new HashMap();
-//             for (int i = 0; i < uris.length; i++) {
-//                     urisToNumbers.put(uris[i], new Integer(i));
-//                     }
+
                indices = new Vector();
                int prefix = 1;
                match = 1;
                Vector list = new Vector();
-               //String str = tMap.firstKey();
                Iterator it = tMap.keySet().iterator();
-                FileWriter outp = new FileWriter("indexing");
-               outp.write("size = "+tMap.size()+"\n");
-               outp.close();
+
                String str = (String) it.next();
-               int i = 0,index =0;
+               int i = 0;
                while(it.hasNext())
                {
-                outp = new FileWriter("indexing",true);
                        String key =(String) it.next();
-                       outp.write(key + "\n");
-                       outp.close();
+                       //create a list of the words to be added in the same 
subindex
                        if(key.substring(0, prefix).equals(str.substring(0, 
prefix))) 
-                               {i++;
-                               list.add(key);
-                               }
+                       {i++;
+                       list.add(key);
+                       }
                        else {
-               generateSubIndex(prefix,list);
-               str = key;
-               list = new Vector();
+                               //generate the appropriate subindex with the 
current list
+                               generateSubIndex(prefix,list);
+                               str = key;
+                               list = new Vector();
+                       }
                }
-               }
-               
+
                generateSubIndex(prefix,list);
        }
        private synchronized Vector subVector(Vector list, int begin, int end){
@@ -509,17 +460,21 @@
                for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i));
                return tmp;
        }
-       
+
        private synchronized void generateSubIndex(int p,Vector list) throws 
Exception{
-               
+               /*
+                * if the list is less than max allowed entries in a file then 
directly generate the xml 
+                * otherwise split the list into further sublists
+                * and iterate till the number of entries per subindex is less 
than the allowed value
+                */
+
                if(list.size() < MAX_ENTRIES)
-               {
-                       //the index can be generated from this list
+               {               
                        generateXML(list,p);
                }
                else
                {
-                       //this means that prefix needs to be incremented
+                       //prefix needs to be incremented
                        if(match <= p) match = p+1; 
                        int prefix = p+1;
                        int i =0;
@@ -529,35 +484,30 @@
                        {
                                String key = (String) list.elementAt(i);
                                if((key.substring(0, 
prefix)).equals(str.substring(0, prefix))) 
-                                       {
-                                       //index = i;
+                               {
                                        i++;
-                                       }
+                               }
                                else {
-                                       
//generateXML(subVector(list,index,i-1),prefix);
                                        
generateSubIndex(prefix,subVector(list,index,i-1));
                                        index = i;
                                        str = key;
                                }
-                               
-
                        }
                        generateSubIndex(prefix,subVector(list,index,i-1));
                }
        }       
-               

-       private synchronized void generateXML (Vector list, int prefix) throws 
Exception
+       /**
+        * generates the xml index with the given list of words with prefix 
number of matching bits in md5
+        * @param list  list of the words to be added in the index
+        * @param prefix number of matching bits of md5
+        * @throws Exception
+        */
+       public synchronized void generateXML (Vector list, int prefix) throws 
Exception
        {
-               FileWriter outp = new FileWriter("gen",true);
-               
-               
                String p = ((String) list.elementAt(0)).substring(0, prefix);
-               outp.write("inside gen xml + "+p+"\n");
-               
                indices.add(p);
                File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+p+".xml");
-               //indices.add(p);
                StreamResult resultStream;
                resultStream = new StreamResult(outputFile);

@@ -567,92 +517,66 @@
                DocumentBuilder xmlBuilder = null;
                DOMImplementation impl = null;
                Element rootElement = null;
-
                xmlFactory = DocumentBuilderFactory.newInstance();

-
                try {
                        xmlBuilder = xmlFactory.newDocumentBuilder();
                } catch(javax.xml.parsers.ParserConfigurationException e) {
-                       /* Will (should ?) never happen */
                        Logger.error(this, "Spider: Error while initializing 
XML generator: "+e.toString());
                        return;
                }

-
                impl = xmlBuilder.getDOMImplementation();
-
                /* Starting to generate index */
-
                xmlDoc = impl.createDocument(null, "sub_index", null);
                rootElement = xmlDoc.getDocumentElement();

                /* Adding header to the index */
                Element headerElement = xmlDoc.createElement("header");
-
                /* -> title */
                Element subHeaderElement = xmlDoc.createElement("title");
                Text subHeaderText = xmlDoc.createTextNode(indexTitle);
-               
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);

-                       
-               
                Element filesElement = xmlDoc.createElement("files"); /* 
filesElement != fileElement */
-
                Element EntriesElement = xmlDoc.createElement("entries");
-               
                EntriesElement.setNodeValue(list.size()+"");
-               outp.write("size = "+list.size()+"\n");
                EntriesElement.setAttribute("value", list.size()+"");
-               //all index files are ready
+
                /* Adding word index */
                Element keywordsElement = xmlDoc.createElement("keywords");
-               //words to be added 
                Vector fileid = new Vector();
                for(int i =0;i<list.size();i++)
                {
                        Element wordElement = xmlDoc.createElement("word");
                        String str = (String) tMap.get(list.elementAt(i));
-                       outp.write("word "+str+"\n");
                        wordElement.setAttribute("v",str );
-                       //FreenetURI[] urisForWord = (FreenetURI[]) 
urisByWord.get(str);
                        Integer[] idsForWord = (Integer[]) idsByWord.get(str);
-//                     
                        for (int j = 0; j < idsForWord.length; j++) {
                                Integer id = idsForWord[j];
-                               //Integer x = (Integer) urisToNumbers.get(uri);
                                Integer x = id;
-                               outp.write("x "+x+"\n");
                                if (x == null) {
                                        Logger.error(this, "Eh?");
                                        continue;
                                }
-//
                                Element uriElement = 
xmlDoc.createElement("file");
                                Element fileElement = 
xmlDoc.createElement("file");
                                uriElement.setAttribute("id", x.toString());
                                fileElement.setAttribute("id", x.toString());
-                               //fileElement.setAttribute("key", 
uri.toString());
-                               outp.write("uri 
"+(idUris.get(id)).toString()+"\n");
                                
fileElement.setAttribute("key",(idUris.get(id)).toString());
-////                           /* Position by position */
-                               //HashMap positionsForGivenWord = 
(HashMap)positionsByWordByURI.get(uri.toString());
+                               /* Position by position */
+
                                HashMap positionsForGivenWord = 
(HashMap)positionsByWordById.get(x);
                                Integer[] positions = 
(Integer[])positionsForGivenWord.get(str);
-
                                StringBuffer positionList = new StringBuffer();

                                for(int k=0; k < positions.length ; k++) {
                                        if(k!=0)
                                                positionList.append(',');
-
                                        
positionList.append(positions[k].toString());
                                }
-                               
                                
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
-                       
                                wordElement.appendChild(uriElement);
                                if(!fileid.contains(x))
                                {
@@ -660,12 +584,8 @@
                                        filesElement.appendChild(fileElement);
                                }
                        }
-                       
-                       //Element keywordsElement = (Element) 
root.getElementsByTagName("keywords").item(0);
                        keywordsElement.appendChild(wordElement);
-                               
                }
-               
                rootElement.appendChild(EntriesElement);
                rootElement.appendChild(headerElement);
                rootElement.appendChild(filesElement);
@@ -682,11 +602,8 @@
                        Logger.error(this, "Spider: Error while serializing XML 
(transformFactory.newTransformer()): "+e.toString());
                        return;
                }
-
-
                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-               
                /* final step */
                try {
                        serializer.transform(domSource, resultStream);
@@ -697,152 +614,22 @@

                if(Logger.shouldLog(Logger.MINOR, this))
                        Logger.minor(this, "Spider: indexes regenerated.");
-               outp.close();
        }

-               
-       public String search(String str,NodeList list) throws Exception
-       {
-               int prefix = str.length();
-               for(int i = 0;i<list.getLength();i++){
-                       Element subIndex = (Element) list.item(i);
-                       String key = subIndex.getAttribute("key");
-                       if(key.equals(str)) return key;
-               }
-               return search(str.substring(0, prefix-1),list);
-       }

-       
        public void handleGet(HTTPRequest request, ToadletContext context) 
throws IOException, ToadletContextClosedException {
-               String action = request.getParam("action");
-               PageMaker pageMaker = context.getPageMaker();
-               if ((action == null) || (action.length() == 0)) {
-                       MultiValueTable responseHeaders = new MultiValueTable();
-                       responseHeaders.put("Location", "?action=list");
-                       context.sendReplyHeaders(301, "Redirect", 
responseHeaders, "text/html; charset=utf-8", 0);
-                       return;
-               } else if ("list".equals(action)) {
-                       
-                       String listName = request.getParam("listName", null);
-                       HTMLNode pageNode = pageMaker.getPageNode("The XML 
Spider", context);
-                       HTMLNode contentNode = 
pageMaker.getContentNode(pageNode);
-                       /* create copies for multi-threaded use */
-                       if (listName == null) {
-                               Map runningFetches = new 
HashMap(runningFetchesByURI);
-                               List queued = new ArrayList(queuedURIList);
-                               Set visited = new HashSet(visitedURIs);
-                               Set failed = new HashSet(failedURIs);
-                               contentNode.addChild(createNavbar(pageMaker, 
runningFetches.size(), queued.size(), visited.size(), failed.size()));
-                               contentNode.addChild(createAddBox(pageMaker, 
context));
-                               contentNode.addChild(createList(pageMaker, 
"Running FetcheIIIs", "running", runningFetches.keySet(), maxShownURIs));
-                               contentNode.addChild(createList(pageMaker, 
"Queued URIs", "queued", queued, maxShownURIs));
-                               contentNode.addChild(createList(pageMaker, 
"Visited URIs", "visited", visited, maxShownURIs));
-                               contentNode.addChild(createList(pageMaker, 
"Failed URIs", "failed", failed, maxShownURIs));
-                       } else {
-                               contentNode.addChild(createBackBox(pageMaker));
-                               if ("failed".equals(listName)) {
-                                       Set failed = new HashSet(failedURIs);
-                                       
contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, 
-1));       
-                               } else if ("visited".equals(listName)) {
-                                       Set visited = new HashSet(visitedURIs);
-                                       
contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, 
-1));
-                               } else if ("queued".equals(listName)) {
-                                       List queued = new 
ArrayList(queuedURIList);
-                                       
contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, 
-1));
-                               } else if ("running".equals(listName)) {
-                                       Map runningFetches = new 
HashMap(runningFetchesByURI);
-                                       
contentNode.addChild(createList(pageMaker, "Running Fetches", "running", 
runningFetches.keySet(), -1));
-                               }
-                       }
-                       MultiValueTable responseHeaders = new MultiValueTable();
-                       byte[] responseBytes = 
pageNode.generate().getBytes("utf-8");
-                       context.sendReplyHeaders(200, "OK", responseHeaders, 
"text/html; charset=utf-8", responseBytes.length);
-                       context.writeData(responseBytes);
-               } else if ("add".equals(action)) {
-                       String uriParam = request.getParam("key");
-                       try {
-                               FreenetURI uri = new FreenetURI(uriParam);
-                               synchronized (this) {
-                                       failedURIs.remove(uri);
-                                       visitedURIs.remove(uri);
-                               }
-                               queueURI(uri);
-                               startSomeRequests();
-                       } catch (MalformedURLException mue1) {
-                               sendSimpleResponse(context, "URL invalid", "The 
given URI is not valid.");
-                               return;
-                       }
-                       MultiValueTable responseHeaders = new MultiValueTable();
-                       responseHeaders.put("Location", "?action=list");
-                       context.sendReplyHeaders(301, "Redirect", 
responseHeaders, "text/html; charset=utf-8", 0);
-                       return;
-               }
+               /*
+                * ignore
+                */
        }

+
        public void handlePost(HTTPRequest request, ToadletContext context) 
throws IOException {
+               /*
+                * ignore
+                */
        }
-       
-       private void sendSimpleResponse(ToadletContext context, String title, 
String message) throws ToadletContextClosedException, IOException {
-               PageMaker pageMaker = context.getPageMaker();
-               HTMLNode pageNode = pageMaker.getPageNode(title, context);
-               HTMLNode contentNode = pageMaker.getContentNode(pageNode);
-               HTMLNode infobox = 
contentNode.addChild(pageMaker.getInfobox("infobox-alter", title));
-               HTMLNode infoboxContent = pageMaker.getContentNode(infobox);
-               infoboxContent.addChild("#", message);
-               byte[] responseBytes = pageNode.generate().getBytes("utf-8");
-               context.sendReplyHeaders(200, "OK", new MultiValueTable(), 
"text/html; charset=utf-8", responseBytes.length);
-               context.writeData(responseBytes);
-       }
-       
-       private HTMLNode createBackBox(PageMaker pageMaker) {
-               HTMLNode backbox = pageMaker.getInfobox((String) null);
-               HTMLNode backContent = pageMaker.getContentNode(backbox);
-               backContent.addChild("#", "Return to the ");
-               backContent.addChild("a", "href", "?action=list", "list of all 
URIs");
-               backContent.addChild("#", ".");
-               return backbox;
-       }
-       
-       private HTMLNode createAddBox(PageMaker pageMaker, ToadletContext ctx) {
-               HTMLNode addBox = pageMaker.getInfobox("Add a URI");
-               HTMLNode formNode = 
pageMaker.getContentNode(addBox).addChild("form", new String[] { "action", 
"method" }, new String[] { "", "get" });
-               formNode.addChild("input", new String[] { "type", "name", 
"value" }, new String[] { "hidden", "action", "add" });
-               formNode.addChild("input", new String[] { "type", "size", 
"name", "value" }, new String[] { "text", "40", "key", "" });
-               formNode.addChild("input", new String[] { "type", "value" }, 
new String[] { "submit", "Add URI" });
-               return addBox;
-       }

-       private HTMLNode createNavbar(PageMaker pageMaker, int running, int 
queued, int visited, int failed) {
-               HTMLNode navbar = pageMaker.getInfobox("navbar", "Page 
Navigation");
-               HTMLNode list = pageMaker.getContentNode(navbar).addChild("ul");
-               list.addChild("li").addChild("a", "href", "#running", "Running 
(" + running + ')');
-               list.addChild("li").addChild("a", "href", "#queued", "Queued (" 
+ queued + ')');
-               list.addChild("li").addChild("a", "href", "#visited", "Visited 
(" + visited + ')');
-               list.addChild("li").addChild("a", "href", "#failed", "Failed (" 
+ failed + ')');
-               return navbar;
-       }
-
-       private HTMLNode createList(PageMaker pageMaker, String listName, 
String anchorName, Collection collection, int maxCount) {
-               HTMLNode listNode = new HTMLNode("div");
-               listNode.addChild("a", "name", anchorName);
-               HTMLNode listBox = pageMaker.getInfobox(listName);
-               HTMLNode listContent = pageMaker.getContentNode(listBox);
-               listNode.addChild(listBox);
-               Iterator collectionItems = collection.iterator();
-               int itemCount = 0;
-               while (collectionItems.hasNext()) {
-                       FreenetURI uri = (FreenetURI) collectionItems.next();
-                       listContent.addChild("#", uri.toString());
-                       listContent.addChild("br");
-                       if (itemCount++ == maxCount) {
-                               listContent.addChild("br");
-                               listContent.addChild("a", "href", 
"?action=list&listName=" + anchorName, "Show all\u2026");
-                               break;
-                       }
-               }
-               return listNode;
-       }
-
        /**
         * @see freenet.oldplugins.plugin.Plugin#getPluginName()
         */
@@ -854,7 +641,7 @@
         * @see 
freenet.oldplugins.plugin.Plugin#setPluginManager(freenet.oldplugins.plugin.PluginManager)
         */
        public void setPluginManager(PluginManager pluginManager) {
-               
+
                this.core = pluginManager.getClientCore();
                this.ctx = core.makeClient((short) 0).getFetchContext();
                ctx.maxSplitfileBlockRetries = 10;
@@ -865,9 +652,9 @@
                allowedMIMETypes.add(new String("text/html"));
                allowedMIMETypes.add(new String("text/plain"));
                allowedMIMETypes.add(new String("application/xhtml+xml"));
-       //      allowedMIMETypes.add(new String("application/zip"));
+               //      allowedMIMETypes.add(new String("application/zip"));
                ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-       //      ctx.allowedMIMETypes.add("text/html"); 
+               //      ctx.allowedMIMETypes.add("text/html"); 
                tProducedIndex = System.currentTimeMillis();
                indexing = true;
        }
@@ -898,22 +685,25 @@
                // Ignore
        }
        private static String convertToHex(byte[] data) {
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < data.length; i++) {
-               int halfbyte = (data[i] >>> 4) & 0x0F;
-               int two_halfs = 0;
-               do {
-                       if ((0 <= halfbyte) && (halfbyte <= 9))
-                       buf.append((char) ('0' + halfbyte));
-                   else
-                       buf.append((char) ('a' + (halfbyte - 10)));
-                       halfbyte = data[i] & 0x0F;
-               } while(two_halfs++ < 1);
-        }
-        return buf.toString();
-    }
-       //this function will return the String representation of the MD5 hash 
for the input string 
-       public static String MD5(String text) throws NoSuchAlgorithmException, 
UnsupportedEncodingException  {
+               StringBuffer buf = new StringBuffer();
+               for (int i = 0; i < data.length; i++) {
+                       int halfbyte = (data[i] >>> 4) & 0x0F;
+                       int two_halfs = 0;
+                       do {
+                               if ((0 <= halfbyte) && (halfbyte <= 9))
+                                       buf.append((char) ('0' + halfbyte));
+                               else
+                                       buf.append((char) ('a' + (halfbyte - 
10)));
+                               halfbyte = data[i] & 0x0F;
+                       } while(two_halfs++ < 1);
+               }
+               return buf.toString();
+       }
+
+       /*
+        * calculate the md5 for a given string
+        */
+       private static String MD5(String text) throws NoSuchAlgorithmException, 
UnsupportedEncodingException  {
                MessageDigest md;
                md = MessageDigest.getInstance("MD5");
                byte[] md5hash = new byte[32];
@@ -921,9 +711,9 @@
                md5hash = md.digest();
                return convertToHex(md5hash);
        }
-       
+
        public void generateSubIndex(String filename){
-//generates the new subIndex
+//             generates the new subIndex
                File outputFile = new File(filename);
                StreamResult resultStream;
                resultStream = new StreamResult(outputFile);
@@ -960,28 +750,28 @@
                /* -> title */
                Element subHeaderElement = xmlDoc.createElement("title");
                Text subHeaderText = xmlDoc.createTextNode(indexTitle);
-               
+
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);

                /* -> owner */
                subHeaderElement = xmlDoc.createElement("owner");
                subHeaderText = xmlDoc.createTextNode(indexOwner);
-               
+
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);
-               
-       
+
+
                /* -> owner email */
                if(indexOwnerEmail != null) {
                        subHeaderElement = xmlDoc.createElement("email");
                        subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
-                       
+
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
                }

-               
+
                Element filesElement = xmlDoc.createElement("files"); /* 
filesElement != fileElement */

                Element EntriesElement = xmlDoc.createElement("entries");
@@ -990,7 +780,7 @@
                //all index files are ready
                /* Adding word index */
                Element keywordsElement = xmlDoc.createElement("keywords");
-               
+
                rootElement.appendChild(EntriesElement);
                rootElement.appendChild(headerElement);
                rootElement.appendChild(filesElement);
@@ -1011,7 +801,7 @@

                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-               
+
                /* final step */
                try {
                        serializer.transform(domSource, resultStream);
@@ -1023,361 +813,330 @@
                if(Logger.shouldLog(Logger.MINOR, this))
                        Logger.minor(this, "Spider: indexes regenerated.");
        }
-       
-public void terminate(){
-       synchronized (this) {
-               stopped = true;
-               queuedURIList.clear();
-       }
-}
-       
-public void runPlugin(PluginRespirator pr){
-       this.pr = pr;
-       this.id = 0;
-       this.core = pr.getNode().clientCore;
-       this.ctx = core.makeClient((short) 0).getFetchContext();
-       ctx.maxSplitfileBlockRetries = 10;
-       ctx.maxNonSplitfileRetries = 10;
-       ctx.maxTempLength = 2 * 1024 * 1024;
-       ctx.maxOutputLength = 2 * 1024 * 1024;
-       allowedMIMETypes = new HashSet();
-       allowedMIMETypes.add(new String("text/html"));
-       allowedMIMETypes.add(new String("text/plain"));
-       allowedMIMETypes.add(new String("application/xhtml+xml"));
-//     allowedMIMETypes.add(new String("application/zip"));
-       ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-//     ctx.allowedMIMETypes.add("text/html"); 
-       tProducedIndex = System.currentTimeMillis();
-       indexing = true;
-       stopped = false;
-       count = 0;
-       
-       //startPlugin();
-       Thread starterThread = new Thread("Spider Plugin Starter") {
-               public void run() {
-                       try{
-                               Thread.sleep(30 * 1000); // Let the node start 
up
-                       } catch (InterruptedException e){}
-                       startSomeRequests();
+
+       public void terminate(){
+               synchronized (this) {
+                       stopped = true;
+                       queuedURIList.clear();
                }
-       };
-       starterThread.setDaemon(true);
-       starterThread.start();
-}
+       }

-public String handleHTTPGet(HTTPRequest request) throws PluginHTTPException{
-       StringBuffer out = new StringBuffer();
-       // need to produce pretty html
-       //later fredpluginhttpadvanced will give the interface
-       //this brings us to the page from visit
-       String listname = request.getParam("list");
-       if(listname.length() != 0)
-       {
-               appendDefaultHeader(out,null);
-               out.append("<p><h4>"+listname+" URIs</h4></p>");
-               appendList(listname,out,null);
-               return out.toString();
+       public void runPlugin(PluginRespirator pr){
+               this.pr = pr;
+               this.id = 0;
+               this.core = pr.getNode().clientCore;
+               this.ctx = core.makeClient((short) 0).getFetchContext();
+               ctx.maxSplitfileBlockRetries = 10;
+               ctx.maxNonSplitfileRetries = 10;
+               ctx.maxTempLength = 2 * 1024 * 1024;
+               ctx.maxOutputLength = 2 * 1024 * 1024;
+               allowedMIMETypes = new HashSet();
+               allowedMIMETypes.add(new String("text/html"));
+               allowedMIMETypes.add(new String("text/plain"));
+               allowedMIMETypes.add(new String("application/xhtml+xml"));
+
+               ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+
+               tProducedIndex = System.currentTimeMillis();
+               indexing = true;
+               stopped = false;
+               count = 0;
+
+               //startPlugin();
+               Thread starterThread = new Thread("Spider Plugin Starter") {
+                       public void run() {
+                               try{
+                                       Thread.sleep(30 * 1000); // Let the 
node start up
+                               } catch (InterruptedException e){}
+                               startSomeRequests();
+                       }
+               };
+               starterThread.setDaemon(true);
+               starterThread.start();
        }
-       appendDefaultPageStart(out,null);
-       String uriParam = request.getParam("adduri");
-       if(uriParam != null && uriParam.length() != 0)
+
+       /**
+        * Interface to the Spider data
+        */
+       public String handleHTTPGet(HTTPRequest request) throws 
PluginHTTPException{
+               StringBuffer out = new StringBuffer();
+
+               String listname = request.getParam("list");
+               if(listname.length() != 0)
                {
-               try {
-                       FreenetURI uri = new FreenetURI(uriParam);
-                       synchronized (this) {
-                               failedURIs.remove(uri);
-                               visitedURIs.remove(uri);
+                       appendDefaultHeader(out,null);
+                       out.append("<p><h4>"+listname+" URIs</h4></p>");
+                       appendList(listname,out,null);
+                       return out.toString();
+               }
+               appendDefaultPageStart(out,null);
+               String uriParam = request.getParam("adduri");
+               if(uriParam != null && uriParam.length() != 0)
+               {
+                       try {
+                               FreenetURI uri = new FreenetURI(uriParam);
+                               synchronized (this) {
+                                       failedURIs.remove(uri);
+                                       visitedURIs.remove(uri);
+                               }
+                               out.append("<p>URI added :"+uriParam+"</p>");
+                               queueURI(uri);
+                               startSomeRequests();
+                       } catch (MalformedURLException mue1) {
+                               out.append("<p>MalFormed URI: "+uriParam+"</p");
                        }
-                       out.append("<p>URI added :"+uriParam+"</p>");
-                       queueURI(uri);
-                       startSomeRequests();
-               } catch (MalformedURLException mue1) {
-                       out.append("<p>MalFormed URI: "+uriParam+"</p");
                }
-               }
-       return out.toString();
-}
-private void appendList(String listname, StringBuffer out, String stylesheet)
-{
-       Iterator it = (runningFetchesByURI.keySet()).iterator();
-       if(listname.equals("running"))
-               it = (runningFetchesByURI.keySet()).iterator();
-       if(listname.equals("visited"))
-               it = (new HashSet(visitedURIs)).iterator();
-       if(listname.equals("queued"))
-               it = (new ArrayList(queuedURIList)).iterator();
-       if(listname.equals("failed"))
-               it = (new HashSet(failedURIs)).iterator();
-       while(it.hasNext())
-               out.append("<code>"+it.next().toString()+"</code><br/>");
-}
-private void appendDefaultPageStart(StringBuffer out, String stylesheet) {
-       
-       out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
-       if(stylesheet != null)
-               out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
-       out.append("</HEAD><BODY>\n");
-       out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
-       out.append("Add uri:");
-       out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" 
/><br/><br/>");
-       out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
-       Set runningFetches = runningFetchesByURI.keySet();
-       out.append("<p><h3>Running Fetches</h3></p>");
-       Set visited = new HashSet(visitedURIs);
-       List queued = new ArrayList(queuedURIList);
-       
-       Set failed = new HashSet(failedURIs);
-       Iterator it=queued.iterator();
-       out.append("<br/>Size :"+runningFetches.size());
-       appendList(runningFetches,out,stylesheet);
-       out.append("<p><a href=\"?list="+"running"+"\">Show all</a><br/></p>");
-       out.append("<br/>Size :"+queued.size());
-       int i = 0;
-       while(it.hasNext()){
-               if(i<=maxShownURIs){
-               out.append("<code>"+it.next().toString()+"</code><br/>");
-               }
-               else break;
-               i++;
+               return out.toString();
        }
-       out.append("<p><a href=\"?list="+"queued"+"\">Show all</a><br/></p>");
-       out.append("<br/>Size :"+visited.size());
-       appendList(visited,out,stylesheet);
-       out.append("<p><a href=\"?list="+"visited"+"\">Show all</a><br/></p>");
-       out.append("<br/>Size :"+failed.size());
-       appendList(failed,out,stylesheet);
-       out.append("<p><a href=\"?list="+"failed"+"\">Show all</a><br/></p>");
-       
-       
-}
-private void appendDefaultHeader(StringBuffer out, String stylesheet){
-       out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
-       if(stylesheet != null)
-               out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
-       out.append("</HEAD><BODY>\n");
-       out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
-       out.append("Add uri:");
-       out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" 
/><br/><br/>");
-       out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
-}
-private void appendList(Set  list,StringBuffer out, String stylesheet){
-       Iterator it = list.iterator();
-       int i = 0;
-       while(it.hasNext()){
-               if(i<=maxShownURIs){
-               out.append("<code>"+it.next().toString()+"</code><br/>");
+
+       private void appendList(String listname, StringBuffer out, String 
stylesheet)
+       {
+               Iterator it = (runningFetchesByURI.keySet()).iterator();
+               if(listname.equals("running"))
+                       it = (runningFetchesByURI.keySet()).iterator();
+               if(listname.equals("visited"))
+                       it = (new HashSet(visitedURIs)).iterator();
+               if(listname.equals("queued"))
+                       it = (new ArrayList(queuedURIList)).iterator();
+               if(listname.equals("failed"))
+                       it = (new HashSet(failedURIs)).iterator();
+               while(it.hasNext())
+                       
out.append("<code>"+it.next().toString()+"</code><br/>");
+       }
+
+       private void appendDefaultPageStart(StringBuffer out, String 
stylesheet) {
+
+               out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+               if(stylesheet != null)
+                       out.append("<link href=\""+stylesheet+"\" 
type=\"text/css\" rel=\"stylesheet\" />");
+               out.append("</HEAD><BODY>\n");
+               out.append("<CENTER><H1>" + pluginName + 
"</H1><BR/><BR/><BR/>\n");
+               out.append("Add uri:");
+               out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
+               out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
+               Set runningFetches = runningFetchesByURI.keySet();
+               out.append("<p><h3>Running Fetches</h3></p>");
+               Set visited = new HashSet(visitedURIs);
+               List queued = new ArrayList(queuedURIList);
+
+               Set failed = new HashSet(failedURIs);
+               Iterator it=queued.iterator();
+               out.append("<br/>Size :"+runningFetches.size()+"<br/>");
+               appendList(runningFetches,out,stylesheet);
+               out.append("<p><a href=\"?list="+"running"+"\">Show 
all</a><br/></p>");
+               out.append("<p><h3>Queued URIs</h3></p>");
+               out.append("<br/>Size :"+queued.size()+"<br/>");
+               int i = 0;
+               while(it.hasNext()){
+                       if(i<=maxShownURIs){
+                               
out.append("<code>"+it.next().toString()+"</code><br/>");
+                       }
+                       else break;
+                       i++;
                }
-               else{
-                       //out.append("<form method=\"GET\"><input 
type=\"submit\" name=\"Showall\" />");
-//                     if(listname.equals("visited"))
-//                     out.append("<p><a href=\"?list="+listname+">Showall 
visited</a><br/></p>");
-//                     if(listname.equals("failed"))
-//                             out.append("<p><a 
href=\"?list="+listname+">Showall failed</a><br/></p>");
-                       break;
-               }
-               i++;
-               
+               out.append("<p><a href=\"?list="+"queued"+"\">Show 
all</a><br/></p>");
+               out.append("<p><h3>Visited URIs</h3></p>");
+               out.append("<br/>Size :"+visited.size()+"<br/>");
+               appendList(visited,out,stylesheet);
+               out.append("<p><a href=\"?list="+"visited"+"\">Show 
all</a><br/></p>");
+               out.append("<p><h3>Failed URIs</h3></p>");
+               out.append("<br/>Size :"+failed.size()+"<br/>");
+               appendList(failed,out,stylesheet);
+               out.append("<p><a href=\"?list="+"failed"+"\">Show 
all</a><br/></p>");
+
+
        }
-       
-}

-public class PageCallBack implements FoundURICallback{
-       int id;
-               
-       PageCallBack(){
-               id = 0;
+
+       private void appendDefaultHeader(StringBuffer out, String stylesheet){
+               out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+               if(stylesheet != null)
+                       out.append("<link href=\""+stylesheet+"\" 
type=\"text/css\" rel=\"stylesheet\" />");
+               out.append("</HEAD><BODY>\n");
+               out.append("<CENTER><H1>" + pluginName + 
"</H1><BR/><BR/><BR/>\n");
+               out.append("Add uri:");
+               out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
+               out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
        }
-       public void foundURI(FreenetURI uri){
-               //now we have the id of the page that had called this link
-               queueURI(uri);
-               int iduri = (Integer) uriIds.get(uri);
-               Vector outlink = (Vector) outlinks.get(id);
-               if(!outlink.contains(iduri))    
-                       outlink.add(iduri);
-               outlinks.remove(id);
-               outlinks.put(id, outlink);
-               try{
-               FileWriter out = new FileWriter("outlink",true);
-               out.write(" id "+id+" size "+ outlink.size()+" \n");
-               out.close();
-               }catch(Exception e){}

-               if(inlinks.containsKey(iduri)){
-                       Vector inlink = (Vector) inlinks.get(iduri);
-                       try{
-                               FileWriter out = new FileWriter("inlink",true);
-                               out.write(" id "+iduri+" size "+ 
inlink.size()+" \n");
-                               out.close();
-                               }catch(Exception e){}
-               
-                       if(!inlink.contains(id)) inlink.add(id);
-                       inlinks.remove(iduri);
-                       inlinks.put(iduri, inlink);
-                       
+
+       private void appendList(Set  list,StringBuffer out, String stylesheet){
+               Iterator it = list.iterator();
+               int i = 0;
+               while(it.hasNext()){
+                       if(i<=maxShownURIs){
+                               
out.append("<code>"+it.next().toString()+"</code><br/>");
+                       }
+                       else{
+                               break;
+                       }
+                       i++;
                }
-               startSomeRequests();
        }
-       public void onText(String s, String type, URI baseURI){
-               try{
-                       FileWriter outp = new FileWriter("ontext",true);
-                       outp.write("inside on text with id"+id+" \n");
-                       outp.close();
-               }catch(Exception e){}
-//             FreenetURI uri;
-//             try {
-//                     uri = new FreenetURI(baseURI.getPath().substring(1));
-//             } catch (MalformedURLException e) {
-//                     Logger.error(this, "Caught " + e, e);
-//                     return;
-//             }
-                
-               
-      
-               if((type != null) && (type.length() != 0) && 
type.toLowerCase().equals("title")
-                  && (s != null) && (s.length() != 0) && (s.indexOf('\n') < 
0)) {
-                       /* We should have a correct title */
-               //      titlesOfURIs.put(uri.toString(), s);
-                       titlesOfIds.put(id, s);
-                       
-                       type = "title";
-                       
+
+       /**
+        * creates the callback object for each page.
+        *<p>Used to create inlinks and outlinks for each page separately.
+        * @author swati
+        *
+        */
+       public class PageCallBack implements FoundURICallback{
+               int id;
+               /*
+                * id of the page as referenced in uriIds
+                */     
+               PageCallBack(){
+                       id = 0;
                }
-               else type = null;

+               public void foundURI(FreenetURI uri){

-               String[] words = s.split("[^A-Za-z0-9]");
+                       queueURI(uri);
+                       int iduri = (Integer) uriIds.get(uri);

-               Integer lastPosition = null;
+                       if(outlinks.containsKey(id)){
+                               Vector outlink = (Vector) outlinks.get(id);
+                               if(!outlink.contains(iduri))    
+                                       outlink.add(iduri);
+                               outlinks.remove(id);
+                               outlinks.put(id, outlink);
+                       }
+                       else 
+                       {
+                               Vector outlink = new Vector();
+                               outlink.add(iduri);
+                               outlinks.put(id, outlink);
+                       }

-               //lastPosition = (Integer)lastPositionByURI.get(uri.toString());
-               lastPosition = (Integer)lastPositionById.get(id);
-               if(lastPosition == null)
-                       lastPosition = new Integer(1); /* We start to count from 1 */
-
-               for (int i = 0; i < words.length; i++) {
-                       String word = words[i];
-                       if ((word == null) || (word.length() == 0))
-                               continue;
-                       word = word.toLowerCase();
-                       try{
-                       if(type == null)
-                               addWord(word, lastPosition.intValue() + i, id);
-                       else
-                               addWord(word, -1 * (i+1), id);
+                       if(inlinks.containsKey(iduri)){
+                               Vector inlink = (Vector) inlinks.get(iduri);
+                               if(!inlink.contains(id)) inlink.add(id);
+                               inlinks.remove(iduri);
+                               inlinks.put(iduri, inlink);
                        }
-                       catch (Exception e){}
+                       else 
+                       {
+                               Vector inlink = new Vector();
+                               inlink.add(id);
+                               inlinks.put(iduri, inlink);
+                       }
+
+                       startSomeRequests();
                }
-               
-               if(type == null) {
-                       lastPosition = new Integer(lastPosition.intValue() + words.length);
-               //      lastPositionByURI.put(uri.toString(), lastPosition);
-                       lastPositionById.put(id, lastPosition);
-               }
-               
-       }
-private synchronized void addWord(String word, int position,int id) throws Exception{
-               
-               
-               if(word.length() < 3)
-                       return;
-               
-               //word = word.intern();


-               //FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
-               Integer[] ids = (Integer[]) idsByWord.get(word);
-               
-       //      urisWithWords.add(uri);
-               idsWithWords.add(id);
-               try{
-                       FileWriter outp = new FileWriter("addWord",true);
-                       outp.write("ID ="+id+" uri ="+idUris.get(id)+"\n");
-                       outp.close();
-               }catch(Exception e){}
-//     FileWriter outp = new FileWriter("uricheck",true);
-//     outp.write(uri.getDocName()+"\n");
-//     outp.write(uri.getKeyType()+"\n");
-//     outp.write(uri.getMetaString()+"\n");
-//     outp.write(uri.getGuessableKey()+"\n");
-//     outp.write(uri.hashCode()+"\n");
-//     outp.write(uri.getPreferredFilename()+"\n");
-//     
-//     outp.close();
+               public void onText(String s, String type, URI baseURI){

-               /* Word position indexation */
-               HashMap wordPositionsForOneUri = (HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word, and gives position */
-               
-               if(wordPositionsForOneUri == null) {
-                       wordPositionsForOneUri = new HashMap();
-                       wordPositionsForOneUri.put(word, new Integer[] { new Integer(position) });
-                       //positionsByWordByURI.put(uri.toString(), wordPositionsForOneUri);
-                       positionsByWordById.put(id, wordPositionsForOneUri);
-               } else {
-                       Integer[] positions = (Integer[])wordPositionsForOneUri.get(word);
+                       if((type != null) && (type.length() != 0) && type.toLowerCase().equals("title")
+                                       && (s != null) && (s.length() != 0) && (s.indexOf('\n') < 0)) {
+                               /* We should have a correct title */
+                               titlesOfIds.put(id, s);
+                               type = "title";
+                       }
+                       else type = null;

-                       if(positions == null) {
-                               positions = new Integer[] { new Integer(position) };
-                               wordPositionsForOneUri.put(word, positions);
-                       } else {
-                               Integer[] newPositions = new Integer[positions.length + 1];
+                       String[] words = s.split("[^A-Za-z0-9]");
+                       Integer lastPosition = null;
+                       lastPosition = (Integer)lastPositionById.get(id);

-                               System.arraycopy(positions, 0, newPositions, 0, positions.length);
-                               newPositions[positions.length] = new Integer(position);
+                       if(lastPosition == null)
+                               lastPosition = new Integer(1); /* We start to count from 1 */
+                       for (int i = 0; i < words.length; i++) {
+                               String word = words[i];
+                               if ((word == null) || (word.length() == 0))
+                                       continue;
+                               word = word.toLowerCase();
+                               try{
+                                       if(type == null)
+                                               addWord(word, lastPosition.intValue() + i, id);
+                                       else
+                                               addWord(word, -1 * (i+1), id);
+                               }
+                               catch (Exception e){}
+                       }

-                               wordPositionsForOneUri.put(word, newPositions);
+                       if(type == null) {
+                               lastPosition = new Integer(lastPosition.intValue() + words.length);
+                               lastPositionById.put(id, lastPosition);
                        }
+
                }
-       
-               if (ids == null) {
-                       idsByWord.put(word, new Integer[] { id });
-                       
-               } else {
-                       for (int i = 0; i < ids.length; i++) {
-                               if (ids[i].equals(id))
-                                       return;
+
+               private synchronized void addWord(String word, int position,int id) throws Exception{
+                       if(word.length() < 3)
+                               return;
+
+                       Integer[] ids = (Integer[]) idsByWord.get(word);
+                       idsWithWords.add(id);
+
+                       /* Word position indexation */
+                       HashMap wordPositionsForOneUri = (HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word, and gives position */
+                       if(wordPositionsForOneUri == null) {
+                               wordPositionsForOneUri = new HashMap();
+                               wordPositionsForOneUri.put(word, new Integer[] { new Integer(position) });
+                               positionsByWordById.put(id, wordPositionsForOneUri);
+                       } 
+                       else {
+                               Integer[] positions = (Integer[])wordPositionsForOneUri.get(word);
+                               if(positions == null) {
+                                       positions = new Integer[] { new Integer(position) };
+                                       wordPositionsForOneUri.put(word, positions);
+                               } 
+                               else {
+                                       Integer[] newPositions = new Integer[positions.length + 1];
+                                       System.arraycopy(positions, 0, newPositions, 0, positions.length);
+                                       newPositions[positions.length] = new Integer(position);
+                                       wordPositionsForOneUri.put(word, newPositions);
+                               }
                        }
-                       Integer[] newIDs = new Integer[ids.length + 1];
-                       System.arraycopy(ids, 0, newIDs, 0, ids.length);
-                       newIDs[ids.length] = id;
-                       idsByWord.put(word, newIDs);
-               }
-               //the new word is added here in urisByWord
-               tMap.put(MD5(word), word);
-               long time_indexing = System.currentTimeMillis();
-               if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 < System.currentTimeMillis()) {
-                       try {
-                               //produceIndex();
-                               //check();
-                               
-                               if(indexing){
-                               generateIndex2();
-                               produceIndex2();
-                               if((System.currentTimeMillis() - time_indexing)/(System.currentTimeMillis() - tProducedIndex) > MAX_TIME_SPENT_INDEXING) indexing= false;
-                               else indexing = true;
+
+                       if (ids == null) {
+                               idsByWord.put(word, new Integer[] { id });
+                       } else {
+                               for (int i = 0; i < ids.length; i++) {
+                                       if (ids[i].equals(id))
+                                               return;
                                }
-                               
-                       } catch (IOException e) {
-                               Logger.error(this, "Caught " + e + " while creating index", e);
+                               Integer[] newIDs = new Integer[ids.length + 1];
+                               System.arraycopy(ids, 0, newIDs, 0, ids.length);
+                               newIDs[ids.length] = id;
+                               idsByWord.put(word, newIDs);
                        }
-                       tProducedIndex = System.currentTimeMillis();
+
+                       tMap.put(MD5(word), word);
+                       long time_indexing = System.currentTimeMillis();
+                       if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 < System.currentTimeMillis()) {
+                               try {
+                                       if(indexing){
+                                               generateIndex2();
+                                               produceIndex2();
+                                               /*
+                                                * ensures that index production doesn't eat up the processor time 
+                                                */
+                                               if((System.currentTimeMillis() - time_indexing)/(System.currentTimeMillis() - tProducedIndex) > MAX_TIME_SPENT_INDEXING) indexing= false;
+                                               else indexing = true;
+                                       }
+                               } catch (IOException e) {
+                                       Logger.error(this, "Caught " + e + " while creating index", e);
+                               }
+                               tProducedIndex = System.currentTimeMillis();
+                       }
                }
-               
        }
-       
-}
-public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
-       return null;
-}
-public String handleHTTPPost(HTTPRequest request) throws PluginHTTPException{
-       return null;
-}

-public void onFoundEdition(long l, USK key){
-       FreenetURI uri = key.getURI();
-       if(runningFetchesByURI.containsKey(uri)) runningFetchesByURI.remove(uri);
-       uri = key.getURI().setSuggestedEdition(l);
-       queueURI(uri);
+
+       public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
+               return null;
+       }
+       public String handleHTTPPost(HTTPRequest request) throws PluginHTTPException{
+               return null;
+       }
+
+       public void onFoundEdition(long l, USK key){
+               FreenetURI uri = key.getURI();
+               if(runningFetchesByURI.containsKey(uri)) runningFetchesByURI.remove(uri);
+               uri = key.getURI().setSuggestedEdition(l);
+               queueURI(uri);
+       }
+
 }
-       
-       
-}

[freenet-cvs] r14725 - trunk/plugins/XMLSpider

Reply via email to