Author: swatig0
Date: 2007-08-16 20:30:56 +0000 (Thu, 16 Aug 2007)
New Revision: 14727

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
URI-id transformation

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2007-08-16 20:28:11 UTC (rev 14726)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2007-08-16 20:30:56 UTC (rev 14727)
@@ -4,6 +4,7 @@
 package plugins.XMLSpider;

 import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
@@ -12,11 +13,14 @@
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.Vector;
@@ -29,10 +33,11 @@
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;

-
+import org.w3c.dom.Attr;
 import org.w3c.dom.DOMImplementation;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;

 import freenet.client.ClientMetadata;
@@ -44,6 +49,7 @@
 import freenet.client.async.ClientCallback;
 import freenet.client.async.ClientGetter;
 import freenet.client.async.USKCallback;
+import freenet.clients.http.PageMaker;
 import freenet.clients.http.ToadletContext;
 import freenet.clients.http.ToadletContextClosedException;
 import freenet.clients.http.filter.ContentFilter;
@@ -61,7 +67,9 @@
 import freenet.pluginmanager.FredPluginThreadless;
 import freenet.pluginmanager.PluginHTTPException;
 import freenet.pluginmanager.PluginRespirator;
+import freenet.support.HTMLNode;
 import freenet.support.Logger;
+import freenet.support.MultiValueTable;
 import freenet.support.api.Bucket;
 import freenet.support.api.HTTPRequest;

@@ -70,7 +78,7 @@
  * In case the size of the index grows up a specific threshold the index is 
split into several subindices.
  * The indexing key is the md5 hash of the word.
  * 
- *  @author swati
+ *  @author swati goyal
  *  
  */
 public class XMLSpider implements FredPlugin, FredPluginHTTP, 
FredPluginThreadless,  FredPluginHTTPAdvanced,HttpPlugin, ClientCallback, 
USKCallback{
@@ -87,13 +95,14 @@
         * Lists the uris that have been vistied by the spider
         */
        public final HashSet visitedURIs = new HashSet();
+       private final HashSet urisWithWords = new HashSet();
        private final HashSet idsWithWords = new HashSet();
        /**
         * 
         * Lists the uris that were visited but failed.
         */
        public final HashSet failedURIs = new HashSet();
-
+       
        private final HashSet queuedURISet = new HashSet();
        /**
         * 
@@ -101,9 +110,9 @@
         */
        public final LinkedList queuedURIList = new LinkedList();
        private final HashMap runningFetchesByURI = new HashMap();
-
+       private final HashMap urisByWord = new HashMap();
        private final HashMap idsByWord = new HashMap();
-
+       private final HashMap titlesOfURIs = new HashMap();
        private final HashMap titlesOfIds = new HashMap();
        private final HashMap uriIds = new HashMap();
        private final HashMap idUris = new HashMap();
@@ -120,14 +129,14 @@
        private Vector indices;
        private int match;
        private int id;
-
+       private Vector list;
        private boolean indexing ;
-
+       
        private static final int minTimeBetweenEachIndexRewriting = 10;
-       /**
-        * directory where the generated indices are stored. 
-        * Needs to be created before it can be used
-        */
+/**
+ * directory where the generated indices are stored. 
+ * Needs to be created before it can be used
+ */
        public static final String DEFAULT_INDEX_DIR = "myindex4/";
        /**
         * Lists the allowed mime types of the fetched page. 
@@ -140,7 +149,7 @@
         * maximum value = 1; minimum value = 0. 
         */
        public static final double MAX_TIME_SPENT_INDEXING = 0.5;
-
+       
        private static final String indexTitle= "XMLSpider index";
        private static final String indexOwner = "Freenet";
        private static final String indexOwnerEmail = null;
@@ -153,17 +162,17 @@
        // Can have many; this limit only exists to save memory.
        private static final int maxParallelRequests = 100;
        private int maxShownURIs = 15;
-
+       
        private NodeClientCore core;
        private FetchContext ctx;
        private final short PRIORITY_CLASS = 
RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS;
        private boolean stopped = true;
        PluginRespirator pr;
-
-       /**
-        * Adds the found uri to the list of to-be-retrieved uris. <p>Every usk 
uri added as ssk.
-        * @param uri the new uri that needs to be fetched for further indexing
-        */
+       
+/**
+ * Adds the found uri to the list of to-be-retrieved uris. <p>Every usk uri 
added as ssk.
+ * @param uri the new uri that needs to be fetched for further indexing
+ */
        public synchronized void queueURI(FreenetURI uri) {
                if((uri.getKeyType()).equals("USK")){
                        if(uri.getSuggestedEdition() < 0)
@@ -226,18 +235,18 @@
                        }
                }
        }
+       

-
        private ClientGetter makeGetter(FreenetURI uri) {
                ClientGetter g = new ClientGetter(this, 
core.requestStarters.chkFetchScheduler, core.requestStarters.sskFetchScheduler, 
uri, ctx, PRIORITY_CLASS, this, null, null);
                return g;
        }
-       /**
-        * Processes the successfully fetched uri for further outlinks.
-        * 
-        * @param result
-        * @param state
-        */
+/**
+ * Processes the successfully fetched uri for further outlinks.
+ * 
+ * @param result
+ * @param state
+ */
        public void onSuccess(FetchResult result, ClientGetter state) {
                FreenetURI uri = state.getURI();

@@ -270,7 +279,7 @@
                        data.free();
                }
        }
-
+       
        public void onFailure(FetchException e, ClientGetter state) {
                FreenetURI uri = state.getURI();

@@ -297,14 +306,14 @@
        }

        /**
-        * generates the main index file that can be used by librarian for 
searching in the list of
-        * subindices
-        *  
-        * @param void
-        * @author swati 
-        * @throws IOException
-        * @throws NoSuchAlgorithmException
-        */
+ * generates the main index file that can be used by librarian for searching 
in the list of
+ * subindices
+ *  
+ * @param void
+ * @author swati 
+ * @throws IOException
+ * @throws NoSuchAlgorithmException
+ */
        private synchronized void produceIndex2() throws 
IOException,NoSuchAlgorithmException {
                // Produce the main index file.

@@ -428,7 +437,7 @@
                        System.out.println("No URIs with words");
                        return;
                }
-
+               
                indices = new Vector();
                int prefix = 1;
                match = 1;
@@ -460,7 +469,7 @@
                for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i));
                return tmp;
        }
-
+       
        private synchronized void generateSubIndex(int p,Vector list) throws 
Exception{
                /*
                 * if the list is less than max allowed entries in a file then 
directly generate the xml 
@@ -497,12 +506,12 @@
                }
        }       

-       /**
-        * generates the xml index with the given list of words with prefix 
number of matching bits in md5
-        * @param list  list of the words to be added in the index
-        * @param prefix number of matching bits of md5
-        * @throws Exception
-        */
+/**
+ * generates the xml index with the given list of words with prefix number of 
matching bits in md5
+ * @param list  list of the words to be added in the index
+ * @param prefix number of matching bits of md5
+ * @throws Exception
+ */
        public synchronized void generateXML (Vector list, int prefix) throws 
Exception
        {
                String p = ((String) list.elementAt(0)).substring(0, prefix);
@@ -616,20 +625,137 @@
                        Logger.minor(this, "Spider: indexes regenerated.");
        }

-
+               
        public void handleGet(HTTPRequest request, ToadletContext context) 
throws IOException, ToadletContextClosedException {
-               /*
-                * ignore
-                */
+               String action = request.getParam("action");
+               PageMaker pageMaker = context.getPageMaker();
+               if ((action == null) || (action.length() == 0)) {
+                       MultiValueTable responseHeaders = new MultiValueTable();
+                       responseHeaders.put("Location", "?action=list");
+                       context.sendReplyHeaders(301, "Redirect", 
responseHeaders, "text/html; charset=utf-8", 0);
+                       return;
+               } else if ("list".equals(action)) {
+                       
+                       String listName = request.getParam("listName", null);
+                       HTMLNode pageNode = pageMaker.getPageNode("The XML 
Spider", context);
+                       HTMLNode contentNode = 
pageMaker.getContentNode(pageNode);
+                       /* create copies for multi-threaded use */
+                       if (listName == null) {
+                               Map runningFetches = new 
HashMap(runningFetchesByURI);
+                               List queued = new ArrayList(queuedURIList);
+                               Set visited = new HashSet(visitedURIs);
+                               Set failed = new HashSet(failedURIs);
+                               contentNode.addChild(createNavbar(pageMaker, 
runningFetches.size(), queued.size(), visited.size(), failed.size()));
+                               contentNode.addChild(createAddBox(pageMaker, 
context));
+                               contentNode.addChild(createList(pageMaker, 
"Running FetcheIIIs", "running", runningFetches.keySet(), maxShownURIs));
+                               contentNode.addChild(createList(pageMaker, 
"Queued URIs", "queued", queued, maxShownURIs));
+                               contentNode.addChild(createList(pageMaker, 
"Visited URIs", "visited", visited, maxShownURIs));
+                               contentNode.addChild(createList(pageMaker, 
"Failed URIs", "failed", failed, maxShownURIs));
+                       } else {
+                               contentNode.addChild(createBackBox(pageMaker));
+                               if ("failed".equals(listName)) {
+                                       Set failed = new HashSet(failedURIs);
+                                       contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, -1));
+                               } else if ("visited".equals(listName)) {
+                                       Set visited = new HashSet(visitedURIs);
+                                       contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, -1));
+                               } else if ("queued".equals(listName)) {
+                                       List queued = new 
ArrayList(queuedURIList);
+                                       contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, -1));
+                               } else if ("running".equals(listName)) {
+                                       Map runningFetches = new 
HashMap(runningFetchesByURI);
+                                       
contentNode.addChild(createList(pageMaker, "Running Fetches", "running", 
runningFetches.keySet(), -1));
+                               }
+                       }
+                       MultiValueTable responseHeaders = new MultiValueTable();
+                       byte[] responseBytes = 
pageNode.generate().getBytes("utf-8");
+                       context.sendReplyHeaders(200, "OK", responseHeaders, 
"text/html; charset=utf-8", responseBytes.length);
+                       context.writeData(responseBytes);
+               } else if ("add".equals(action)) {
+                       String uriParam = request.getParam("key");
+                       try {
+                               FreenetURI uri = new FreenetURI(uriParam);
+                               synchronized (this) {
+                                       failedURIs.remove(uri);
+                                       visitedURIs.remove(uri);
+                               }
+                               queueURI(uri);
+                               startSomeRequests();
+                       } catch (MalformedURLException mue1) {
+                               sendSimpleResponse(context, "URL invalid", "The 
given URI is not valid.");
+                               return;
+                       }
+                       MultiValueTable responseHeaders = new MultiValueTable();
+                       responseHeaders.put("Location", "?action=list");
+                       context.sendReplyHeaders(301, "Redirect", 
responseHeaders, "text/html; charset=utf-8", 0);
+                       return;
+               }
        }

-
        public void handlePost(HTTPRequest request, ToadletContext context) 
throws IOException {
-               /*
-                * ignore
-                */
        }
+       
+       private void sendSimpleResponse(ToadletContext context, String title, 
String message) throws ToadletContextClosedException, IOException {
+               PageMaker pageMaker = context.getPageMaker();
+               HTMLNode pageNode = pageMaker.getPageNode(title, context);
+               HTMLNode contentNode = pageMaker.getContentNode(pageNode);
+               HTMLNode infobox = 
contentNode.addChild(pageMaker.getInfobox("infobox-alter", title));
+               HTMLNode infoboxContent = pageMaker.getContentNode(infobox);
+               infoboxContent.addChild("#", message);
+               byte[] responseBytes = pageNode.generate().getBytes("utf-8");
+               context.sendReplyHeaders(200, "OK", new MultiValueTable(), 
"text/html; charset=utf-8", responseBytes.length);
+               context.writeData(responseBytes);
+       }
+       
+       private HTMLNode createBackBox(PageMaker pageMaker) {
+               HTMLNode backbox = pageMaker.getInfobox((String) null);
+               HTMLNode backContent = pageMaker.getContentNode(backbox);
+               backContent.addChild("#", "Return to the ");
+               backContent.addChild("a", "href", "?action=list", "list of all 
URIs");
+               backContent.addChild("#", ".");
+               return backbox;
+       }
+       
+       private HTMLNode createAddBox(PageMaker pageMaker, ToadletContext ctx) {
+               HTMLNode addBox = pageMaker.getInfobox("Add a URI");
+               HTMLNode formNode = 
pageMaker.getContentNode(addBox).addChild("form", new String[] { "action", 
"method" }, new String[] { "", "get" });
+               formNode.addChild("input", new String[] { "type", "name", 
"value" }, new String[] { "hidden", "action", "add" });
+               formNode.addChild("input", new String[] { "type", "size", 
"name", "value" }, new String[] { "text", "40", "key", "" });
+               formNode.addChild("input", new String[] { "type", "value" }, 
new String[] { "submit", "Add URI" });
+               return addBox;
+       }

+       private HTMLNode createNavbar(PageMaker pageMaker, int running, int 
queued, int visited, int failed) {
+               HTMLNode navbar = pageMaker.getInfobox("navbar", "Page 
Navigation");
+               HTMLNode list = pageMaker.getContentNode(navbar).addChild("ul");
+               list.addChild("li").addChild("a", "href", "#running", "Running 
(" + running + ')');
+               list.addChild("li").addChild("a", "href", "#queued", "Queued (" + queued + ')');
+               list.addChild("li").addChild("a", "href", "#visited", "Visited 
(" + visited + ')');
+               list.addChild("li").addChild("a", "href", "#failed", "Failed (" + failed + ')');
+               return navbar;
+       }
+
+       private HTMLNode createList(PageMaker pageMaker, String listName, 
String anchorName, Collection collection, int maxCount) {
+               HTMLNode listNode = new HTMLNode("div");
+               listNode.addChild("a", "name", anchorName);
+               HTMLNode listBox = pageMaker.getInfobox(listName);
+               HTMLNode listContent = pageMaker.getContentNode(listBox);
+               listNode.addChild(listBox);
+               Iterator collectionItems = collection.iterator();
+               int itemCount = 0;
+               while (collectionItems.hasNext()) {
+                       FreenetURI uri = (FreenetURI) collectionItems.next();
+                       listContent.addChild("#", uri.toString());
+                       listContent.addChild("br");
+                       if (itemCount++ == maxCount) {
+                               listContent.addChild("br");
+                               listContent.addChild("a", "href", 
"?action=list&listName=" + anchorName, "Show all\u2026");
+                               break;
+                       }
+               }
+               return listNode;
+       }
+
        /**
         * @see freenet.oldplugins.plugin.Plugin#getPluginName()
         */
@@ -641,7 +767,7 @@
         * @see 
freenet.oldplugins.plugin.Plugin#setPluginManager(freenet.oldplugins.plugin.PluginManager)
         */
        public void setPluginManager(PluginManager pluginManager) {
-
+               
                this.core = pluginManager.getClientCore();
                this.ctx = core.makeClient((short) 0).getFetchContext();
                ctx.maxSplitfileBlockRetries = 10;
@@ -652,9 +778,9 @@
                allowedMIMETypes.add(new String("text/html"));
                allowedMIMETypes.add(new String("text/plain"));
                allowedMIMETypes.add(new String("application/xhtml+xml"));
-               //      allowedMIMETypes.add(new String("application/zip"));
+       //      allowedMIMETypes.add(new String("application/zip"));
                ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-               //      ctx.allowedMIMETypes.add("text/html"); 
+       //      ctx.allowedMIMETypes.add("text/html"); 
                tProducedIndex = System.currentTimeMillis();
                indexing = true;
        }
@@ -685,25 +811,22 @@
                // Ignore
        }
        private static String convertToHex(byte[] data) {
-               StringBuffer buf = new StringBuffer();
-               for (int i = 0; i < data.length; i++) {
-                       int halfbyte = (data[i] >>> 4) & 0x0F;
-                       int two_halfs = 0;
-                       do {
-                               if ((0 <= halfbyte) && (halfbyte <= 9))
-                                       buf.append((char) ('0' + halfbyte));
-                               else
-                                       buf.append((char) ('a' + (halfbyte - 
10)));
-                               halfbyte = data[i] & 0x0F;
-                       } while(two_halfs++ < 1);
-               }
-               return buf.toString();
-       }
-
-       /*
-        * calculate the md5 for a given string
-        */
-       private static String MD5(String text) throws NoSuchAlgorithmException, 
UnsupportedEncodingException  {
+        StringBuffer buf = new StringBuffer();
+        for (int i = 0; i < data.length; i++) {
+               int halfbyte = (data[i] >>> 4) & 0x0F;
+               int two_halfs = 0;
+               do {
+                       if ((0 <= halfbyte) && (halfbyte <= 9))
+                       buf.append((char) ('0' + halfbyte));
+                   else
+                       buf.append((char) ('a' + (halfbyte - 10)));
+                       halfbyte = data[i] & 0x0F;
+               } while(two_halfs++ < 1);
+        }
+        return buf.toString();
+    }
+       //this function will return the String representation of the MD5 hash 
for the input string 
+       public static String MD5(String text) throws NoSuchAlgorithmException, 
UnsupportedEncodingException  {
                MessageDigest md;
                md = MessageDigest.getInstance("MD5");
                byte[] md5hash = new byte[32];
@@ -711,9 +834,9 @@
                md5hash = md.digest();
                return convertToHex(md5hash);
        }
-
+       
        public void generateSubIndex(String filename){
-//             generates the new subIndex
+//generates the new subIndex
                File outputFile = new File(filename);
                StreamResult resultStream;
                resultStream = new StreamResult(outputFile);
@@ -750,28 +873,28 @@
                /* -> title */
                Element subHeaderElement = xmlDoc.createElement("title");
                Text subHeaderText = xmlDoc.createTextNode(indexTitle);
-
+               
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);

                /* -> owner */
                subHeaderElement = xmlDoc.createElement("owner");
                subHeaderText = xmlDoc.createTextNode(indexOwner);
-
+               
                subHeaderElement.appendChild(subHeaderText);
                headerElement.appendChild(subHeaderElement);
-
-
+               
+       
                /* -> owner email */
                if(indexOwnerEmail != null) {
                        subHeaderElement = xmlDoc.createElement("email");
                        subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
-
+                       
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
                }

-
+               
                Element filesElement = xmlDoc.createElement("files"); /* 
filesElement != fileElement */

                Element EntriesElement = xmlDoc.createElement("entries");
@@ -780,7 +903,7 @@
                //all index files are ready
                /* Adding word index */
                Element keywordsElement = xmlDoc.createElement("keywords");
-
+               
                rootElement.appendChild(EntriesElement);
                rootElement.appendChild(headerElement);
                rootElement.appendChild(filesElement);
@@ -801,7 +924,7 @@

                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
+               
                /* final step */
                try {
                        serializer.transform(domSource, resultStream);
@@ -813,330 +936,361 @@
                if(Logger.shouldLog(Logger.MINOR, this))
                        Logger.minor(this, "Spider: indexes regenerated.");
        }
-
-       public void terminate(){
-               synchronized (this) {
-                       stopped = true;
-                       queuedURIList.clear();
+       
+public void terminate(){
+       synchronized (this) {
+               stopped = true;
+               queuedURIList.clear();
+       }
+}
+       
+public void runPlugin(PluginRespirator pr){
+       this.pr = pr;
+       this.id = 0;
+       this.core = pr.getNode().clientCore;
+       this.ctx = core.makeClient((short) 0).getFetchContext();
+       ctx.maxSplitfileBlockRetries = 10;
+       ctx.maxNonSplitfileRetries = 10;
+       ctx.maxTempLength = 2 * 1024 * 1024;
+       ctx.maxOutputLength = 2 * 1024 * 1024;
+       allowedMIMETypes = new HashSet();
+       allowedMIMETypes.add(new String("text/html"));
+       allowedMIMETypes.add(new String("text/plain"));
+       allowedMIMETypes.add(new String("application/xhtml+xml"));
+//     allowedMIMETypes.add(new String("application/zip"));
+       ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+//     ctx.allowedMIMETypes.add("text/html"); 
+       tProducedIndex = System.currentTimeMillis();
+       indexing = true;
+       stopped = false;
+       count = 0;
+       
+       //startPlugin();
+       Thread starterThread = new Thread("Spider Plugin Starter") {
+               public void run() {
+                       try{
+                               Thread.sleep(30 * 1000); // Let the node start 
up
+                       } catch (InterruptedException e){}
+                       startSomeRequests();
                }
-       }
+       };
+       starterThread.setDaemon(true);
+       starterThread.start();
+}

-       public void runPlugin(PluginRespirator pr){
-               this.pr = pr;
-               this.id = 0;
-               this.core = pr.getNode().clientCore;
-               this.ctx = core.makeClient((short) 0).getFetchContext();
-               ctx.maxSplitfileBlockRetries = 10;
-               ctx.maxNonSplitfileRetries = 10;
-               ctx.maxTempLength = 2 * 1024 * 1024;
-               ctx.maxOutputLength = 2 * 1024 * 1024;
-               allowedMIMETypes = new HashSet();
-               allowedMIMETypes.add(new String("text/html"));
-               allowedMIMETypes.add(new String("text/plain"));
-               allowedMIMETypes.add(new String("application/xhtml+xml"));
-
-               ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-
-               tProducedIndex = System.currentTimeMillis();
-               indexing = true;
-               stopped = false;
-               count = 0;
-
-               //startPlugin();
-               Thread starterThread = new Thread("Spider Plugin Starter") {
-                       public void run() {
-                               try{
-                                       Thread.sleep(30 * 1000); // Let the 
node start up
-                               } catch (InterruptedException e){}
-                               startSomeRequests();
-                       }
-               };
-               starterThread.setDaemon(true);
-               starterThread.start();
+public String handleHTTPGet(HTTPRequest request) throws PluginHTTPException{
+       StringBuffer out = new StringBuffer();
+       // need to produce pretty html
+       //later fredpluginhttpadvanced will give the interface
+       //this brings us to the page from visit
+       String listname = request.getParam("list");
+       if(listname.length() != 0)
+       {
+               appendDefaultHeader(out,null);
+               out.append("<p><h4>"+listname+" URIs</h4></p>");
+               appendList(listname,out,null);
+               return out.toString();
        }
-
-       /**
-        * Interface to the Spider data
-        */
-       public String handleHTTPGet(HTTPRequest request) throws 
PluginHTTPException{
-               StringBuffer out = new StringBuffer();
-
-               String listname = request.getParam("list");
-               if(listname.length() != 0)
+       appendDefaultPageStart(out,null);
+       String uriParam = request.getParam("adduri");
+       if(uriParam != null && uriParam.length() != 0)
                {
-                       appendDefaultHeader(out,null);
-                       out.append("<p><h4>"+listname+" URIs</h4></p>");
-                       appendList(listname,out,null);
-                       return out.toString();
-               }
-               appendDefaultPageStart(out,null);
-               String uriParam = request.getParam("adduri");
-               if(uriParam != null && uriParam.length() != 0)
-               {
-                       try {
-                               FreenetURI uri = new FreenetURI(uriParam);
-                               synchronized (this) {
-                                       failedURIs.remove(uri);
-                                       visitedURIs.remove(uri);
-                               }
-                               out.append("<p>URI added :"+uriParam+"</p>");
-                               queueURI(uri);
-                               startSomeRequests();
-                       } catch (MalformedURLException mue1) {
-                               out.append("<p>MalFormed URI: "+uriParam+"</p");
+               try {
+                       FreenetURI uri = new FreenetURI(uriParam);
+                       synchronized (this) {
+                               failedURIs.remove(uri);
+                               visitedURIs.remove(uri);
                        }
+                       out.append("<p>URI added :"+uriParam+"</p>");
+                       queueURI(uri);
+                       startSomeRequests();
+               } catch (MalformedURLException mue1) {
+                       out.append("<p>MalFormed URI: "+uriParam+"</p");
                }
-               return out.toString();
+               }
+       return out.toString();
+}
+private void appendList(String listname, StringBuffer out, String stylesheet)
+{
+       Iterator it = (runningFetchesByURI.keySet()).iterator();
+       if(listname.equals("running"))
+               it = (runningFetchesByURI.keySet()).iterator();
+       if(listname.equals("visited"))
+               it = (new HashSet(visitedURIs)).iterator();
+       if(listname.equals("queued"))
+               it = (new ArrayList(queuedURIList)).iterator();
+       if(listname.equals("failed"))
+               it = (new HashSet(failedURIs)).iterator();
+       while(it.hasNext())
+               out.append("<code>"+it.next().toString()+"</code><br/>");
+}
+private void appendDefaultPageStart(StringBuffer out, String stylesheet) {
+       
+       out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+       if(stylesheet != null)
+               out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
+       out.append("</HEAD><BODY>\n");
+       out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
+       out.append("Add uri:");
+       out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" 
/><br/><br/>");
+       out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+       Set runningFetches = runningFetchesByURI.keySet();
+       out.append("<p><h3>Running Fetches</h3></p>");
+       Set visited = new HashSet(visitedURIs);
+       List queued = new ArrayList(queuedURIList);
+       
+       Set failed = new HashSet(failedURIs);
+       Iterator it=queued.iterator();
+       out.append("<br/>Size :"+runningFetches.size());
+       appendList(runningFetches,out,stylesheet);
+       out.append("<p><a href=\"?list="+"running"+"\">Show all</a><br/></p>");
+       out.append("<br/>Size :"+queued.size());
+       int i = 0;
+       while(it.hasNext()){
+               if(i<=maxShownURIs){
+               out.append("<code>"+it.next().toString()+"</code><br/>");
+               }
+               else break;
+               i++;
        }
-
-       private void appendList(String listname, StringBuffer out, String 
stylesheet)
-       {
-               Iterator it = (runningFetchesByURI.keySet()).iterator();
-               if(listname.equals("running"))
-                       it = (runningFetchesByURI.keySet()).iterator();
-               if(listname.equals("visited"))
-                       it = (new HashSet(visitedURIs)).iterator();
-               if(listname.equals("queued"))
-                       it = (new ArrayList(queuedURIList)).iterator();
-               if(listname.equals("failed"))
-                       it = (new HashSet(failedURIs)).iterator();
-               while(it.hasNext())
-                       
out.append("<code>"+it.next().toString()+"</code><br/>");
-       }
-
-       private void appendDefaultPageStart(StringBuffer out, String 
stylesheet) {
-
-               out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
-               if(stylesheet != null)
-                       out.append("<link href=\""+stylesheet+"\" 
type=\"text/css\" rel=\"stylesheet\" />");
-               out.append("</HEAD><BODY>\n");
-               out.append("<CENTER><H1>" + pluginName + 
"</H1><BR/><BR/><BR/>\n");
-               out.append("Add uri:");
-               out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
-               out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
-               Set runningFetches = runningFetchesByURI.keySet();
-               out.append("<p><h3>Running Fetches</h3></p>");
-               Set visited = new HashSet(visitedURIs);
-               List queued = new ArrayList(queuedURIList);
-
-               Set failed = new HashSet(failedURIs);
-               Iterator it=queued.iterator();
-               out.append("<br/>Size :"+runningFetches.size()+"<br/>");
-               appendList(runningFetches,out,stylesheet);
-               out.append("<p><a href=\"?list="+"running"+"\">Show 
all</a><br/></p>");
-               out.append("<p><h3>Queued URIs</h3></p>");
-               out.append("<br/>Size :"+queued.size()+"<br/>");
-               int i = 0;
-               while(it.hasNext()){
-                       if(i<=maxShownURIs){
-                               
out.append("<code>"+it.next().toString()+"</code><br/>");
-                       }
-                       else break;
-                       i++;
+       out.append("<p><a href=\"?list="+"queued"+"\">Show all</a><br/></p>");
+       out.append("<br/>Size :"+visited.size());
+       appendList(visited,out,stylesheet);
+       out.append("<p><a href=\"?list="+"visited"+"\">Show all</a><br/></p>");
+       out.append("<br/>Size :"+failed.size());
+       appendList(failed,out,stylesheet);
+       out.append("<p><a href=\"?list="+"failed"+"\">Show all</a><br/></p>");
+       
+       
+}
+private void appendDefaultHeader(StringBuffer out, String stylesheet){
+       out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+       if(stylesheet != null)
+               out.append("<link href=\""+stylesheet+"\" type=\"text/css\" 
rel=\"stylesheet\" />");
+       out.append("</HEAD><BODY>\n");
+       out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
+       out.append("Add uri:");
+       out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" 
/><br/><br/>");
+       out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+}
+private void appendList(Set  list,StringBuffer out, String stylesheet){
+       Iterator it = list.iterator();
+       int i = 0;
+       while(it.hasNext()){
+               if(i<=maxShownURIs){
+               out.append("<code>"+it.next().toString()+"</code><br/>");
                }
-               out.append("<p><a href=\"?list="+"queued"+"\">Show 
all</a><br/></p>");
-               out.append("<p><h3>Visited URIs</h3></p>");
-               out.append("<br/>Size :"+visited.size()+"<br/>");
-               appendList(visited,out,stylesheet);
-               out.append("<p><a href=\"?list="+"visited"+"\">Show 
all</a><br/></p>");
-               out.append("<p><h3>Failed URIs</h3></p>");
-               out.append("<br/>Size :"+failed.size()+"<br/>");
-               appendList(failed,out,stylesheet);
-               out.append("<p><a href=\"?list="+"failed"+"\">Show 
all</a><br/></p>");
-
-
+               else{
+                       //out.append("<form method=\"GET\"><input 
type=\"submit\" name=\"Showall\" />");
+//                     if(listname.equals("visited"))
+//                     out.append("<p><a href=\"?list="+listname+">Showall 
visited</a><br/></p>");
+//                     if(listname.equals("failed"))
+//                             out.append("<p><a 
href=\"?list="+listname+">Showall failed</a><br/></p>");
+                       break;
+               }
+               i++;
+               
        }
+       
+}

-
-       private void appendDefaultHeader(StringBuffer out, String stylesheet){
-               out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
-               if(stylesheet != null)
-                       out.append("<link href=\""+stylesheet+"\" 
type=\"text/css\" rel=\"stylesheet\" />");
-               out.append("</HEAD><BODY>\n");
-               out.append("<CENTER><H1>" + pluginName + 
"</H1><BR/><BR/><BR/>\n");
-               out.append("Add uri:");
-               out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
-               out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
+public class PageCallBack implements FoundURICallback{
+       int id;
+               
+       PageCallBack(){
+               id = 0;
        }
+       public void foundURI(FreenetURI uri){
+               //now we have the id of the page that had called this link
+               queueURI(uri);
+               int iduri = (Integer) uriIds.get(uri);
+               Vector outlink = (Vector) outlinks.get(id);
+               if(!outlink.contains(iduri))    
+                       outlink.add(iduri);
+               outlinks.remove(id);
+               outlinks.put(id, outlink);
+               try{
+               FileWriter out = new FileWriter("outlink",true);
+               out.write(" id "+id+" size "+ outlink.size()+" \n");
+               out.close();
+               }catch(Exception e){}

-
-       private void appendList(Set  list,StringBuffer out, String stylesheet){
-               Iterator it = list.iterator();
-               int i = 0;
-               while(it.hasNext()){
-                       if(i<=maxShownURIs){
-                               
out.append("<code>"+it.next().toString()+"</code><br/>");
-                       }
-                       else{
-                               break;
-                       }
-                       i++;
+               if(inlinks.containsKey(iduri)){
+                       Vector inlink = (Vector) inlinks.get(iduri);
+                       try{
+                               FileWriter out = new FileWriter("inlink",true);
+                               out.write(" id "+iduri+" size "+ 
inlink.size()+" \n");
+                               out.close();
+                               }catch(Exception e){}
+               
+                       if(!inlink.contains(id)) inlink.add(id);
+                       inlinks.remove(iduri);
+                       inlinks.put(iduri, inlink);
+                       
                }
+               startSomeRequests();
        }
-
-       /**
-        * creates the callback object for each page.
-        *<p>Used to create inlinks and outlinks for each page separately.
-        * @author swati
-        *
-        */
-       public class PageCallBack implements FoundURICallback{
-               int id;
-               /*
-                * id of the page as refrenced in uriIds
-                */     
-               PageCallBack(){
-                       id = 0;
+       public void onText(String s, String type, URI baseURI){
+               try{
+                       FileWriter outp = new FileWriter("ontext",true);
+                       outp.write("inside on text with id"+id+" \n");
+                       outp.close();
+               }catch(Exception e){}
+//             FreenetURI uri;
+//             try {
+//                     uri = new FreenetURI(baseURI.getPath().substring(1));
+//             } catch (MalformedURLException e) {
+//                     Logger.error(this, "Caught " + e, e);
+//                     return;
+//             }
+                
+               
+      
+               if((type != null) && (type.length() != 0) && 
type.toLowerCase().equals("title")
+                  && (s != null) && (s.length() != 0) && (s.indexOf('\n') < 
0)) {
+                       /* We should have a correct title */
+               //      titlesOfURIs.put(uri.toString(), s);
+                       titlesOfIds.put(id, s);
+                       
+                       type = "title";
+                       
                }
+               else type = null;

-               public void foundURI(FreenetURI uri){

-                       queueURI(uri);
-                       int iduri = (Integer) uriIds.get(uri);
+               String[] words = s.split("[^A-Za-z0-9]");

-                       if(outlinks.containsKey(id)){
-                               Vector outlink = (Vector) outlinks.get(id);
-                               if(!outlink.contains(iduri))    
-                                       outlink.add(iduri);
-                               outlinks.remove(id);
-                               outlinks.put(id, outlink);
-                       }
-                       else 
-                       {
-                               Vector outlink = new Vector();
-                               outlink.add(iduri);
-                               outlinks.put(id, outlink);
-                       }
+               Integer lastPosition = null;

-                       if(inlinks.containsKey(iduri)){
-                               Vector inlink = (Vector) inlinks.get(iduri);
-                               if(!inlink.contains(id)) inlink.add(id);
-                               inlinks.remove(iduri);
-                               inlinks.put(iduri, inlink);
+               //lastPosition = (Integer)lastPositionByURI.get(uri.toString());
+               lastPosition = (Integer)lastPositionById.get(id);
+               if(lastPosition == null)
+                       lastPosition = new Integer(1); /* We start to count 
from 1 */
+
+               for (int i = 0; i < words.length; i++) {
+                       String word = words[i];
+                       if ((word == null) || (word.length() == 0))
+                               continue;
+                       word = word.toLowerCase();
+                       try{
+                       if(type == null)
+                               addWord(word, lastPosition.intValue() + i, id);
+                       else
+                               addWord(word, -1 * (i+1), id);
                        }
-                       else 
-                       {
-                               Vector inlink = new Vector();
-                               inlink.add(id);
-                               inlinks.put(iduri, inlink);
-                       }
-
-                       startSomeRequests();
+                       catch (Exception e){}
                }
+               
+               if(type == null) {
+                       lastPosition = new Integer(lastPosition.intValue() + 
words.length);
+               //      lastPositionByURI.put(uri.toString(), lastPosition);
+                       lastPositionById.put(id, lastPosition);
+               }
+               
+       }
+private synchronized void addWord(String word, int position,int id) throws 
Exception{
+               
+               
+               if(word.length() < 3)
+                       return;
+               
+               //word = word.intern();


-               public void onText(String s, String type, URI baseURI){
+               //FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
+               Integer[] ids = (Integer[]) idsByWord.get(word);
+               
+       //      urisWithWords.add(uri);
+               idsWithWords.add(id);
+               try{
+                       FileWriter outp = new FileWriter("addWord",true);
+                       outp.write("ID ="+id+" uri ="+idUris.get(id)+"\n");
+                       outp.close();
+               }catch(Exception e){}
+//     FileWriter outp = new FileWriter("uricheck",true);
+//     outp.write(uri.getDocName()+"\n");
+//     outp.write(uri.getKeyType()+"\n");
+//     outp.write(uri.getMetaString()+"\n");
+//     outp.write(uri.getGuessableKey()+"\n");
+//     outp.write(uri.hashCode()+"\n");
+//     outp.write(uri.getPreferredFilename()+"\n");
+//     
+//     outp.close();

-                       if((type != null) && (type.length() != 0) && 
type.toLowerCase().equals("title")
-                                       && (s != null) && (s.length() != 0) && 
(s.indexOf('\n') < 0)) {
-                               /* We should have a correct title */
-                               titlesOfIds.put(id, s);
-                               type = "title";
-                       }
-                       else type = null;
+               /* Word position indexation */
+               HashMap wordPositionsForOneUri = 
(HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word, 
and gives position */
+               
+               if(wordPositionsForOneUri == null) {
+                       wordPositionsForOneUri = new HashMap();
+                       wordPositionsForOneUri.put(word, new Integer[] { new 
Integer(position) });
+                       //positionsByWordByURI.put(uri.toString(), 
wordPositionsForOneUri);
+                       positionsByWordById.put(id, wordPositionsForOneUri);
+               } else {
+                       Integer[] positions = 
(Integer[])wordPositionsForOneUri.get(word);

-                       String[] words = s.split("[^A-Za-z0-9]");
-                       Integer lastPosition = null;
-                       lastPosition = (Integer)lastPositionById.get(id);
+                       if(positions == null) {
+                               positions = new Integer[] { new 
Integer(position) };
+                               wordPositionsForOneUri.put(word, positions);
+                       } else {
+                               Integer[] newPositions = new 
Integer[positions.length + 1];

-                       if(lastPosition == null)
-                               lastPosition = new Integer(1); /* We start to 
count from 1 */
-                       for (int i = 0; i < words.length; i++) {
-                               String word = words[i];
-                               if ((word == null) || (word.length() == 0))
-                                       continue;
-                               word = word.toLowerCase();
-                               try{
-                                       if(type == null)
-                                               addWord(word, 
lastPosition.intValue() + i, id);
-                                       else
-                                               addWord(word, -1 * (i+1), id);
-                               }
-                               catch (Exception e){}
-                       }
+                               System.arraycopy(positions, 0, newPositions, 0, 
positions.length);
+                               newPositions[positions.length] = new 
Integer(position);

-                       if(type == null) {
-                               lastPosition = new 
Integer(lastPosition.intValue() + words.length);
-                               lastPositionById.put(id, lastPosition);
+                               wordPositionsForOneUri.put(word, newPositions);
                        }
-
                }
-
-               private synchronized void addWord(String word, int position,int 
id) throws Exception{
-                       if(word.length() < 3)
-                               return;
-
-                       Integer[] ids = (Integer[]) idsByWord.get(word);
-                       idsWithWords.add(id);
-
-                       /* Word position indexation */
-                       HashMap wordPositionsForOneUri = 
(HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word, 
and gives position */
-                       if(wordPositionsForOneUri == null) {
-                               wordPositionsForOneUri = new HashMap();
-                               wordPositionsForOneUri.put(word, new Integer[] 
{ new Integer(position) });
-                               positionsByWordById.put(id, 
wordPositionsForOneUri);
-                       } 
-                       else {
-                               Integer[] positions = 
(Integer[])wordPositionsForOneUri.get(word);
-                               if(positions == null) {
-                                       positions = new Integer[] { new 
Integer(position) };
-                                       wordPositionsForOneUri.put(word, 
positions);
-                               } 
-                               else {
-                                       Integer[] newPositions = new 
Integer[positions.length + 1];
-                                       System.arraycopy(positions, 0, 
newPositions, 0, positions.length);
-                                       newPositions[positions.length] = new 
Integer(position);
-                                       wordPositionsForOneUri.put(word, 
newPositions);
-                               }
+       
+               if (ids == null) {
+                       idsByWord.put(word, new Integer[] { id });
+                       
+               } else {
+                       for (int i = 0; i < ids.length; i++) {
+                               if (ids[i].equals(id))
+                                       return;
                        }
-
-                       if (ids == null) {
-                               idsByWord.put(word, new Integer[] { id });
-                       } else {
-                               for (int i = 0; i < ids.length; i++) {
-                                       if (ids[i].equals(id))
-                                               return;
+                       Integer[] newIDs = new Integer[ids.length + 1];
+                       System.arraycopy(ids, 0, newIDs, 0, ids.length);
+                       newIDs[ids.length] = id;
+                       idsByWord.put(word, newIDs);
+               }
+               //the new word is added here in urisByWord
+               tMap.put(MD5(word), word);
+               long time_indexing = System.currentTimeMillis();
+               if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 < 
System.currentTimeMillis()) {
+                       try {
+                               //produceIndex();
+                               //check();
+                               
+                               if(indexing){
+                               generateIndex2();
+                               produceIndex2();
+                               if((System.currentTimeMillis() - 
time_indexing)/(System.currentTimeMillis() - tProducedIndex) > 
MAX_TIME_SPENT_INDEXING) indexing= false;
+                               else indexing = true;
                                }
-                               Integer[] newIDs = new Integer[ids.length + 1];
-                               System.arraycopy(ids, 0, newIDs, 0, ids.length);
-                               newIDs[ids.length] = id;
-                               idsByWord.put(word, newIDs);
+                               
+                       } catch (IOException e) {
+                               Logger.error(this, "Caught " + e + " while 
creating index", e);
                        }
-
-                       tMap.put(MD5(word), word);
-                       long time_indexing = System.currentTimeMillis();
-                       if (tProducedIndex + minTimeBetweenEachIndexRewriting * 
10 < System.currentTimeMillis()) {
-                               try {
-                                       if(indexing){
-                                               generateIndex2();
-                                               produceIndex2();
-                                               /*
-                                                * ensures that index 
production doesn't eat up the processor time 
-                                                */
-                                               if((System.currentTimeMillis() 
- time_indexing)/(System.currentTimeMillis() - tProducedIndex) > 
MAX_TIME_SPENT_INDEXING) indexing= false;
-                                               else indexing = true;
-                                       }
-                               } catch (IOException e) {
-                                       Logger.error(this, "Caught " + e + " 
while creating index", e);
-                               }
-                               tProducedIndex = System.currentTimeMillis();
-                       }
+                       tProducedIndex = System.currentTimeMillis();
                }
+               
        }
+       
+}
+public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
+       return null;
+}
+public String handleHTTPPost(HTTPRequest request) throws PluginHTTPException{
+       return null;
+}

-
-       public String handleHTTPPut(HTTPRequest request) throws 
PluginHTTPException{
-               return null;
-       }
-       public String handleHTTPPost(HTTPRequest request) throws 
PluginHTTPException{
-               return null;
-       }
-
-       public void onFoundEdition(long l, USK key){
-               FreenetURI uri = key.getURI();
-               if(runningFetchesByURI.containsKey(uri)) 
runningFetchesByURI.remove(uri);
-               uri = key.getURI().setSuggestedEdition(l);
-               queueURI(uri);
-       }
-
+public void onFoundEdition(long l, USK key){
+       FreenetURI uri = key.getURI();
+       if(runningFetchesByURI.containsKey(uri)) 
runningFetchesByURI.remove(uri);
+       uri = key.getURI().setSuggestedEdition(l);
+       queueURI(uri);
 }
+       
+       
+}


Reply via email to