Author: swatig0
Date: 2007-07-12 15:42:51 +0000 (Thu, 12 Jul 2007)
New Revision: 14044
Modified:
trunk/plugins/XMLSpider/XMLSpider.java
Log:
XMLSpider as plugin
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2007-07-12 14:34:11 UTC (rev
14043)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2007-07-12 15:42:51 UTC (rev
14044)
@@ -22,6 +22,8 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
+import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
@@ -55,7 +57,6 @@
import freenet.clients.http.filter.UnsafeContentTypeException;
import freenet.keys.FreenetURI;
import freenet.keys.USK;
-import freenet.node.Node;
import freenet.node.NodeClientCore;
import freenet.node.RequestStarter;
import freenet.oldplugins.plugin.HttpPlugin;
@@ -71,12 +72,14 @@
import freenet.support.MultiValueTable;
import freenet.support.api.Bucket;
import freenet.support.api.HTTPRequest;
+
/**
* Spider. Produces an index.
*/
-public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginHTTPAdvanced, HttpPlugin, ClientCallback,
FoundURICallback ,USKCallback{
+public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginHTTPAdvanced,HttpPlugin, ClientCallback,
FoundURICallback ,USKCallback{
long tProducedIndex;
+ private TreeMap tMap = new TreeMap();
int count;
// URIs visited, or fetching, or queued. Added once then forgotten
about.
private final HashSet visitedURIs = new HashSet();
@@ -87,14 +90,18 @@
private final HashMap runningFetchesByURI = new HashMap();
private final HashMap urisByWord = new HashMap();
private final HashMap titlesOfURIs = new HashMap();
-
+ private Vector indices;
+ private int match;
+ private Vector list;
+ private boolean indexing ;
private static final int minTimeBetweenEachIndexRewriting = 10;
//private static final String indexFilename = "index.xml";
- private static final String DEFAULT_INDEX_DIR = "myindex/";
+ private static final String DEFAULT_INDEX_DIR = "myindex3/";
public Set allowedMIMETypes;
private static final int MAX_ENTRIES = 5;
private static final String pluginName = "XML spider";
-
+ private static final double MAX_TIME_SPENT_INDEXING = 0.5;
+ //MAX_TIME_SPENT_INDEXING is the fraction of the total time allowed to
be spent on indexing (max value = 1)
private static final String indexTitle= "This is an index";
private static final String indexOwner = "Another anonymous";
private static final String indexOwnerEmail = null;
@@ -104,12 +111,12 @@
private final HashMap positionsByWordByURI = new HashMap(); /* String
(URI) -> HashMap (String (word) -> Integer[] (Positions)) */
// Can have many; this limit only exists to save memory.
- private static final int maxParallelRequests = 20;
+ private static final int maxParallelRequests = 100;
private int maxShownURIs = 50;
private HashMap urisToNumbers;
private NodeClientCore core;
private FetchContext ctx;
- private final short PRIORITY_CLASS =
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
+ private final short PRIORITY_CLASS =
RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS;
private boolean stopped = true;
PluginRespirator pr;
@@ -119,7 +126,7 @@
if((uri.getKeyType()).equals("USK")){
if(uri.getSuggestedEdition() < 0)
uri = uri.setSuggestedEdition((-1)*
uri.getSuggestedEdition());
- }
+ }
if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
queuedURIList.addLast(uri);
visitedURIs.add(uri);
@@ -154,7 +161,8 @@
FreenetURI uri = (FreenetURI)
queuedURIList.removeFirst();
queuedURISet.remove(uri);
if((uri.getKeyType()).equals("USK")){
-
+// if(uri.getSuggestedEdition() < 0)
+// uri = uri.setSuggestedEdition((-1)*
uri.getSuggestedEdition());
try{
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);
}catch(Exception e){
@@ -172,7 +180,7 @@
try {
runningFetchesByURI.put(g.getURI(), g);
g.start();
- FileWriter outp = new
FileWriter("logfile23",true);
+ FileWriter outp = new
FileWriter("logfile2",true);
outp.write("URI "+g.getURI().toString()+'\n');
outp.close();
@@ -278,14 +286,7 @@
return;
}
- try{
- FileWriter outp = new FileWriter("onText",true);
- outp.write("inside on text\n");
- outp.close();
-
- }catch(Exception e2){
-
- }
+
if((type != null) && (type.length() != 0) &&
type.toLowerCase().equals("title")
&& (s != null) && (s.length() != 0) && (s.indexOf('\n') <
0)) {
@@ -332,15 +333,17 @@
if(word.length() < 3)
return;
+
+ //word = word.intern();
-
+
FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
//Integer[] positions = (Integer[])
positionsByWordByURI.get(word);
urisWithWords.add(uri);
-
+
/* Word position indexation */
HashMap wordPositionsForOneUri =
(HashMap)positionsByWordByURI.get(uri.toString()); /* For a given URI, take as
key a word, and gives position */
@@ -363,7 +366,7 @@
wordPositionsForOneUri.put(word, newPositions);
}
}
-
+
if (uris == null) {
urisByWord.put(word, new FreenetURI[] { uri });
@@ -377,13 +380,21 @@
newURIs[uris.length] = uri;
urisByWord.put(word, newURIs);
}
-
-
- //produceIndex();
+ //the new word is added here in urisByWord
+ tMap.put(MD5(word), word);
+ long time_indexing = System.currentTimeMillis();
if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 <
System.currentTimeMillis()) {
try {
- produceIndex();
- generateIndex();
+ //produceIndex();
+ //check();
+
+ if(indexing){
+ generateIndex2();
+ produceIndex2();
+ if((System.currentTimeMillis() -
time_indexing)/(System.currentTimeMillis() - tProducedIndex) >
MAX_TIME_SPENT_INDEXING) indexing= false;
+ else indexing = true;
+ }
+
} catch (IOException e) {
Logger.error(this, "Caught " + e + " while
creating index", e);
}
@@ -391,18 +402,25 @@
}
}
+// private synchronized void check() throws IOException{
+// FileWriter outp = new FileWriter("logs/indexing",true);
+// outp.write("size = "+urisByWord.size()+"\n");
+// Iterator it = urisByWord.keySet().iterator();
+// while(it.hasNext())
+// outp.write(it.next()+"\n");
+// outp.close();
+// }
private synchronized void produceIndex() throws
IOException,NoSuchAlgorithmException {
// Produce the main index file.
//the number of bits to consider for matching
int prefix = 1 ;
-
+
if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
System.out.println("No URIs with words");
return;
}
-
File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
StreamResult resultStream;
resultStream = new StreamResult(outputFile);
@@ -524,6 +542,393 @@
}
+ private synchronized void produceIndex2() throws
IOException,NoSuchAlgorithmException {
+ // Produce the main index file.
+
+ //the number of bits to consider for matching
+
+
+ if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
+ System.out.println("No URIs with words");
+ return;
+ }
+ File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
+ StreamResult resultStream;
+ resultStream = new StreamResult(outputFile);
+
+ /* Initialize xml builder */
+ Document xmlDoc = null;
+ DocumentBuilderFactory xmlFactory = null;
+ DocumentBuilder xmlBuilder = null;
+ DOMImplementation impl = null;
+ Element rootElement = null;
+
+ xmlFactory = DocumentBuilderFactory.newInstance();
+
+
+ try {
+ xmlBuilder = xmlFactory.newDocumentBuilder();
+ } catch(javax.xml.parsers.ParserConfigurationException e) {
+ /* Will (should ?) never happen */
+ Logger.error(this, "Spider: Error while initializing
XML generator: "+e.toString());
+ return;
+ }
+
+ impl = xmlBuilder.getDOMImplementation();
+ /* Starting to generate index */
+ xmlDoc = impl.createDocument(null, "main_index", null);
+ rootElement = xmlDoc.getDocumentElement();
+
+ /* Adding header to the index */
+ Element headerElement = xmlDoc.createElement("header");
+
+ /* -> title */
+ Element subHeaderElement = xmlDoc.createElement("title");
+ Text subHeaderText = xmlDoc.createTextNode(indexTitle);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+
+ /* -> owner */
+ subHeaderElement = xmlDoc.createElement("owner");
+ subHeaderText = xmlDoc.createTextNode(indexOwner);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+
+ /* -> owner email */
+ if(indexOwnerEmail != null) {
+ subHeaderElement = xmlDoc.createElement("email");
+ subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+ }
+
+
+ //String[] words = (String[]) urisByWord.keySet().toArray(new
String[urisByWord.size()]);
+ //Arrays.sort(words);
+
+ Element prefixElement = xmlDoc.createElement("prefix");
+ //prefixElement.setAttribute("value",match+"");
+ //this match will be set after processing the TreeMap
+
+
+
+ //all index files are ready
+ /* Adding word index */
+ Element keywordsElement = xmlDoc.createElement("keywords");
+ for(int i = 0;i<indices.size();i++){
+
//generateSubIndex(DEFAULT_INDEX_DIR+"index_"+Integer.toHexString(i)+".xml");
+ Element subIndexElement =
xmlDoc.createElement("subIndex");
+// if(i<=9)
+// subIndexElement.setAttribute("key",i+"");
+// else
+//
subIndexElement.setAttribute("key",Integer.toHexString(i));
+ subIndexElement.setAttribute("key", (String)
indices.elementAt(i));
+ //the subindex element key will contain the bits used
for matching in that subindex
+ keywordsElement.appendChild(subIndexElement);
+ }
+
+ prefixElement.setAttribute("value",match+"");
+ // make sure that prefix is the first child of root Element
+ rootElement.appendChild(prefixElement);
+ rootElement.appendChild(headerElement);
+
+ //rootElement.appendChild(filesElement);
+ rootElement.appendChild(keywordsElement);
+
+ /* Serialization */
+ DOMSource domSource = new DOMSource(xmlDoc);
+ TransformerFactory transformFactory =
TransformerFactory.newInstance();
+ Transformer serializer;
+
+ try {
+ serializer = transformFactory.newTransformer();
+ } catch(javax.xml.transform.TransformerConfigurationException
e) {
+ Logger.error(this, "Spider: Error while serializing XML
(transformFactory.newTransformer()): "+e.toString());
+ return;
+ }
+
+ serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+ serializer.setOutputProperty(OutputKeys.INDENT,"yes");
+
+ /* final step */
+ try {
+ serializer.transform(domSource, resultStream);
+ } catch(javax.xml.transform.TransformerException e) {
+ Logger.error(this, "Spider: Error while serializing XML
(transform()): "+e.toString());
+ return;
+ }
+
+ if(Logger.shouldLog(Logger.MINOR, this))
+ Logger.minor(this, "Spider: indexes regenerated.");
+
+ //the main xml file is generated
+ //now as each word is generated enter it into the respective subindex
+ //now the parsing will start and nodes will be added as needed
+
+
+ }
+ private synchronized void generateIndex2() throws Exception{
+ // now we have the tree map, and we need to use the sorted MD5 keys to
generate the xml indices
+ if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
+ System.out.println("No URIs with words");
+ return;
+ }
+ FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new
FreenetURI[urisWithWords.size()]);
+ urisToNumbers = new HashMap();
+ for (int i = 0; i < uris.length; i++) {
+ urisToNumbers.put(uris[i], new Integer(i));
+ }
+ indices = new Vector();
+ int prefix = 1;
+ match = 1;
+ Vector list = new Vector();
+ //String str = tMap.firstKey();
+ Iterator it = tMap.keySet().iterator();
+ FileWriter outp = new FileWriter("indexing");
+ outp.write("size = "+tMap.size()+"\n");
+ outp.close();
+ String str = (String) it.next();
+ int i = 0,index =0;
+ while(it.hasNext())
+ {
+ outp = new FileWriter("indexing",true);
+ String key =(String) it.next();
+ outp.write(key + "\n");
+ outp.close();
+ if(key.substring(0, prefix).equals(str.substring(0,
prefix)))
+ {i++;
+ list.add(key);
+ }
+ else {
+ generateSubIndex(prefix,list);
+ str = key;
+ list = new Vector();
+// int count = list.size();
+// if(count > MAX_ENTRIES){
+// //the index has to be split up
+// generateSubIndex(prefix,list);
+// }
+// else generateXML(list,prefix);
+// str = key;
+// list = new Vector();
+ }
+ //
+ // this variable will keep the number of digits to be used
+ }
+
+ generateSubIndex(prefix,list);
+ }
+ private synchronized Vector subVector(Vector list, int begin, int end){
+ Vector tmp = new Vector();
+ for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i));
+ return tmp;
+ }
+
+ private synchronized void generateSubIndex(int p,Vector list) throws
Exception{
+
+ if(list.size() < MAX_ENTRIES)
+ {
+ //the index can be generated from this list
+ generateXML(list,p);
+ }
+ else
+ {
+ //this means that prefix needs to be incremented
+ if(match <= p) match = p+1;
+ int prefix = p+1;
+ int i =0;
+ String str = (String) list.elementAt(i);
+ int index=0;
+ while(i<list.size())
+ {
+ String key = (String) list.elementAt(i);
+ if((key.substring(0,
prefix)).equals(str.substring(0, prefix)))
+ {
+ //index = i;
+ i++;
+ }
+ else {
+
//generateXML(subVector(list,index,i-1),prefix);
+
generateSubIndex(prefix,subVector(list,index,i-1));
+ index = i;
+ str = key;
+ }
+
+
+ }
+ generateSubIndex(prefix,subVector(list,index,i-1));
+ }
+ }
+
+
+ private synchronized void generateXML(Vector list, int prefix)
+ {
+ String p = ((String) list.elementAt(0)).substring(0, prefix);
+ indices.add(p);
+ File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+p+".xml");
+ //indices.add(p);
+ StreamResult resultStream;
+ resultStream = new StreamResult(outputFile);
+
+ /* Initialize xml builder */
+ Document xmlDoc = null;
+ DocumentBuilderFactory xmlFactory = null;
+ DocumentBuilder xmlBuilder = null;
+ DOMImplementation impl = null;
+ Element rootElement = null;
+
+ xmlFactory = DocumentBuilderFactory.newInstance();
+
+
+ try {
+ xmlBuilder = xmlFactory.newDocumentBuilder();
+ } catch(javax.xml.parsers.ParserConfigurationException e) {
+ /* Will (should ?) never happen */
+ Logger.error(this, "Spider: Error while initializing
XML generator: "+e.toString());
+ return;
+ }
+
+
+ impl = xmlBuilder.getDOMImplementation();
+
+ /* Starting to generate index */
+
+ xmlDoc = impl.createDocument(null, "sub_index", null);
+ rootElement = xmlDoc.getDocumentElement();
+
+ /* Adding header to the index */
+ Element headerElement = xmlDoc.createElement("header");
+
+ /* -> title */
+ Element subHeaderElement = xmlDoc.createElement("title");
+ Text subHeaderText = xmlDoc.createTextNode(indexTitle);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+
+ /* -> owner */
+ subHeaderElement = xmlDoc.createElement("owner");
+ subHeaderText = xmlDoc.createTextNode(indexOwner);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+
+
+ /* -> owner email */
+ if(indexOwnerEmail != null) {
+ subHeaderElement = xmlDoc.createElement("email");
+ subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
+
+ subHeaderElement.appendChild(subHeaderText);
+ headerElement.appendChild(subHeaderElement);
+ }
+
+
+ Element filesElement = xmlDoc.createElement("files"); /*
filesElement != fileElement */
+
+ Element EntriesElement = xmlDoc.createElement("entries");
+ EntriesElement.setNodeValue(list.size()+"");
+ EntriesElement.setAttribute("value", list.size()+"");
+ //all index files are ready
+ /* Adding word index */
+ Element keywordsElement = xmlDoc.createElement("keywords");
+ //words to be added
+ Vector fileid = new Vector();
+ for(int i =0;i<list.size();i++)
+ {
+ Element wordElement = xmlDoc.createElement("word");
+ String str = (String) tMap.get(list.elementAt(i));
+ wordElement.setAttribute("v",str );
+ FreenetURI[] urisForWord = (FreenetURI[])
urisByWord.get(str);
+//
+ for (int j = 0; j < urisForWord.length; j++) {
+ FreenetURI uri = urisForWord[j];
+ Integer x = (Integer) urisToNumbers.get(uri);
+
+ if (x == null) {
+ Logger.error(this, "Eh?");
+ continue;
+ }
+//
+ Element uriElement =
xmlDoc.createElement("file");
+ Element fileElement =
xmlDoc.createElement("file");
+ uriElement.setAttribute("id", x.toString());
+ fileElement.setAttribute("id", x.toString());
+ fileElement.setAttribute("key", uri.toString());
+//// /* Position by position */
+ HashMap positionsForGivenWord =
(HashMap)positionsByWordByURI.get(uri.toString());
+ Integer[] positions =
(Integer[])positionsForGivenWord.get(str);
+
+ StringBuffer positionList = new StringBuffer();
+
+ for(int k=0; k < positions.length ; k++) {
+ if(k!=0)
+ positionList.append(',');
+
+
positionList.append(positions[k].toString());
+ }
+
+
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
+ int l;
+ wordElement.appendChild(uriElement);
+// for(l =
0;l<filesElement.getChildNodes().getLength();l++)
+// { Element file = (Element)
filesElement.getChildNodes().item(l);
+// if(file.getAttribute("id").equals(x.toString()))
+//
+// break;
+// }
+
+// if(l>=filesElement.getChildNodes().getLength())
+// filesElement.appendChild(fileElement);
+ if(!fileid.contains(x.toString()))
+ {
+ fileid.add(x.toString());
+ filesElement.appendChild(fileElement);
+ }
+ }
+
+ //Element keywordsElement = (Element)
root.getElementsByTagName("keywords").item(0);
+ keywordsElement.appendChild(wordElement);
+//
+ }
+//
+
+ rootElement.appendChild(EntriesElement);
+ rootElement.appendChild(headerElement);
+ rootElement.appendChild(filesElement);
+ rootElement.appendChild(keywordsElement);
+
+ /* Serialization */
+ DOMSource domSource = new DOMSource(xmlDoc);
+ TransformerFactory transformFactory =
TransformerFactory.newInstance();
+ Transformer serializer;
+
+ try {
+ serializer = transformFactory.newTransformer();
+ } catch(javax.xml.transform.TransformerConfigurationException
e) {
+ Logger.error(this, "Spider: Error while serializing XML
(transformFactory.newTransformer()): "+e.toString());
+ return;
+ }
+
+
+ serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+ serializer.setOutputProperty(OutputKeys.INDENT,"yes");
+
+ /* final step */
+ try {
+ serializer.transform(domSource, resultStream);
+ } catch(javax.xml.transform.TransformerException e) {
+ Logger.error(this, "Spider: Error while serializing XML
(transform()): "+e.toString());
+ return;
+ }
+
+ if(Logger.shouldLog(Logger.MINOR, this))
+ Logger.minor(this, "Spider: indexes regenerated.");
+
+ }
private synchronized void generateIndex() throws Exception{
String[] words = (String[]) urisByWord.keySet().toArray(new
String[urisByWord.size()]);
Arrays.sort(words);
@@ -901,6 +1306,7 @@
ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
// ctx.allowedMIMETypes.add("text/html");
tProducedIndex = System.currentTimeMillis();
+ indexing = true;
}
@@ -1066,111 +1472,6 @@
Logger.minor(this, "Spider: indexes regenerated.");
}
-
- public String handleHTTPGet(HTTPRequest request) throws
PluginHTTPException{
- StringBuffer out = new StringBuffer();
- // need to produce pretty html
- //later fredpluginhttpadvanced will give the interface
- //this brings us to the page from visit
-
- appendDefaultPageStart(out,null);
- String uriParam = request.getParam("adduri");
- if(uriParam != null && uriParam.length() != 0)
- {
- try {
- FreenetURI uri = new FreenetURI(uriParam);
- synchronized (this) {
- failedURIs.remove(uri);
- visitedURIs.remove(uri);
- }
- out.append("<p>URI added :"+uriParam+"</p>");
- queueURI(uri);
- startSomeRequests();
- } catch (MalformedURLException mue1) {
- out.append("<p>MalFormed URI: "+uriParam+"</p");
- }
- }
-
-// if(action == null || action.length() == 0){
-// //put the default post fields
-// appendDefaultPageStart(out,null);
-//
-// } else if ("list".equals(action)) {
-// String listName = request.getParam("listName", null);
-// out.append("<p>list clicked</CENTER></BODY></HTML>");
-// if(listName == null){
-// //display all th
-// Set runningFetches = new
HashMap(runningFetchesByURI).keySet();
-// List queued = new ArrayList(queuedURIList);
-// Set visited = new HashSet(visitedURIs);
-// Set failed = new HashSet(failedURIs);
-//
-// out.append("<p><h3>Running Fetches</h3>");
-// Iterator it=runningFetches.iterator();
-// while(it.hasNext()){
-//
out.append("<code>"+(it.next()).toString()+"</code><br>");
-// }
-// }
-// else{
-// //display individual results
-// }
-// }
-// else if ("add".equals(action)) {
-// String uriParam = request.getParam("key");
-// try {
-// FreenetURI uri = new FreenetURI(uriParam);
-// synchronized (this) {
-// failedURIs.remove(uri);
-// visitedURIs.remove(uri);
-// }
-// queueURI(uri);
-// startSomeRequests();
-// } catch (MalformedURLException mue1) {
-// out.append("<h1>URL invalid</h1>");
-//// sendSimpleResponse(context, "URL invalid", "The
given URI is not valid.");
-//// return;
-// }
-// //not really necc
-//// MultiValueTable responseHeaders = new MultiValueTable();
-//// responseHeaders.put("Location", "?action=list");
-//// context.sendReplyHeaders(301, "Redirect",
responseHeaders, "text/html; charset=utf-8", 0);
-//
-//
-// }
-
- return out.toString();
- }
- private void appendDefaultPageStart(StringBuffer out, String
stylesheet) {
- count ++;
- out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
- if(stylesheet != null)
- out.append("<link href=\""+stylesheet+"\"
type=\"text/css\" rel=\"stylesheet\" />");
- out.append("</HEAD><BODY>\n");
- out.append("<CENTER><H1>" + pluginName +
"</H1><BR/><BR/><BR/>\n");
- out.append("Add uri:");
- out.append("<form method=\"GET\"><input type=\"text\"
name=\"adduri\" /><br/><br/>");
- out.append("<input type=\"submit\" value=\"Add uri\"
/></form>");
- Set runningFetches = runningFetchesByURI.keySet();
- out.append("<p><h3>Running Fetches</h3></p>");
- Set visited = new HashSet(visitedURIs);
- List queued = new ArrayList(queuedURIList);
- Set failed = new HashSet(failedURIs);
- Iterator it=queued.iterator();
- out.append("<br/>Size :"+runningFetches.size());
- out.append("<br/>Size :"+queued.size());
- out.append("<br/>Size :"+visited.size());
- out.append("<br/>Size :"+failed.size());
- out.append("<br/>Count : "+count);
- while(it.hasNext()){
-
out.append("<code>"+(it.next()).toString()+"</code><br>");
- }
- }
- public String handleHTTPPut(HTTPRequest request) throws
PluginHTTPException{
- return null;
- }
- public String handleHTTPPost(HTTPRequest request) throws
PluginHTTPException{
- return null;
- }
public void terminate(){
synchronized (this) {
stopped = true;
@@ -1180,9 +1481,9 @@
public void runPlugin(PluginRespirator pr){
this.pr = pr;
- this.core = ((Node) pr.getNode()).clientCore;
+ this.core = pr.getNode().clientCore;
this.ctx = core.makeClient((short) 0).getFetchContext();
- ctx.maxSplitfileBlockRetries = 3;
+ ctx.maxSplitfileBlockRetries = 10;
ctx.maxNonSplitfileRetries = 10;
ctx.maxTempLength = 2 * 1024 * 1024;
ctx.maxOutputLength = 2 * 1024 * 1024;
@@ -1190,9 +1491,11 @@
allowedMIMETypes.add(new String("text/html"));
allowedMIMETypes.add(new String("text/plain"));
allowedMIMETypes.add(new String("application/xhtml+xml"));
+// allowedMIMETypes.add(new String("application/zip"));
ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+// ctx.allowedMIMETypes.add("text/html");
tProducedIndex = System.currentTimeMillis();
-
+ indexing = true;
stopped = false;
count = 0;
Thread starterThread = new Thread("Spider Plugin Starter") {
@@ -1207,7 +1510,111 @@
starterThread.start();
}
+public String handleHTTPGet(HTTPRequest request) throws PluginHTTPException{
+ StringBuffer out = new StringBuffer();
+ // need to produce pretty html
+ //later fredpluginhttpadvanced will give the interface
+ //this brings us to the page from visit
+ appendDefaultPageStart(out,null);
+ String uriParam = request.getParam("adduri");
+ if(uriParam != null && uriParam.length() != 0)
+ {
+ try {
+ FreenetURI uri = new FreenetURI(uriParam);
+ synchronized (this) {
+ failedURIs.remove(uri);
+ visitedURIs.remove(uri);
+ }
+ out.append("<p>URI added :"+uriParam+"</p>");
+ queueURI(uri);
+ startSomeRequests();
+ } catch (MalformedURLException mue1) {
+ out.append("<p>MalFormed URI: "+uriParam+"</p");
+ }
+ }
+
+// if(action == null || action.length() == 0){
+// //put the default post fields
+// appendDefaultPageStart(out,null);
+//
+// } else if ("list".equals(action)) {
+// String listName = request.getParam("listName", null);
+// out.append("<p>list clicked</CENTER></BODY></HTML>");
+// if(listName == null){
+// //display all th
+// Set runningFetches = new
HashMap(runningFetchesByURI).keySet();
+// List queued = new ArrayList(queuedURIList);
+// Set visited = new HashSet(visitedURIs);
+// Set failed = new HashSet(failedURIs);
+//
+// out.append("<p><h3>Running Fetches</h3>");
+// Iterator it=runningFetches.iterator();
+// while(it.hasNext()){
+//
out.append("<code>"+(it.next()).toString()+"</code><br>");
+// }
+// }
+// else{
+// //display individual results
+// }
+// }
+// else if ("add".equals(action)) {
+// String uriParam = request.getParam("key");
+// try {
+// FreenetURI uri = new FreenetURI(uriParam);
+// synchronized (this) {
+// failedURIs.remove(uri);
+// visitedURIs.remove(uri);
+// }
+// queueURI(uri);
+// startSomeRequests();
+// } catch (MalformedURLException mue1) {
+// out.append("<h1>URL invalid</h1>");
+//// sendSimpleResponse(context, "URL invalid", "The given
URI is not valid.");
+//// return;
+// }
+// //not really necc
+//// MultiValueTable responseHeaders = new MultiValueTable();
+//// responseHeaders.put("Location", "?action=list");
+//// context.sendReplyHeaders(301, "Redirect", responseHeaders,
"text/html; charset=utf-8", 0);
+//
+//
+// }
+
+ return out.toString();
+}
+private void appendDefaultPageStart(StringBuffer out, String stylesheet) {
+ count ++;
+ out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+ if(stylesheet != null)
+ out.append("<link href=\""+stylesheet+"\" type=\"text/css\"
rel=\"stylesheet\" />");
+ out.append("</HEAD><BODY>\n");
+ out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
+ out.append("Add uri:");
+ out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\"
/><br/><br/>");
+ out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+ Set runningFetches = runningFetchesByURI.keySet();
+ out.append("<p><h3>Running Fetches</h3></p>");
+ Set visited = new HashSet(visitedURIs);
+ List queued = new ArrayList(queuedURIList);
+ Set failed = new HashSet(failedURIs);
+ Iterator it=queued.iterator();
+ out.append("<br/>Size :"+runningFetches.size());
+ out.append("<br/>Size :"+queued.size());
+ out.append("<br/>Size :"+visited.size());
+ out.append("<br/>Size :"+failed.size());
+ out.append("<br/>Count : "+count);
+ while(it.hasNext()){
+ out.append("<code>"+(it.next()).toString()+"</code><br>");
+ }
+}
+public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
+ return null;
+}
+public String handleHTTPPost(HTTPRequest request) throws PluginHTTPException{
+ return null;
+}
+
public void onFoundEdition(long l, USK key){
FreenetURI uri = key.getURI();
if(runningFetchesByURI.containsKey(uri))
runningFetchesByURI.remove(uri);