Author: swatig0
Date: 2007-08-16 20:30:56 +0000 (Thu, 16 Aug 2007)
New Revision: 14727
Modified:
trunk/plugins/XMLSpider/XMLSpider.java
Log:
URI-id transformation
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2007-08-16 20:28:11 UTC (rev
14726)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2007-08-16 20:30:56 UTC (rev
14727)
@@ -4,6 +4,7 @@
package plugins.XMLSpider;
import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
@@ -12,11 +13,14 @@
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;
@@ -29,10 +33,11 @@
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
-
+import org.w3c.dom.Attr;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import freenet.client.ClientMetadata;
@@ -44,6 +49,7 @@
import freenet.client.async.ClientCallback;
import freenet.client.async.ClientGetter;
import freenet.client.async.USKCallback;
+import freenet.clients.http.PageMaker;
import freenet.clients.http.ToadletContext;
import freenet.clients.http.ToadletContextClosedException;
import freenet.clients.http.filter.ContentFilter;
@@ -61,7 +67,9 @@
import freenet.pluginmanager.FredPluginThreadless;
import freenet.pluginmanager.PluginHTTPException;
import freenet.pluginmanager.PluginRespirator;
+import freenet.support.HTMLNode;
import freenet.support.Logger;
+import freenet.support.MultiValueTable;
import freenet.support.api.Bucket;
import freenet.support.api.HTTPRequest;
@@ -70,7 +78,7 @@
* If the size of the index grows beyond a specific threshold, the index is
split into several subindices.
* The indexing key is the md5 hash of the word.
*
- * @author swati
+ * @author swati goyal
*
*/
public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginHTTPAdvanced,HttpPlugin, ClientCallback,
USKCallback{
@@ -87,13 +95,14 @@
* Lists the uris that have been visited by the spider
*/
public final HashSet visitedURIs = new HashSet();
+ private final HashSet urisWithWords = new HashSet();
private final HashSet idsWithWords = new HashSet();
/**
*
* Lists the uris that were visited but failed.
*/
public final HashSet failedURIs = new HashSet();
-
+
private final HashSet queuedURISet = new HashSet();
/**
*
@@ -101,9 +110,9 @@
*/
public final LinkedList queuedURIList = new LinkedList();
private final HashMap runningFetchesByURI = new HashMap();
-
+ private final HashMap urisByWord = new HashMap();
private final HashMap idsByWord = new HashMap();
-
+ private final HashMap titlesOfURIs = new HashMap();
private final HashMap titlesOfIds = new HashMap();
private final HashMap uriIds = new HashMap();
private final HashMap idUris = new HashMap();
@@ -120,14 +129,14 @@
private Vector indices;
private int match;
private int id;
-
+ private Vector list;
private boolean indexing ;
-
+
private static final int minTimeBetweenEachIndexRewriting = 10;
- /**
- * directory where the generated indices are stored.
- * Needs to be created before it can be used
- */
+/**
+ * directory where the generated indices are stored.
+ * Needs to be created before it can be used
+ */
public static final String DEFAULT_INDEX_DIR = "myindex4/";
/**
* Lists the allowed mime types of the fetched page.
@@ -140,7 +149,7 @@
* maximum value = 1; minimum value = 0.
*/
public static final double MAX_TIME_SPENT_INDEXING = 0.5;
-
+
private static final String indexTitle= "XMLSpider index";
private static final String indexOwner = "Freenet";
private static final String indexOwnerEmail = null;
@@ -153,17 +162,17 @@
// Can have many; this limit only exists to save memory.
private static final int maxParallelRequests = 100;
private int maxShownURIs = 15;
-
+
private NodeClientCore core;
private FetchContext ctx;
private final short PRIORITY_CLASS =
RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS;
private boolean stopped = true;
PluginRespirator pr;
-
- /**
- * Adds the found uri to the list of to-be-retrieved uris. <p>Every usk
uri added as ssk.
- * @param uri the new uri that needs to be fetched for further indexing
- */
+
+/**
+ * Adds the found uri to the list of to-be-retrieved uris. <p>Every usk uri
added as ssk.
+ * @param uri the new uri that needs to be fetched for further indexing
+ */
public synchronized void queueURI(FreenetURI uri) {
if((uri.getKeyType()).equals("USK")){
if(uri.getSuggestedEdition() < 0)
@@ -226,18 +235,18 @@
}
}
}
+
-
private ClientGetter makeGetter(FreenetURI uri) {
ClientGetter g = new ClientGetter(this,
core.requestStarters.chkFetchScheduler, core.requestStarters.sskFetchScheduler,
uri, ctx, PRIORITY_CLASS, this, null, null);
return g;
}
- /**
- * Processes the successfully fetched uri for further outlinks.
- *
- * @param result
- * @param state
- */
+/**
+ * Processes the successfully fetched uri for further outlinks.
+ *
+ * @param result
+ * @param state
+ */
public void onSuccess(FetchResult result, ClientGetter state) {
FreenetURI uri = state.getURI();
@@ -270,7 +279,7 @@
data.free();
}
}
-
+
public void onFailure(FetchException e, ClientGetter state) {
FreenetURI uri = state.getURI();
@@ -297,14 +306,14 @@
}
/**
- * generates the main index file that can be used by librarian for
searching in the list of
- * subindices
- *
- * @param void
- * @author swati
- * @throws IOException
- * @throws NoSuchAlgorithmException
- */
+ * generates the main index file that can be used by librarian for searching
in the list of
+ * subindices
+ *
+ * @param void
+ * @author swati
+ * @throws IOException
+ * @throws NoSuchAlgorithmException
+ */
private synchronized void produceIndex2() throws
IOException,NoSuchAlgorithmException {
// Produce the main index file.
@@ -428,7 +437,7 @@
System.out.println("No URIs with words");
return;
}
-
+
indices = new Vector();
int prefix = 1;
match = 1;
@@ -460,7 +469,7 @@
for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i));
return tmp;
}
-
+
private synchronized void generateSubIndex(int p,Vector list) throws
Exception{
/*
* if the list is less than max allowed entries in a file then
directly generate the xml
@@ -497,12 +506,12 @@
}
}
- /**
- * generates the xml index with the given list of words with prefix
number of matching bits in md5
- * @param list list of the words to be added in the index
- * @param prefix number of matching bits of md5
- * @throws Exception
- */
+/**
+ * generates the xml index with the given list of words with prefix number of
matching bits in md5
+ * @param list list of the words to be added in the index
+ * @param prefix number of matching bits of md5
+ * @throws Exception
+ */
public synchronized void generateXML (Vector list, int prefix) throws
Exception
{
String p = ((String) list.elementAt(0)).substring(0, prefix);
@@ -616,20 +625,137 @@
Logger.minor(this, "Spider: indexes regenerated.");
}
-
+
public void handleGet(HTTPRequest request, ToadletContext context)
throws IOException, ToadletContextClosedException {
- /*
- * ignore
- */
+		/*
+		 * Serves the spider's status pages, selected by the "action" parameter:
+		 * none   -> redirect to "?action=list";
+		 * "list" -> overview of the four URI collections, or one complete
+		 *           collection when "listName" is given;
+		 * "add"  -> queue the URI passed in "key", then redirect to the list;
+		 * other  -> a short error page.
+		 */
+		String action = request.getParam("action");
+		PageMaker pageMaker = context.getPageMaker();
+		if ((action == null) || (action.length() == 0)) {
+			MultiValueTable responseHeaders = new MultiValueTable();
+			responseHeaders.put("Location", "?action=list");
+			context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0);
+			return;
+		} else if ("list".equals(action)) {
+			String listName = request.getParam("listName", null);
+			HTMLNode pageNode = pageMaker.getPageNode("The XML Spider", context);
+			HTMLNode contentNode = pageMaker.getContentNode(pageNode);
+			if (listName == null) {
+				Map runningFetches;
+				List queued;
+				Set visited;
+				Set failed;
+				/*
+				 * Copy the shared collections while holding the monitor that
+				 * queueURI() and the "add" branch also take, so updates made
+				 * under that monitor cannot interleave with the copying.
+				 * Rendering then works on the private copies only.
+				 */
+				synchronized (this) {
+					runningFetches = new HashMap(runningFetchesByURI);
+					queued = new ArrayList(queuedURIList);
+					visited = new HashSet(visitedURIs);
+					failed = new HashSet(failedURIs);
+				}
+				contentNode.addChild(createNavbar(pageMaker, runningFetches.size(), queued.size(), visited.size(), failed.size()));
+				contentNode.addChild(createAddBox(pageMaker, context));
+				contentNode.addChild(createList(pageMaker, "Running Fetches", "running", runningFetches.keySet(), maxShownURIs));
+				contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, maxShownURIs));
+				contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, maxShownURIs));
+				contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, maxShownURIs));
+			} else {
+				contentNode.addChild(createBackBox(pageMaker));
+				/* only the requested collection is copied; an unknown name shows just the back box */
+				String listTitle = null;
+				Collection listItems = null;
+				synchronized (this) {
+					if ("failed".equals(listName)) {
+						listTitle = "Failed URIs";
+						listItems = new HashSet(failedURIs);
+					} else if ("visited".equals(listName)) {
+						listTitle = "Visited URIs";
+						listItems = new HashSet(visitedURIs);
+					} else if ("queued".equals(listName)) {
+						listTitle = "Queued URIs";
+						listItems = new ArrayList(queuedURIList);
+					} else if ("running".equals(listName)) {
+						listTitle = "Running Fetches";
+						listItems = new HashMap(runningFetchesByURI).keySet();
+					}
+				}
+				if (listItems != null) {
+					/* -1: no limit, the whole collection is listed */
+					contentNode.addChild(createList(pageMaker, listTitle, listName, listItems, -1));
+				}
+			}
+			MultiValueTable responseHeaders = new MultiValueTable();
+			byte[] responseBytes = pageNode.generate().getBytes("utf-8");
+			context.sendReplyHeaders(200, "OK", responseHeaders, "text/html; charset=utf-8", responseBytes.length);
+			context.writeData(responseBytes);
+		} else if ("add".equals(action)) {
+			String uriParam = request.getParam("key");
+			if (uriParam == null) {
+				/* a missing key is reported exactly like a malformed one */
+				sendSimpleResponse(context, "URL invalid", "The given URI is not valid.");
+				return;
+			}
+			try {
+				FreenetURI uri = new FreenetURI(uriParam);
+				/* forget earlier results for this URI before it is queued again */
+				synchronized (this) {
+					failedURIs.remove(uri);
+					visitedURIs.remove(uri);
+				}
+				queueURI(uri);
+				startSomeRequests();
+			} catch (MalformedURLException mue1) {
+				sendSimpleResponse(context, "URL invalid", "The given URI is not valid.");
+				return;
+			}
+			MultiValueTable responseHeaders = new MultiValueTable();
+			responseHeaders.put("Location", "?action=list");
+			context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0);
+			return;
+		} else {
+			/* answer unknown actions too, instead of leaving the request without a reply */
+			sendSimpleResponse(context, "Unknown action", "The requested action is not supported.");
+		}
}
-
public void handlePost(HTTPRequest request, ToadletContext context)
throws IOException {
- /*
- * ignore
- */
}
+
+	/**
+	 * Sends a complete page that consists of a single infobox carrying a message.
+	 *
+	 * @param context the toadlet context the reply is written to
+	 * @param title used both as page title and as infobox title
+	 * @param message plain text placed inside the infobox
+	 * @throws ToadletContextClosedException passed on from the toadlet context
+	 * @throws IOException passed on from the toadlet context
+	 */
+	private void sendSimpleResponse(ToadletContext context, String title, String message) throws ToadletContextClosedException, IOException {
+		PageMaker maker = context.getPageMaker();
+		HTMLNode page = maker.getPageNode(title, context);
+		// NOTE(review): "infobox-alter" looks like a typo for an alert style class - confirm against the theme CSS
+		HTMLNode box = maker.getContentNode(page).addChild(maker.getInfobox("infobox-alter", title));
+		maker.getContentNode(box).addChild("#", message);
+		byte[] body = page.generate().getBytes("utf-8");
+		MultiValueTable noExtraHeaders = new MultiValueTable();
+		context.sendReplyHeaders(200, "OK", noExtraHeaders, "text/html; charset=utf-8", body.length);
+		context.writeData(body);
+	}
+
+	/**
+	 * Creates an untitled infobox with the sentence "Return to the list of all
+	 * URIs.", in which "list of all URIs" links to "?action=list".
+	 *
+	 * @param pageMaker page maker used to create the infobox
+	 * @return the infobox node
+	 */
+	private HTMLNode createBackBox(PageMaker pageMaker) {
+		HTMLNode box = pageMaker.getInfobox((String) null);
+		HTMLNode sentence = pageMaker.getContentNode(box);
+		// text node, link, text node - in reading order
+		sentence.addChild("#", "Return to the ");
+		sentence.addChild("a", "href", "?action=list", "list of all URIs");
+		sentence.addChild("#", ".");
+		return box;
+	}
+
+	/**
+	 * Creates the "Add a URI" infobox: a GET form with a hidden action=add
+	 * field, a text field named "key" and a submit button.
+	 *
+	 * @param pageMaker page maker used to create the infobox
+	 * @param ctx not used by this method
+	 * @return the infobox node containing the form
+	 */
+	private HTMLNode createAddBox(PageMaker pageMaker, ToadletContext ctx) {
+		String[] formAttributes = { "action", "method" };
+		String[] formValues = { "", "get" };
+		String[] hiddenAttributes = { "type", "name", "value" };
+		String[] hiddenValues = { "hidden", "action", "add" };
+		String[] textAttributes = { "type", "size", "name", "value" };
+		String[] textValues = { "text", "40", "key", "" };
+		String[] submitAttributes = { "type", "value" };
+		String[] submitValues = { "submit", "Add URI" };
+
+		HTMLNode box = pageMaker.getInfobox("Add a URI");
+		HTMLNode form = pageMaker.getContentNode(box).addChild("form", formAttributes, formValues);
+		form.addChild("input", hiddenAttributes, hiddenValues);
+		form.addChild("input", textAttributes, textValues);
+		form.addChild("input", submitAttributes, submitValues);
+		return box;
+	}
+	/**
+	 * Creates the "Page Navigation" infobox: a bullet list with one in-page link
+	 * per URI collection, each labelled with the collection's size.
+	 *
+	 * @param pageMaker page maker used to create the infobox
+	 * @param running number shown next to "Running"
+	 * @param queued number shown next to "Queued"
+	 * @param visited number shown next to "Visited"
+	 * @param failed number shown next to "Failed"
+	 * @return the infobox node
+	 */
+	private HTMLNode createNavbar(PageMaker pageMaker, int running, int queued, int visited, int failed) {
+		String[] anchors = { "#running", "#queued", "#visited", "#failed" };
+		String[] labels = { "Running", "Queued", "Visited", "Failed" };
+		int[] counts = { running, queued, visited, failed };
+
+		HTMLNode box = pageMaker.getInfobox("navbar", "Page Navigation");
+		HTMLNode entries = pageMaker.getContentNode(box).addChild("ul");
+		for (int i = 0; i < anchors.length; i++) {
+			// link text has the form "Running (3)"
+			entries.addChild("li").addChild("a", "href", anchors[i], labels[i] + " (" + counts[i] + ')');
+		}
+		return box;
+	}
+
+	/**
+	 * Builds one titled infobox that lists the URIs of a collection, preceded by
+	 * the named anchor the navigation bar links to.
+	 *
+	 * @param pageMaker page maker used to create the infobox
+	 * @param listName title of the infobox
+	 * @param anchorName name of the anchor; also sent as "listName" by the "Show all" link
+	 * @param collection the FreenetURI objects to list
+	 * @param maxCount maximum number of URIs to show; when more are available a
+	 *        "Show all" link follows the last one. A negative value shows everything.
+	 * @return a div holding the anchor and the infobox
+	 */
+	private HTMLNode createList(PageMaker pageMaker, String listName, String anchorName, Collection collection, int maxCount) {
+		HTMLNode listNode = new HTMLNode("div");
+		listNode.addChild("a", "name", anchorName);
+		HTMLNode listBox = pageMaker.getInfobox(listName);
+		HTMLNode listContent = pageMaker.getContentNode(listBox);
+		listNode.addChild(listBox);
+		Iterator collectionItems = collection.iterator();
+		int itemCount = 0;
+		while (collectionItems.hasNext()) {
+			/*
+			 * Check the limit before adding: exactly maxCount entries are shown
+			 * and the link appears only when at least one more entry exists.
+			 */
+			if ((maxCount >= 0) && (itemCount >= maxCount)) {
+				listContent.addChild("br");
+				listContent.addChild("a", "href", "?action=list&listName=" + anchorName, "Show all\u2026");
+				break;
+			}
+			FreenetURI uri = (FreenetURI) collectionItems.next();
+			listContent.addChild("#", uri.toString());
+			listContent.addChild("br");
+			itemCount++;
+		}
+		return listNode;
+	}
+
/**
* @see freenet.oldplugins.plugin.Plugin#getPluginName()
*/
@@ -641,7 +767,7 @@
* @see
freenet.oldplugins.plugin.Plugin#setPluginManager(freenet.oldplugins.plugin.PluginManager)
*/
public void setPluginManager(PluginManager pluginManager) {
-
+
this.core = pluginManager.getClientCore();
this.ctx = core.makeClient((short) 0).getFetchContext();
ctx.maxSplitfileBlockRetries = 10;
@@ -652,9 +778,9 @@
allowedMIMETypes.add(new String("text/html"));
allowedMIMETypes.add(new String("text/plain"));
allowedMIMETypes.add(new String("application/xhtml+xml"));
- // allowedMIMETypes.add(new String("application/zip"));
+ // allowedMIMETypes.add(new String("application/zip"));
ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
- // ctx.allowedMIMETypes.add("text/html");
+ // ctx.allowedMIMETypes.add("text/html");
tProducedIndex = System.currentTimeMillis();
indexing = true;
}
@@ -685,25 +811,22 @@
// Ignore
}
private static String convertToHex(byte[] data) {
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < data.length; i++) {
- int halfbyte = (data[i] >>> 4) & 0x0F;
- int two_halfs = 0;
- do {
- if ((0 <= halfbyte) && (halfbyte <= 9))
- buf.append((char) ('0' + halfbyte));
- else
- buf.append((char) ('a' + (halfbyte -
10)));
- halfbyte = data[i] & 0x0F;
- } while(two_halfs++ < 1);
- }
- return buf.toString();
- }
-
- /*
- * calculate the md5 for a given string
- */
- private static String MD5(String text) throws NoSuchAlgorithmException,
UnsupportedEncodingException {
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < data.length; i++) {
+ int halfbyte = (data[i] >>> 4) & 0x0F;
+ int two_halfs = 0;
+ do {
+ if ((0 <= halfbyte) && (halfbyte <= 9))
+ buf.append((char) ('0' + halfbyte));
+ else
+ buf.append((char) ('a' + (halfbyte - 10)));
+ halfbyte = data[i] & 0x0F;
+ } while(two_halfs++ < 1);
+ }
+ return buf.toString();
+ }
+ //this function will return the String representation of the MD5 hash
for the input string
+ public static String MD5(String text) throws NoSuchAlgorithmException,
UnsupportedEncodingException {
MessageDigest md;
md = MessageDigest.getInstance("MD5");
byte[] md5hash = new byte[32];
@@ -711,9 +834,9 @@
md5hash = md.digest();
return convertToHex(md5hash);
}
-
+
public void generateSubIndex(String filename){
-// generates the new subIndex
+//generates the new subIndex
File outputFile = new File(filename);
StreamResult resultStream;
resultStream = new StreamResult(outputFile);
@@ -750,28 +873,28 @@
/* -> title */
Element subHeaderElement = xmlDoc.createElement("title");
Text subHeaderText = xmlDoc.createTextNode(indexTitle);
-
+
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
/* -> owner */
subHeaderElement = xmlDoc.createElement("owner");
subHeaderText = xmlDoc.createTextNode(indexOwner);
-
+
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
-
-
+
+
/* -> owner email */
if(indexOwnerEmail != null) {
subHeaderElement = xmlDoc.createElement("email");
subHeaderText = xmlDoc.createTextNode(indexOwnerEmail);
-
+
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
}
-
+
Element filesElement = xmlDoc.createElement("files"); /*
filesElement != fileElement */
Element EntriesElement = xmlDoc.createElement("entries");
@@ -780,7 +903,7 @@
//all index files are ready
/* Adding word index */
Element keywordsElement = xmlDoc.createElement("keywords");
-
+
rootElement.appendChild(EntriesElement);
rootElement.appendChild(headerElement);
rootElement.appendChild(filesElement);
@@ -801,7 +924,7 @@
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
+
/* final step */
try {
serializer.transform(domSource, resultStream);
@@ -813,330 +936,361 @@
if(Logger.shouldLog(Logger.MINOR, this))
Logger.minor(this, "Spider: indexes regenerated.");
}
-
- public void terminate(){
- synchronized (this) {
- stopped = true;
- queuedURIList.clear();
+
+/**
+ * Stops the spider: while holding this object's monitor, sets the stopped
+ * flag and empties queuedURIList.
+ * NOTE(review): queuedURISet is not cleared here - confirm whether it has to
+ * stay in step with queuedURIList.
+ */
+public void terminate(){
+ synchronized (this) {
+ stopped = true;
+ queuedURIList.clear();
+ }
+}
+
+public void runPlugin(PluginRespirator pr){
+ this.pr = pr;
+ this.id = 0;
+ this.core = pr.getNode().clientCore;
+ this.ctx = core.makeClient((short) 0).getFetchContext();
+ ctx.maxSplitfileBlockRetries = 10;
+ ctx.maxNonSplitfileRetries = 10;
+ ctx.maxTempLength = 2 * 1024 * 1024;
+ ctx.maxOutputLength = 2 * 1024 * 1024;
+ allowedMIMETypes = new HashSet();
+ allowedMIMETypes.add(new String("text/html"));
+ allowedMIMETypes.add(new String("text/plain"));
+ allowedMIMETypes.add(new String("application/xhtml+xml"));
+// allowedMIMETypes.add(new String("application/zip"));
+ ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+// ctx.allowedMIMETypes.add("text/html");
+ tProducedIndex = System.currentTimeMillis();
+ indexing = true;
+ stopped = false;
+ count = 0;
+
+ //startPlugin();
+ Thread starterThread = new Thread("Spider Plugin Starter") {
+ public void run() {
+ try{
+ Thread.sleep(30 * 1000); // Let the node start
up
+ } catch (InterruptedException e){}
+ startSomeRequests();
}
- }
+ };
+ starterThread.setDaemon(true);
+ starterThread.start();
+}
- public void runPlugin(PluginRespirator pr){
- this.pr = pr;
- this.id = 0;
- this.core = pr.getNode().clientCore;
- this.ctx = core.makeClient((short) 0).getFetchContext();
- ctx.maxSplitfileBlockRetries = 10;
- ctx.maxNonSplitfileRetries = 10;
- ctx.maxTempLength = 2 * 1024 * 1024;
- ctx.maxOutputLength = 2 * 1024 * 1024;
- allowedMIMETypes = new HashSet();
- allowedMIMETypes.add(new String("text/html"));
- allowedMIMETypes.add(new String("text/plain"));
- allowedMIMETypes.add(new String("application/xhtml+xml"));
-
- ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-
- tProducedIndex = System.currentTimeMillis();
- indexing = true;
- stopped = false;
- count = 0;
-
- //startPlugin();
- Thread starterThread = new Thread("Spider Plugin Starter") {
- public void run() {
- try{
- Thread.sleep(30 * 1000); // Let the
node start up
- } catch (InterruptedException e){}
- startSomeRequests();
- }
- };
- starterThread.setDaemon(true);
- starterThread.start();
+public String handleHTTPGet(HTTPRequest request) throws PluginHTTPException{
+ StringBuffer out = new StringBuffer();
+ // need to produce pretty html
+ //later fredpluginhttpadvanced will give the interface
+ //this brings us to the page from visit
+ String listname = request.getParam("list");
+ if(listname.length() != 0)
+ {
+ appendDefaultHeader(out,null);
+ out.append("<p><h4>"+listname+" URIs</h4></p>");
+ appendList(listname,out,null);
+ return out.toString();
}
-
- /**
- * Interface to the Spider data
- */
- public String handleHTTPGet(HTTPRequest request) throws
PluginHTTPException{
- StringBuffer out = new StringBuffer();
-
- String listname = request.getParam("list");
- if(listname.length() != 0)
+ appendDefaultPageStart(out,null);
+ String uriParam = request.getParam("adduri");
+ if(uriParam != null && uriParam.length() != 0)
{
- appendDefaultHeader(out,null);
- out.append("<p><h4>"+listname+" URIs</h4></p>");
- appendList(listname,out,null);
- return out.toString();
- }
- appendDefaultPageStart(out,null);
- String uriParam = request.getParam("adduri");
- if(uriParam != null && uriParam.length() != 0)
- {
- try {
- FreenetURI uri = new FreenetURI(uriParam);
- synchronized (this) {
- failedURIs.remove(uri);
- visitedURIs.remove(uri);
- }
- out.append("<p>URI added :"+uriParam+"</p>");
- queueURI(uri);
- startSomeRequests();
- } catch (MalformedURLException mue1) {
- out.append("<p>MalFormed URI: "+uriParam+"</p");
+ try {
+ FreenetURI uri = new FreenetURI(uriParam);
+ synchronized (this) {
+ failedURIs.remove(uri);
+ visitedURIs.remove(uri);
}
+ out.append("<p>URI added :"+uriParam+"</p>");
+ queueURI(uri);
+ startSomeRequests();
+ } catch (MalformedURLException mue1) {
+ out.append("<p>MalFormed URI: "+uriParam+"</p");
}
- return out.toString();
+ }
+ return out.toString();
+}
+/**
+ * Appends every URI of the collection selected by name to the output, one
+ * code element followed by a line break per URI.
+ *
+ * @param listname "visited", "queued" or "failed"; "running" and any other
+ *        value select the running fetches
+ * @param out buffer the HTML is appended to
+ * @param stylesheet not used by this method
+ */
+private void appendList(String listname, StringBuffer out, String stylesheet)
+{
+	Collection uris;
+	/*
+	 * Iterate over a snapshot in all four cases (the running fetches used to be
+	 * iterated live), taken under the monitor queueURI() also holds.
+	 */
+	synchronized (this) {
+		if(listname.equals("visited"))
+			uris = new HashSet(visitedURIs);
+		else if(listname.equals("queued"))
+			uris = new ArrayList(queuedURIList);
+		else if(listname.equals("failed"))
+			uris = new HashSet(failedURIs);
+		else
+			uris = new ArrayList(runningFetchesByURI.keySet());
+	}
+	Iterator it = uris.iterator();
+	while(it.hasNext())
+		out.append("<code>"+it.next().toString()+"</code><br/>");
+}
+private void appendDefaultPageStart(StringBuffer out, String stylesheet) {
+
+ out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
+ if(stylesheet != null)
+ out.append("<link href=\""+stylesheet+"\" type=\"text/css\"
rel=\"stylesheet\" />");
+ out.append("</HEAD><BODY>\n");
+ out.append("<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n");
+ out.append("Add uri:");
+ out.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\"
/><br/><br/>");
+ out.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+ Set runningFetches = runningFetchesByURI.keySet();
+ out.append("<p><h3>Running Fetches</h3></p>");
+ Set visited = new HashSet(visitedURIs);
+ List queued = new ArrayList(queuedURIList);
+
+ Set failed = new HashSet(failedURIs);
+ Iterator it=queued.iterator();
+ out.append("<br/>Size :"+runningFetches.size());
+ appendList(runningFetches,out,stylesheet);
+ out.append("<p><a href=\"?list="+"running"+"\">Show all</a><br/></p>");
+ out.append("<br/>Size :"+queued.size());
+ int i = 0;
+ while(it.hasNext()){
+ if(i<=maxShownURIs){
+ out.append("<code>"+it.next().toString()+"</code><br/>");
+ }
+ else break;
+ i++;
}
-
- private void appendList(String listname, StringBuffer out, String
stylesheet)
- {
- Iterator it = (runningFetchesByURI.keySet()).iterator();
- if(listname.equals("running"))
- it = (runningFetchesByURI.keySet()).iterator();
- if(listname.equals("visited"))
- it = (new HashSet(visitedURIs)).iterator();
- if(listname.equals("queued"))
- it = (new ArrayList(queuedURIList)).iterator();
- if(listname.equals("failed"))
- it = (new HashSet(failedURIs)).iterator();
- while(it.hasNext())
-
out.append("<code>"+it.next().toString()+"</code><br/>");
- }
-
- private void appendDefaultPageStart(StringBuffer out, String
stylesheet) {
-
- out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
- if(stylesheet != null)
- out.append("<link href=\""+stylesheet+"\"
type=\"text/css\" rel=\"stylesheet\" />");
- out.append("</HEAD><BODY>\n");
- out.append("<CENTER><H1>" + pluginName +
"</H1><BR/><BR/><BR/>\n");
- out.append("Add uri:");
- out.append("<form method=\"GET\"><input type=\"text\"
name=\"adduri\" /><br/><br/>");
- out.append("<input type=\"submit\" value=\"Add uri\"
/></form>");
- Set runningFetches = runningFetchesByURI.keySet();
- out.append("<p><h3>Running Fetches</h3></p>");
- Set visited = new HashSet(visitedURIs);
- List queued = new ArrayList(queuedURIList);
-
- Set failed = new HashSet(failedURIs);
- Iterator it=queued.iterator();
- out.append("<br/>Size :"+runningFetches.size()+"<br/>");
- appendList(runningFetches,out,stylesheet);
- out.append("<p><a href=\"?list="+"running"+"\">Show
all</a><br/></p>");
- out.append("<p><h3>Queued URIs</h3></p>");
- out.append("<br/>Size :"+queued.size()+"<br/>");
- int i = 0;
- while(it.hasNext()){
- if(i<=maxShownURIs){
-
out.append("<code>"+it.next().toString()+"</code><br/>");
- }
- else break;
- i++;
+ out.append("<p><a href=\"?list="+"queued"+"\">Show all</a><br/></p>");
+ out.append("<br/>Size :"+visited.size());
+ appendList(visited,out,stylesheet);
+ out.append("<p><a href=\"?list="+"visited"+"\">Show all</a><br/></p>");
+ out.append("<br/>Size :"+failed.size());
+ appendList(failed,out,stylesheet);
+ out.append("<p><a href=\"?list="+"failed"+"\">Show all</a><br/></p>");
+
+
+}
+/**
+ * Appends the start of a plain HTML page: head with the plugin name as title
+ * and an optional stylesheet link, then a centred heading and the "Add uri" form.
+ *
+ * @param out buffer the HTML is appended to
+ * @param stylesheet address of a stylesheet to link, or null for none
+ */
+private void appendDefaultHeader(StringBuffer out, String stylesheet){
+	String titleTag = "<HTML><HEAD><TITLE>" + pluginName + "</TITLE>";
+	out.append(titleTag);
+	if(stylesheet != null) {
+		String linkTag = "<link href=\""+stylesheet+"\" type=\"text/css\" rel=\"stylesheet\" />";
+		out.append(linkTag);
+	}
+	out.append("</HEAD><BODY>\n");
+	String heading = "<CENTER><H1>" + pluginName + "</H1><BR/><BR/><BR/>\n";
+	out.append(heading);
+	// the form submits the entered URI as the "adduri" parameter
+	out.append("Add uri:")
+		.append("<form method=\"GET\"><input type=\"text\" name=\"adduri\" /><br/><br/>")
+		.append("<input type=\"submit\" value=\"Add uri\" /></form>");
+}
+private void appendList(Set list,StringBuffer out, String stylesheet){
+ Iterator it = list.iterator();
+ int i = 0;
+ while(it.hasNext()){
+ if(i<=maxShownURIs){
+ out.append("<code>"+it.next().toString()+"</code><br/>");
}
- out.append("<p><a href=\"?list="+"queued"+"\">Show
all</a><br/></p>");
- out.append("<p><h3>Visited URIs</h3></p>");
- out.append("<br/>Size :"+visited.size()+"<br/>");
- appendList(visited,out,stylesheet);
- out.append("<p><a href=\"?list="+"visited"+"\">Show
all</a><br/></p>");
- out.append("<p><h3>Failed URIs</h3></p>");
- out.append("<br/>Size :"+failed.size()+"<br/>");
- appendList(failed,out,stylesheet);
- out.append("<p><a href=\"?list="+"failed"+"\">Show
all</a><br/></p>");
-
-
+ else{
+ //out.append("<form method=\"GET\"><input
type=\"submit\" name=\"Showall\" />");
+// if(listname.equals("visited"))
+// out.append("<p><a href=\"?list="+listname+">Showall
visited</a><br/></p>");
+// if(listname.equals("failed"))
+// out.append("<p><a
href=\"?list="+listname+">Showall failed</a><br/></p>");
+ break;
+ }
+ i++;
+
}
+
+}
-
- private void appendDefaultHeader(StringBuffer out, String stylesheet){
- out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
- if(stylesheet != null)
- out.append("<link href=\""+stylesheet+"\"
type=\"text/css\" rel=\"stylesheet\" />");
- out.append("</HEAD><BODY>\n");
- out.append("<CENTER><H1>" + pluginName +
"</H1><BR/><BR/><BR/>\n");
- out.append("Add uri:");
- out.append("<form method=\"GET\"><input type=\"text\"
name=\"adduri\" /><br/><br/>");
- out.append("<input type=\"submit\" value=\"Add uri\"
/></form>");
+public class PageCallBack implements FoundURICallback{
+ int id;
+
+ PageCallBack(){
+ id = 0;
}
+ public void foundURI(FreenetURI uri){
+ //now we have the id of the page that had called this link
+ queueURI(uri);
+ int iduri = (Integer) uriIds.get(uri);
+ Vector outlink = (Vector) outlinks.get(id);
+ if(!outlink.contains(iduri))
+ outlink.add(iduri);
+ outlinks.remove(id);
+ outlinks.put(id, outlink);
+ try{
+ FileWriter out = new FileWriter("outlink",true);
+ out.write(" id "+id+" size "+ outlink.size()+" \n");
+ out.close();
+ }catch(Exception e){}
-
- private void appendList(Set list,StringBuffer out, String stylesheet){
- Iterator it = list.iterator();
- int i = 0;
- while(it.hasNext()){
- if(i<=maxShownURIs){
-
out.append("<code>"+it.next().toString()+"</code><br/>");
- }
- else{
- break;
- }
- i++;
+ if(inlinks.containsKey(iduri)){
+ Vector inlink = (Vector) inlinks.get(iduri);
+ try{
+ FileWriter out = new FileWriter("inlink",true);
+ out.write(" id "+iduri+" size "+
inlink.size()+" \n");
+ out.close();
+ }catch(Exception e){}
+
+ if(!inlink.contains(id)) inlink.add(id);
+ inlinks.remove(iduri);
+ inlinks.put(iduri, inlink);
+
}
+ startSomeRequests();
}
-
- /**
- * creates the callback object for each page.
- *<p>Used to create inlinks and outlinks for each page separately.
- * @author swati
- *
- */
- public class PageCallBack implements FoundURICallback{
- int id;
- /*
- * id of the page as refrenced in uriIds
- */
- PageCallBack(){
- id = 0;
+ public void onText(String s, String type, URI baseURI){
+ try{
+ FileWriter outp = new FileWriter("ontext",true);
+ outp.write("inside on text with id"+id+" \n");
+ outp.close();
+ }catch(Exception e){}
+// FreenetURI uri;
+// try {
+// uri = new FreenetURI(baseURI.getPath().substring(1));
+// } catch (MalformedURLException e) {
+// Logger.error(this, "Caught " + e, e);
+// return;
+// }
+
+
+
+ if((type != null) && (type.length() != 0) &&
type.toLowerCase().equals("title")
+ && (s != null) && (s.length() != 0) && (s.indexOf('\n') <
0)) {
+ /* We should have a correct title */
+ // titlesOfURIs.put(uri.toString(), s);
+ titlesOfIds.put(id, s);
+
+ type = "title";
+
}
+ else type = null;
- public void foundURI(FreenetURI uri){
- queueURI(uri);
- int iduri = (Integer) uriIds.get(uri);
+ String[] words = s.split("[^A-Za-z0-9]");
- if(outlinks.containsKey(id)){
- Vector outlink = (Vector) outlinks.get(id);
- if(!outlink.contains(iduri))
- outlink.add(iduri);
- outlinks.remove(id);
- outlinks.put(id, outlink);
- }
- else
- {
- Vector outlink = new Vector();
- outlink.add(iduri);
- outlinks.put(id, outlink);
- }
+ Integer lastPosition = null;
- if(inlinks.containsKey(iduri)){
- Vector inlink = (Vector) inlinks.get(iduri);
- if(!inlink.contains(id)) inlink.add(id);
- inlinks.remove(iduri);
- inlinks.put(iduri, inlink);
+ //lastPosition = (Integer)lastPositionByURI.get(uri.toString());
+ lastPosition = (Integer)lastPositionById.get(id);
+ if(lastPosition == null)
+ lastPosition = new Integer(1); /* We start to count
from 1 */
+
+ for (int i = 0; i < words.length; i++) {
+ String word = words[i];
+ if ((word == null) || (word.length() == 0))
+ continue;
+ word = word.toLowerCase();
+ try{
+ if(type == null)
+ addWord(word, lastPosition.intValue() + i, id);
+ else
+ addWord(word, -1 * (i+1), id);
}
- else
- {
- Vector inlink = new Vector();
- inlink.add(id);
- inlinks.put(iduri, inlink);
- }
-
- startSomeRequests();
+ catch (Exception e){}
}
+
+ if(type == null) {
+ lastPosition = new Integer(lastPosition.intValue() +
words.length);
+ // lastPositionByURI.put(uri.toString(), lastPosition);
+ lastPositionById.put(id, lastPosition);
+ }
+
+ }
+private synchronized void addWord(String word, int position,int id) throws
Exception{
+
+
+ if(word.length() < 3)
+ return;
+
+ //word = word.intern();
- public void onText(String s, String type, URI baseURI){
+ //FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
+ Integer[] ids = (Integer[]) idsByWord.get(word);
+
+ // urisWithWords.add(uri);
+ idsWithWords.add(id);
+ try{
+ FileWriter outp = new FileWriter("addWord",true);
+ outp.write("ID ="+id+" uri ="+idUris.get(id)+"\n");
+ outp.close();
+ }catch(Exception e){}
+// FileWriter outp = new FileWriter("uricheck",true);
+// outp.write(uri.getDocName()+"\n");
+// outp.write(uri.getKeyType()+"\n");
+// outp.write(uri.getMetaString()+"\n");
+// outp.write(uri.getGuessableKey()+"\n");
+// outp.write(uri.hashCode()+"\n");
+// outp.write(uri.getPreferredFilename()+"\n");
+//
+// outp.close();
- if((type != null) && (type.length() != 0) &&
type.toLowerCase().equals("title")
- && (s != null) && (s.length() != 0) &&
(s.indexOf('\n') < 0)) {
- /* We should have a correct title */
- titlesOfIds.put(id, s);
- type = "title";
- }
- else type = null;
+ /* Word position indexation */
+ HashMap wordPositionsForOneUri =
(HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word,
and gives position */
+
+ if(wordPositionsForOneUri == null) {
+ wordPositionsForOneUri = new HashMap();
+ wordPositionsForOneUri.put(word, new Integer[] { new
Integer(position) });
+ //positionsByWordByURI.put(uri.toString(),
wordPositionsForOneUri);
+ positionsByWordById.put(id, wordPositionsForOneUri);
+ } else {
+ Integer[] positions =
(Integer[])wordPositionsForOneUri.get(word);
- String[] words = s.split("[^A-Za-z0-9]");
- Integer lastPosition = null;
- lastPosition = (Integer)lastPositionById.get(id);
+ if(positions == null) {
+ positions = new Integer[] { new
Integer(position) };
+ wordPositionsForOneUri.put(word, positions);
+ } else {
+ Integer[] newPositions = new
Integer[positions.length + 1];
- if(lastPosition == null)
- lastPosition = new Integer(1); /* We start to
count from 1 */
- for (int i = 0; i < words.length; i++) {
- String word = words[i];
- if ((word == null) || (word.length() == 0))
- continue;
- word = word.toLowerCase();
- try{
- if(type == null)
- addWord(word,
lastPosition.intValue() + i, id);
- else
- addWord(word, -1 * (i+1), id);
- }
- catch (Exception e){}
- }
+ System.arraycopy(positions, 0, newPositions, 0,
positions.length);
+ newPositions[positions.length] = new
Integer(position);
- if(type == null) {
- lastPosition = new
Integer(lastPosition.intValue() + words.length);
- lastPositionById.put(id, lastPosition);
+ wordPositionsForOneUri.put(word, newPositions);
}
-
}
-
- private synchronized void addWord(String word, int position,int
id) throws Exception{
- if(word.length() < 3)
- return;
-
- Integer[] ids = (Integer[]) idsByWord.get(word);
- idsWithWords.add(id);
-
- /* Word position indexation */
- HashMap wordPositionsForOneUri =
(HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word,
and gives position */
- if(wordPositionsForOneUri == null) {
- wordPositionsForOneUri = new HashMap();
- wordPositionsForOneUri.put(word, new Integer[]
{ new Integer(position) });
- positionsByWordById.put(id,
wordPositionsForOneUri);
- }
- else {
- Integer[] positions =
(Integer[])wordPositionsForOneUri.get(word);
- if(positions == null) {
- positions = new Integer[] { new
Integer(position) };
- wordPositionsForOneUri.put(word,
positions);
- }
- else {
- Integer[] newPositions = new
Integer[positions.length + 1];
- System.arraycopy(positions, 0,
newPositions, 0, positions.length);
- newPositions[positions.length] = new
Integer(position);
- wordPositionsForOneUri.put(word,
newPositions);
- }
+
+ if (ids == null) {
+ idsByWord.put(word, new Integer[] { id });
+
+ } else {
+ for (int i = 0; i < ids.length; i++) {
+ if (ids[i].equals(id))
+ return;
}
-
- if (ids == null) {
- idsByWord.put(word, new Integer[] { id });
- } else {
- for (int i = 0; i < ids.length; i++) {
- if (ids[i].equals(id))
- return;
+ Integer[] newIDs = new Integer[ids.length + 1];
+ System.arraycopy(ids, 0, newIDs, 0, ids.length);
+ newIDs[ids.length] = id;
+ idsByWord.put(word, newIDs);
+ }
+ //the new word is added here in urisByWord
+ tMap.put(MD5(word), word);
+ long time_indexing = System.currentTimeMillis();
+ if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 <
System.currentTimeMillis()) {
+ try {
+ //produceIndex();
+ //check();
+
+ if(indexing){
+ generateIndex2();
+ produceIndex2();
+ if((System.currentTimeMillis() -
time_indexing)/(System.currentTimeMillis() - tProducedIndex) >
MAX_TIME_SPENT_INDEXING) indexing= false;
+ else indexing = true;
}
- Integer[] newIDs = new Integer[ids.length + 1];
- System.arraycopy(ids, 0, newIDs, 0, ids.length);
- newIDs[ids.length] = id;
- idsByWord.put(word, newIDs);
+
+ } catch (IOException e) {
+ Logger.error(this, "Caught " + e + " while
creating index", e);
}
-
- tMap.put(MD5(word), word);
- long time_indexing = System.currentTimeMillis();
- if (tProducedIndex + minTimeBetweenEachIndexRewriting *
10 < System.currentTimeMillis()) {
- try {
- if(indexing){
- generateIndex2();
- produceIndex2();
- /*
- * ensures that index
production doesn't eat up the processor time
- */
- if((System.currentTimeMillis()
- time_indexing)/(System.currentTimeMillis() - tProducedIndex) >
MAX_TIME_SPENT_INDEXING) indexing= false;
- else indexing = true;
- }
- } catch (IOException e) {
- Logger.error(this, "Caught " + e + "
while creating index", e);
- }
- tProducedIndex = System.currentTimeMillis();
- }
+ tProducedIndex = System.currentTimeMillis();
}
+
}
+
+}
+public String handleHTTPPut(HTTPRequest request) throws PluginHTTPException{
+ return null;
+}
+public String handleHTTPPost(HTTPRequest request) throws PluginHTTPException{
+ return null;
+}
-
- public String handleHTTPPut(HTTPRequest request) throws
PluginHTTPException{
- return null;
- }
- public String handleHTTPPost(HTTPRequest request) throws
PluginHTTPException{
- return null;
- }
-
- public void onFoundEdition(long l, USK key){
- FreenetURI uri = key.getURI();
- if(runningFetchesByURI.containsKey(uri))
runningFetchesByURI.remove(uri);
- uri = key.getURI().setSuggestedEdition(l);
- queueURI(uri);
- }
-
+public void onFoundEdition(long l, USK key){
+ FreenetURI uri = key.getURI();
+ if(runningFetchesByURI.containsKey(uri))
runningFetchesByURI.remove(uri);
+ uri = key.getURI().setSuggestedEdition(l);
+ queueURI(uri);
}
+
+
+}