Author: swatig0
Date: 2007-06-21 12:05:36 +0000 (Thu, 21 Jun 2007)
New Revision: 13678
Modified:
trunk/freenet/src/freenet/clients/http/XMLSpider.java
Log:
testing XmlSPider
Modified: trunk/freenet/src/freenet/clients/http/XMLSpider.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/XMLSpider.java 2007-06-21 09:26:28 UTC (rev 13677)
+++ trunk/freenet/src/freenet/clients/http/XMLSpider.java 2007-06-21 12:05:36 UTC (rev 13678)
@@ -62,6 +62,12 @@
import freenet.support.MultiValueTable;
import freenet.support.api.Bucket;
import freenet.support.api.HTTPRequest;
+import freenet.pluginmanager.FredPlugin;
+import freenet.pluginmanager.FredPluginHTTP;
+import freenet.pluginmanager.FredPluginThreadless;
+import freenet.pluginmanager.PluginHTTPException;
+import freenet.pluginmanager.PluginRespirator;
+
/**
* Spider. Produces an index.
*/
@@ -78,14 +84,12 @@
private final HashMap runningFetchesByURI = new HashMap();
private final HashMap urisByWord = new HashMap();
private final HashMap titlesOfURIs = new HashMap();
- private FileWriter output;
- private FileWriter output2;
- private static final int minTimeBetweenEachIndexRewriting = 1;
+ private static final int minTimeBetweenEachIndexRewriting = 10;
//private static final String indexFilename = "index.xml";
private static final String DEFAULT_INDEX_DIR = "myindex/";
public Set allowedMIMETypes;
- private static final int MAX_ENTRIES = 5;
+ private static final int MAX_ENTRIES = 50;
private static final String pluginName = "XML spider";
private static final String indexTitle= "This is an index";
@@ -104,6 +108,7 @@
private FetchContext ctx;
private final short PRIORITY_CLASS =
RequestStarter.PREFETCH_PRIORITY_CLASS;
private boolean stopped = true;
+ PluginRespirator pr;
private synchronized void queueURI(FreenetURI uri) {
//not adding the html condition
@@ -142,20 +147,26 @@
queuedURISet.remove(uri);
ClientGetter getter = makeGetter(uri);
toStart.add(getter);
+ }
+ }
+ for (int i = 0; i < toStart.size(); i++) {
- }
-
- for (int i = 0; i < toStart.size(); i++) {
ClientGetter g = (ClientGetter) toStart.get(i);
try {
runningFetchesByURI.put(g.getURI(), g);
g.start();
+ FileWriter outp = new FileWriter("logfile2",true);
+ outp.write("URI "+g.getURI().toString()+"\n");
+ outp.close();
} catch (FetchException e) {
onFailure(e, g);
}
+ catch (IOException e){
+ Logger.error(this, "the logfile can not be written"+e.toString(), e);
+ }
}
- }
+ //}
}
@@ -167,7 +178,14 @@
public void onSuccess(FetchResult result, ClientGetter state) {
FreenetURI uri = state.getURI();
-
+ try{
+ FileWriter output = new FileWriter("logfile",true);
+ output.write(uri.toString()+"\n");
+ output.close();
+ }
+ catch(Exception e){
+ Logger.error(this, "The uri could not be removed from running "+e.toString(), e);
+ }
synchronized (this) {
runningFetchesByURI.remove(uri);
}
@@ -194,16 +212,23 @@
public void onFailure(FetchException e, ClientGetter state) {
FreenetURI uri = state.getURI();
-
+ try{
+ FileWriter outp = new FileWriter("failed",true);
+ outp.write("failed "+e.toString());
+ outp.close();
+
+ }catch(Exception e2){
+
+ }
synchronized (this) {
+ runningFetchesByURI.remove(uri);
failedURIs.add(uri);
- runningFetchesByURI.remove(uri);
}
if (e.newURI != null)
queueURI(e.newURI);
- else
- queueURI(uri);
- startSomeRequests();
+// else
+// queueURI(uri);
+// startSomeRequests();
}
@@ -327,7 +352,7 @@
newURIs[uris.length] = uri;
urisByWord.put(word, newURIs);
}
- if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 < System.currentTimeMillis()) {
+ if (tProducedIndex + minTimeBetweenEachIndexRewriting * 1000 < System.currentTimeMillis()) {
try {
produceIndex();
generateIndex();
@@ -340,9 +365,8 @@
}
private synchronized void produceIndex() throws
IOException,NoSuchAlgorithmException {
- // Produce an index file.
+ // Produce the main index file.
-
//the number of bits to consider for matching
int prefix = 1 ;
@@ -372,11 +396,8 @@
return;
}
-
impl = xmlBuilder.getDOMImplementation();
-
/* Starting to generate index */
-
xmlDoc = impl.createDocument(null, "main_index", null);
rootElement = xmlDoc.getDocumentElement();
@@ -418,9 +439,7 @@
for (int i = 0; i < uris.length; i++) {
urisToNumbers.put(uris[i], new Integer(i));
}
-
-
//all index files are ready
/* Adding word index */
Element keywordsElement = xmlDoc.createElement("keywords");
@@ -435,7 +454,6 @@
keywordsElement.appendChild(subIndexElement);
}
-
// make sure that prefix is the first child of root Element
rootElement.appendChild(prefixElement);
@@ -455,7 +473,6 @@
Logger.error(this, "Spider: Error while serializing XML
(transformFactory.newTransformer()): "+e.toString());
return;
}
-
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
@@ -471,7 +488,6 @@
if(Logger.shouldLog(Logger.MINOR, this))
Logger.minor(this, "Spider: indexes regenerated.");
-
//the main xml file is generated
//now as each word is generated enter it into the respective subindex
//now the parsing will start and nodes will be added as needed
@@ -491,19 +507,12 @@
if(addedWord == false)
{
-
-
-
split(prefix_match);
regenerateIndex(prefix_match);
-
prefix_match = getIndex(words[i]);
-
addWord(prefix_match,words[i]);
-
}
-
- }
+ }
catch(Exception e2){Logger.error(this,"The Word could not be
added"+ e2.toString(), e2); }
}
@@ -523,6 +532,7 @@
addWord(prefix_match,value);
}
}
+
private String getIndex(String word) throws Exception {
DocumentBuilderFactory docFactory =
DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
@@ -530,42 +540,19 @@
Element root = doc.getDocumentElement();
Attr prefix_value = (Attr)
(root.getElementsByTagName("prefix").item(0)).getAttributes().getNamedItem("value");
int prefix = Integer.parseInt(prefix_value.getValue());
- output = new FileWriter(DEFAULT_INDEX_DIR+"logfile2",true);
- //Element prefixNode = (Element)root.getFirstChild();
- output.write("\nword "+word);
-
String md5 = MD5(word);
- output.write(" md5 "+md5);
-// NodeList KeywordsList = root.getElementsByTagName("keywords");
-
- //Node Keyword = KeywordsList.item(0);
-
-
NodeList subindexList = root.getElementsByTagName("subIndex");
String str = md5.substring(0,prefix);
+ String prefix_match = search(str,subindexList);
- output.write("String "+str);
- output.write("\n");
-
- output.close();
- String prefix_match = search(str,subindexList);
-
-
-
- output = new FileWriter(DEFAULT_INDEX_DIR+"search",true);
- output.write("\nPrefix returned "+prefix_match+" with md5 "+str+ " and word "+word);
- output.close();
-
-
return prefix_match;
}
+
private boolean addWord(String prefix, String str) throws Exception
{
//this word has to be added to the particular subindex
// modify the corresponding index
try{
-
-
DocumentBuilderFactory docFactory =
DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder =
docFactory.newDocumentBuilder();
Document doc =
docBuilder.parse(DEFAULT_INDEX_DIR+"index_"+prefix+".xml");
@@ -654,11 +641,6 @@
try {
serializer.transform(domSource, resultStream);
} catch(javax.xml.transform.TransformerException e) {}
-
- //i.appendChild(root);
- //c.replaceChild(root,doc.getDocumentElement());
-
-
}
return true;
@@ -671,7 +653,6 @@
{
//first we need to split the current subindex into 16 newones
//then read from the original one and append to the new ones
-
// make the entry in the main index..
DocumentBuilderFactory docFactory =
DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
@@ -706,12 +687,7 @@
DOMSource domSource = new DOMSource(doc);
TransformerFactory transformFactory =
TransformerFactory.newInstance();
Transformer serializer;
-
-
- serializer = transformFactory.newTransformer();
-
-
-
+ serializer = transformFactory.newTransformer();
File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
StreamResult resultStream;
resultStream = new StreamResult(outputFile);
@@ -723,9 +699,8 @@
try {
serializer.transform(domSource, resultStream);
} catch(javax.xml.transform.TransformerException e) {}
-
-
}
+
public String search(String str,NodeList list) throws Exception
{
int prefix = str.length();
@@ -734,7 +709,6 @@
String key = subIndex.getAttribute("key");
if(key.equals(str)) return key;
}
-
return search(str.substring(0, prefix-1),list);
}
@@ -1059,7 +1033,44 @@
if(Logger.shouldLog(Logger.MINOR, this))
Logger.minor(this, "Spider: indexes regenerated.");
}
+
+public void terminate(){
+ synchronized (this) {
+ stopped = true;
+ queuedURIList.clear();
+ }
+}
+
+public void runPlugin(PluginRespirator pr){
+ this.pr = pr;
+ this.core = pr.getNode().clientCore;
+ this.ctx = core.makeClient((short) 0).getFetchContext();
+ ctx.maxSplitfileBlockRetries = 10;
+ ctx.maxNonSplitfileRetries = 10;
+ ctx.maxTempLength = 2 * 1024 * 1024;
+ ctx.maxOutputLength = 2 * 1024 * 1024;
+ allowedMIMETypes = new HashSet();
+ allowedMIMETypes.add(new String("text/html"));
+ ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+// ctx.allowedMIMETypes.add("text/html");
+ tProducedIndex = System.currentTimeMillis();
+
+ stopped = false;
+
+ Thread starterThread = new Thread("Spider Plugin Starter") {
+ public void run() {
+ try{
+ Thread.sleep(30 * 1000); // Let the node start up
+ } catch (InterruptedException e){}
+ startSomeRequests();
+ }
+ };
+ starterThread.setDaemon(true);
+ starterThread.start();
+}
+
+
}