Author: swatig0
Date: 2007-07-11 19:51:37 +0000 (Wed, 11 Jul 2007)
New Revision: 14026
Modified:
trunk/plugins/XMLSpider/XMLSpider.java
Log:
Used Treemaps in Spider
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2007-07-11 19:33:35 UTC (rev 14025)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2007-07-11 19:51:37 UTC (rev 14026)
@@ -77,7 +77,7 @@
public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginHTTPAdvanced, HttpPlugin, ClientCallback,
FoundURICallback ,USKCallback{
long tProducedIndex;
-
+ int count;
// URIs visited, or fetching, or queued. Added once then forgotten
about.
private final HashSet visitedURIs = new HashSet();
private final HashSet urisWithWords = new HashSet();
@@ -92,7 +92,7 @@
//private static final String indexFilename = "index.xml";
private static final String DEFAULT_INDEX_DIR = "myindex/";
public Set allowedMIMETypes;
- private static final int MAX_ENTRIES = 50;
+ private static final int MAX_ENTRIES = 5;
private static final String pluginName = "XML spider";
private static final String indexTitle= "This is an index";
@@ -116,6 +116,10 @@
private synchronized void queueURI(FreenetURI uri) {
//not adding the html condition
+ if((uri.getKeyType()).equals("USK")){
+ if(uri.getSuggestedEdition() < 0)
+ uri = uri.setSuggestedEdition((-1)*
uri.getSuggestedEdition());
+ }
if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
queuedURIList.addLast(uri);
visitedURIs.add(uri);
@@ -150,8 +154,7 @@
FreenetURI uri = (FreenetURI)
queuedURIList.removeFirst();
queuedURISet.remove(uri);
if((uri.getKeyType()).equals("USK")){
- if(uri.getSuggestedEdition() < 0)
- uri = uri.setSuggestedEdition((-1)*
uri.getSuggestedEdition());
+
try{
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);
}catch(Exception e){
@@ -169,7 +172,7 @@
try {
runningFetchesByURI.put(g.getURI(), g);
g.start();
- FileWriter outp = new
FileWriter("logfile2",true);
+ FileWriter outp = new
FileWriter("logfile23",true);
outp.write("URI "+g.getURI().toString()+'\n');
outp.close();
@@ -275,7 +278,14 @@
return;
}
-
+ try{
+ FileWriter outp = new FileWriter("onText",true);
+ outp.write("inside on text\n");
+ outp.close();
+
+ }catch(Exception e2){
+
+ }
if((type != null) && (type.length() != 0) &&
type.toLowerCase().equals("title")
&& (s != null) && (s.length() != 0) && (s.indexOf('\n') <
0)) {
@@ -323,14 +333,14 @@
if(word.length() < 3)
return;
-
+
FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
//Integer[] positions = (Integer[])
positionsByWordByURI.get(word);
urisWithWords.add(uri);
-
+
/* Word position indexation */
HashMap wordPositionsForOneUri =
(HashMap)positionsByWordByURI.get(uri.toString()); /* For a given URI, take as
key a word, and gives position */
@@ -353,7 +363,7 @@
wordPositionsForOneUri.put(word, newPositions);
}
}
-
+
if (uris == null) {
urisByWord.put(word, new FreenetURI[] { uri });
@@ -367,7 +377,10 @@
newURIs[uris.length] = uri;
urisByWord.put(word, newURIs);
}
- if (tProducedIndex + minTimeBetweenEachIndexRewriting * 1000 <
System.currentTimeMillis()) {
+
+
+ //produceIndex();
+ if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 <
System.currentTimeMillis()) {
try {
produceIndex();
generateIndex();
@@ -384,11 +397,12 @@
//the number of bits to consider for matching
int prefix = 1 ;
-
+
if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
System.out.println("No URIs with words");
return;
}
+
File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
StreamResult resultStream;
resultStream = new StreamResult(outputFile);
@@ -443,8 +457,8 @@
}
- String[] words = (String[]) urisByWord.keySet().toArray(new
String[urisByWord.size()]);
- Arrays.sort(words);
+ //String[] words = (String[]) urisByWord.keySet().toArray(new
String[urisByWord.size()]);
+ //Arrays.sort(words);
FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new
FreenetURI[urisWithWords.size()]);
urisToNumbers = new HashMap();
Element prefixElement = xmlDoc.createElement("prefix");
@@ -1057,23 +1071,99 @@
StringBuffer out = new StringBuffer();
// need to produce pretty html
//later fredpluginhttpadvanced will give the interface
- String action = request.getParam("action");
- if(action == null || action.length() == 0){
- //put the default post fields
- appendDefaultPageStart(out,null);
- } else if ("list".equals(action)) {
- String listName = request.getParam("listName", null);
- out.append("<p>list clicked</CENTER></BODY></HTML>");
- }
+ //this brings us to the page from visit
+
+ appendDefaultPageStart(out,null);
+ String uriParam = request.getParam("adduri");
+ if(uriParam != null && uriParam.length() != 0)
+ {
+ try {
+ FreenetURI uri = new FreenetURI(uriParam);
+ synchronized (this) {
+ failedURIs.remove(uri);
+ visitedURIs.remove(uri);
+ }
+ out.append("<p>URI added :"+uriParam+"</p>");
+ queueURI(uri);
+ startSomeRequests();
+ } catch (MalformedURLException mue1) {
+ out.append("<p>MalFormed URI: "+uriParam+"</p");
+ }
+ }
+// if(action == null || action.length() == 0){
+// //put the default post fields
+// appendDefaultPageStart(out,null);
+//
+// } else if ("list".equals(action)) {
+// String listName = request.getParam("listName", null);
+// out.append("<p>list clicked</CENTER></BODY></HTML>");
+// if(listName == null){
+// //display all th
+// Set runningFetches = new
HashMap(runningFetchesByURI).keySet();
+// List queued = new ArrayList(queuedURIList);
+// Set visited = new HashSet(visitedURIs);
+// Set failed = new HashSet(failedURIs);
+//
+// out.append("<p><h3>Running Fetches</h3>");
+// Iterator it=runningFetches.iterator();
+// while(it.hasNext()){
+//
out.append("<code>"+(it.next()).toString()+"</code><br>");
+// }
+// }
+// else{
+// //display individual results
+// }
+// }
+// else if ("add".equals(action)) {
+// String uriParam = request.getParam("key");
+// try {
+// FreenetURI uri = new FreenetURI(uriParam);
+// synchronized (this) {
+// failedURIs.remove(uri);
+// visitedURIs.remove(uri);
+// }
+// queueURI(uri);
+// startSomeRequests();
+// } catch (MalformedURLException mue1) {
+// out.append("<h1>URL invalid</h1>");
+//// sendSimpleResponse(context, "URL invalid", "The
given URI is not valid.");
+//// return;
+// }
+// //not really necc
+//// MultiValueTable responseHeaders = new MultiValueTable();
+//// responseHeaders.put("Location", "?action=list");
+//// context.sendReplyHeaders(301, "Redirect",
responseHeaders, "text/html; charset=utf-8", 0);
+//
+//
+// }
+
return out.toString();
}
private void appendDefaultPageStart(StringBuffer out, String
stylesheet) {
+ count ++;
out.append("<HTML><HEAD><TITLE>" + pluginName + "</TITLE>");
if(stylesheet != null)
out.append("<link href=\""+stylesheet+"\"
type=\"text/css\" rel=\"stylesheet\" />");
out.append("</HEAD><BODY>\n");
out.append("<CENTER><H1>" + pluginName +
"</H1><BR/><BR/><BR/>\n");
+ out.append("Add uri:");
+ out.append("<form method=\"GET\"><input type=\"text\"
name=\"adduri\" /><br/><br/>");
+ out.append("<input type=\"submit\" value=\"Add uri\"
/></form>");
+ Set runningFetches = runningFetchesByURI.keySet();
+ out.append("<p><h3>Running Fetches</h3></p>");
+ Set visited = new HashSet(visitedURIs);
+ List queued = new ArrayList(queuedURIList);
+ Set failed = new HashSet(failedURIs);
+ Iterator it=queued.iterator();
+ out.append("<br/>Size :"+runningFetches.size());
+ out.append("<br/>Size :"+queued.size());
+ out.append("<br/>Size :"+visited.size());
+ out.append("<br/>Size :"+failed.size());
+ out.append("<br/>Count : "+count);
+ while(it.hasNext()){
+
out.append("<code>"+(it.next()).toString()+"</code><br>");
+ }
}
public String handleHTTPPut(HTTPRequest request) throws
PluginHTTPException{
return null;
@@ -1104,7 +1194,7 @@
tProducedIndex = System.currentTimeMillis();
stopped = false;
-
+ count = 0;
Thread starterThread = new Thread("Spider Plugin Starter") {
public void run() {
try{