Author: nextgens Date: 2007-08-17 21:42:03 +0000 (Fri, 17 Aug 2007) New Revision: 14775
Removed: trunk/freenet/src/freenet/clients/http/XMLSpider.java Log: XMLSpider has got its own plugin now... that code is dead Deleted: trunk/freenet/src/freenet/clients/http/XMLSpider.java =================================================================== --- trunk/freenet/src/freenet/clients/http/XMLSpider.java 2007-08-17 21:37:52 UTC (rev 14774) +++ trunk/freenet/src/freenet/clients/http/XMLSpider.java 2007-08-17 21:42:03 UTC (rev 14775) @@ -1,1512 +0,0 @@ -/* This code is part of Freenet. It is distributed under the GNU General - * Public License, version 2 (or at your option any later version). See - * http://www.gnu.org/ for further details of the GPL. */ -package freenet.clients.http; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.Vector; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.w3c.dom.Attr; -import org.w3c.dom.DOMImplementation; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.w3c.dom.Text; - -import freenet.client.ClientMetadata; -import freenet.client.FetchContext; -import freenet.client.FetchException; -import freenet.client.FetchResult; -import freenet.client.InsertException; -import freenet.client.async.BaseClientPutter; -import freenet.client.async.ClientCallback; -import freenet.client.async.ClientGetter; -import freenet.client.async.USKCallback; -import freenet.clients.http.filter.ContentFilter; -import freenet.clients.http.filter.FoundURICallback; -import freenet.clients.http.filter.UnsafeContentTypeException; -import freenet.keys.FreenetURI; -import freenet.keys.USK; -import freenet.node.NodeClientCore; -import freenet.node.RequestStarter; -import freenet.oldplugins.plugin.HttpPlugin; -import freenet.oldplugins.plugin.PluginManager; -import freenet.pluginmanager.PluginRespirator; -import freenet.support.HTMLNode; -import freenet.support.Logger; -import freenet.support.MultiValueTable; -import freenet.support.api.Bucket; -import freenet.support.api.HTTPRequest; - -/** - * Spider. Produces an index. - */ -public class XMLSpider implements HttpPlugin, ClientCallback, FoundURICallback ,USKCallback{ - - long tProducedIndex; - private TreeMap tMap = new TreeMap(); - - // URIs visited, or fetching, or queued. Added once then forgotten about. - private final HashSet visitedURIs = new HashSet(); - private final HashSet urisWithWords = new HashSet(); - private final HashSet failedURIs = new HashSet(); - private final HashSet queuedURISet = new HashSet(); - private final LinkedList queuedURIList = new LinkedList(); - private final HashMap runningFetchesByURI = new HashMap(); - private final HashMap urisByWord = new HashMap(); - private final HashMap titlesOfURIs = new HashMap(); - private Vector indices; - private int match; - private boolean indexing ; - private static final int minTimeBetweenEachIndexRewriting = 50; - //private static final String indexFilename = "index.xml"; - private static final String DEFAULT_INDEX_DIR = "myindex2/"; - public Set allowedMIMETypes; - private static final int MAX_ENTRIES = 50; - private static final String pluginName = "XML spider"; - private static final double MAX_TIME_SPENT_INDEXING = 0.5; - //MAX_TIME_SPENT_INDEXING is the fraction of the total time allowed to be spent on indexing(max value = 1) - private static final String indexTitle= "This is an index"; - private static final String indexOwner = "Another anonymous"; - private static final String indexOwnerEmail = null; - private final HashMap sizeOfURIs = new HashMap(); /* String (URI) -> Long */ - private final HashMap mimeOfURIs = new HashMap(); /* String (URI) -> String */ - private final HashMap lastPositionByURI = new HashMap(); /* String (URI) -> Integer */ /* Use to determine word position on each uri */ - private final HashMap positionsByWordByURI = new HashMap(); /* String (URI) -> HashMap (String (word) -> Integer[] (Positions)) */ - - // Can have many; this limit only exists to save memory. - private static final int maxParallelRequests = 100; - private int maxShownURIs = 50; - private HashMap urisToNumbers; - private NodeClientCore core; - private FetchContext ctx; - private final short PRIORITY_CLASS = RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS; - private boolean stopped = true; - PluginRespirator pr; - - - private synchronized void queueURI(FreenetURI uri) { - //not adding the html condition - if((uri.getKeyType()).equals("USK")){ - if(uri.getSuggestedEdition() < 0) - uri = uri.setSuggestedEdition((-1)* uri.getSuggestedEdition()); - } - if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) { - queuedURIList.addLast(uri); - visitedURIs.add(uri); - } - } - - private void startSomeRequests() { - - - FreenetURI[] initialURIs = core.bookmarkManager.getBookmarkURIs(); - for (int i = 0; i < initialURIs.length; i++) - { - queueURI(initialURIs[i]); - } - - ArrayList toStart = null; - synchronized (this) { - if (stopped) { - return; - } - int running = runningFetchesByURI.size(); - int queued = queuedURIList.size(); - - if ((running >= maxParallelRequests) || (queued == 0)) - return; - - toStart = new ArrayList(Math.min(maxParallelRequests - running, queued)); - - for (int i = running; i < maxParallelRequests; i++) { - if (queuedURIList.isEmpty()) - break; - FreenetURI uri = (FreenetURI) queuedURIList.removeFirst(); - queuedURISet.remove(uri); - if((uri.getKeyType()).equals("USK")){ -// if(uri.getSuggestedEdition() < 0) -// uri = uri.setSuggestedEdition((-1)* uri.getSuggestedEdition()); - try{ - (ctx.uskManager).subscribe(USK.create(uri),this, false, this); - }catch(Exception e){ - - } - - } - ClientGetter getter = makeGetter(uri); - toStart.add(getter); - } - } - for (int i = 0; i < toStart.size(); i++) { - - ClientGetter g = (ClientGetter) toStart.get(i); - try { - runningFetchesByURI.put(g.getURI(), g); - g.start(); - FileWriter outp = new FileWriter("logfile2",true); - outp.write("URI "+g.getURI().toString()+'\n'); - - outp.close(); - } catch (FetchException e) { - onFailure(e, g); - } - catch (IOException e){ - Logger.error(this, "the logfile can not be written"+e.toString(), e); - } - - } - //} - - } - - - private ClientGetter makeGetter(FreenetURI uri) { - ClientGetter g = new ClientGetter(this, core.requestStarters.chkFetchScheduler, core.requestStarters.sskFetchScheduler, uri, ctx, PRIORITY_CLASS, this, null, null); - return g; - } - - public void onSuccess(FetchResult result, ClientGetter state) { - FreenetURI uri = state.getURI(); - try{ - FileWriter output = new FileWriter("logfile",true); - output.write(uri.toString()+"\n"); - output.close(); - } - catch(Exception e){ - Logger.error(this, "The uri could not be removed from running "+e.toString(), e); - } - synchronized (this) { - runningFetchesByURI.remove(uri); - } - startSomeRequests(); - ClientMetadata cm = result.getMetadata(); - Bucket data = result.asBucket(); - String mimeType = cm.getMIMEType(); - - sizeOfURIs.put(uri.toString(), new Long(data.size())); - mimeOfURIs.put(uri.toString(), mimeType); - - try { - ContentFilter.filter(data, ctx.bucketFactory, mimeType, uri.toURI("http://127.0.0.1:8888/"), this); - } catch (UnsafeContentTypeException e) { - return; // Ignore - } catch (IOException e) { - Logger.error(this, "Bucket error?: " + e, e); - } catch (URISyntaxException e) { - Logger.error(this, "Internal error: " + e, e); - } finally { - data.free(); - } - } - - public void onFailure(FetchException e, ClientGetter state) { - FreenetURI uri = state.getURI(); - try{ - FileWriter outp = new FileWriter("failed",true); - outp.write("failed "+e.toString()+" for "+uri+'\n'); - outp.close(); - - }catch(Exception e2){ - - } - synchronized (this) { - runningFetchesByURI.remove(uri); - failedURIs.add(uri); - } - if (e.newURI != null) - queueURI(e.newURI); -// else -// queueURI(uri); - startSomeRequests(); - - - } - - public void onSuccess(BaseClientPutter state) { - // Ignore - } - - public void onFailure(InsertException e, BaseClientPutter state) { - // Ignore - } - - public void onGeneratedURI(FreenetURI uri, BaseClientPutter state) { - // Ignore - } - - public void foundURI(FreenetURI uri) { - queueURI(uri); - startSomeRequests(); - } - - public void onText(String s, String type, URI baseURI) { - - FreenetURI uri; - try { - uri = new FreenetURI(baseURI.getPath().substring(1)); - } catch (MalformedURLException e) { - Logger.error(this, "Caught " + e, e); - return; - } - - - - if((type != null) && (type.length() != 0) && type.toLowerCase().equals("title") - && (s != null) && (s.length() != 0) && (s.indexOf('\n') < 0)) { - /* We should have a correct title */ - titlesOfURIs.put(uri.toString(), s); - type = "title"; - - } - else type = null; - - - String[] words = s.split("[^A-Za-z0-9]"); - - Integer lastPosition = null; - - lastPosition = (Integer)lastPositionByURI.get(uri.toString()); - - if(lastPosition == null) - lastPosition = new Integer(1); /* We start to count from 1 */ - - for (int i = 0; i < words.length; i++) { - String word = words[i]; - if ((word == null) || (word.length() == 0)) - continue; - word = word.toLowerCase(); - try{ - if(type == null) - addWord(word, lastPosition.intValue() + i, uri); - else - addWord(word, -1 * (i+1), uri); - } - catch (Exception e){} - } - - if(type == null) { - lastPosition = new Integer(lastPosition.intValue() + words.length); - lastPositionByURI.put(uri.toString(), lastPosition); - } - - } - - private synchronized void addWord(String word, int position,FreenetURI uri) throws Exception{ - - - if(word.length() < 3) - return; - - //word = word.intern(); - - - FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word); - - //Integer[] positions = (Integer[]) positionsByWordByURI.get(word); - - urisWithWords.add(uri); - - - /* Word position indexation */ - HashMap wordPositionsForOneUri = (HashMap)positionsByWordByURI.get(uri.toString()); /* For a given URI, take as key a word, and gives position */ - - if(wordPositionsForOneUri == null) { - wordPositionsForOneUri = new HashMap(); - wordPositionsForOneUri.put(word, new Integer[] { new Integer(position) }); - positionsByWordByURI.put(uri.toString(), wordPositionsForOneUri); - } else { - Integer[] positions = (Integer[])wordPositionsForOneUri.get(word); - - if(positions == null) { - positions = new Integer[] { new Integer(position) }; - wordPositionsForOneUri.put(word, positions); - } else { - Integer[] newPositions = new Integer[positions.length + 1]; - - System.arraycopy(positions, 0, newPositions, 0, positions.length); - newPositions[positions.length] = new Integer(position); - - wordPositionsForOneUri.put(word, newPositions); - } - } - - if (uris == null) { - urisByWord.put(word, new FreenetURI[] { uri }); - - } else { - for (int i = 0; i < uris.length; i++) { - if (uris[i].equals(uri)) - return; - } - FreenetURI[] newURIs = new FreenetURI[uris.length + 1]; - System.arraycopy(uris, 0, newURIs, 0, uris.length); - newURIs[uris.length] = uri; - urisByWord.put(word, newURIs); - } - //the new word is added here in urisByWord - tMap.put(MD5(word), word); - long time_indexing = System.currentTimeMillis(); - if (tProducedIndex + minTimeBetweenEachIndexRewriting * 1000 < System.currentTimeMillis()) { - try { - //produceIndex(); - //check(); - - if(indexing){ - generateIndex2(); - produceIndex2(); - if((System.currentTimeMillis() - time_indexing)/(System.currentTimeMillis() - tProducedIndex) > MAX_TIME_SPENT_INDEXING) indexing= false; - else indexing = true; - } - - } catch (IOException e) { - Logger.error(this, "Caught " + e + " while creating index", e); - } - tProducedIndex = System.currentTimeMillis(); - } - - } -// private synchronized void check() throws IOException{ -// FileWriter outp = new FileWriter("logs/indexing",true); -// outp.write("size = "+urisByWord.size()+"\n"); -// Iterator it = urisByWord.keySet().iterator(); -// while(it.hasNext()) -// outp.write(it.next()+"\n"); -// outp.close(); -// } - - private synchronized void produceIndex() throws IOException,NoSuchAlgorithmException { - // Produce the main index file. - - //the number of bits to consider for matching - int prefix = 1 ; - - if (urisByWord.isEmpty() || urisWithWords.isEmpty()) { - System.out.println("No URIs with words"); - return; - } - File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml"); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - /* Initialize xml builder */ - Document xmlDoc = null; - DocumentBuilderFactory xmlFactory = null; - DocumentBuilder xmlBuilder = null; - DOMImplementation impl = null; - Element rootElement = null; - - xmlFactory = DocumentBuilderFactory.newInstance(); - - - try { - xmlBuilder = xmlFactory.newDocumentBuilder(); - } catch(javax.xml.parsers.ParserConfigurationException e) { - /* Will (should ?) never happen */ - Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString()); - return; - } - - impl = xmlBuilder.getDOMImplementation(); - /* Starting to generate index */ - xmlDoc = impl.createDocument(null, "main_index", null); - rootElement = xmlDoc.getDocumentElement(); - - /* Adding header to the index */ - Element headerElement = xmlDoc.createElement("header"); - - /* -> title */ - Element subHeaderElement = xmlDoc.createElement("title"); - Text subHeaderText = xmlDoc.createTextNode(indexTitle); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner */ - subHeaderElement = xmlDoc.createElement("owner"); - subHeaderText = xmlDoc.createTextNode(indexOwner); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner email */ - if(indexOwnerEmail != null) { - subHeaderElement = xmlDoc.createElement("email"); - subHeaderText = xmlDoc.createTextNode(indexOwnerEmail); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - } - - - //String[] words = (String[]) urisByWord.keySet().toArray(new String[urisByWord.size()]); - //Arrays.sort(words); - FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new FreenetURI[urisWithWords.size()]); - urisToNumbers = new HashMap(); - Element prefixElement = xmlDoc.createElement("prefix"); - prefixElement.setAttribute("value", prefix+""); - - - for (int i = 0; i < uris.length; i++) { - urisToNumbers.put(uris[i], new Integer(i)); - } - - //all index files are ready - /* Adding word index */ - Element keywordsElement = xmlDoc.createElement("keywords"); - for(int i = 0;i<16;i++){ - generateSubIndex(DEFAULT_INDEX_DIR+"index_"+Integer.toHexString(i)+".xml"); - Element subIndexElement = xmlDoc.createElement("subIndex"); - if(i<=9) - subIndexElement.setAttribute("key",i+""); - else - subIndexElement.setAttribute("key",Integer.toHexString(i)); - //the subindex element key will contain the bits used for matching in that subindex - keywordsElement.appendChild(subIndexElement); - } - - - // make sure that prefix is the first child of root Element - rootElement.appendChild(prefixElement); - rootElement.appendChild(headerElement); - - //rootElement.appendChild(filesElement); - rootElement.appendChild(keywordsElement); - - /* Serialization */ - DOMSource domSource = new DOMSource(xmlDoc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - - try { - serializer = transformFactory.newTransformer(); - } catch(javax.xml.transform.TransformerConfigurationException e) { - Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString()); - return; - } - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) { - Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString()); - return; - } - - if(Logger.shouldLog(Logger.MINOR, this)) - Logger.minor(this, "Spider: indexes regenerated."); - - //the main xml file is generated - //now as each word is generated enter it into the respective subindex - //now the parsing will start and nodes will be added as needed - - - } - - private synchronized void produceIndex2() throws IOException,NoSuchAlgorithmException { - // Produce the main index file. - - //the number of bits to consider for matching - - - if (urisByWord.isEmpty() || urisWithWords.isEmpty()) { - System.out.println("No URIs with words"); - return; - } - File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml"); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - /* Initialize xml builder */ - Document xmlDoc = null; - DocumentBuilderFactory xmlFactory = null; - DocumentBuilder xmlBuilder = null; - DOMImplementation impl = null; - Element rootElement = null; - - xmlFactory = DocumentBuilderFactory.newInstance(); - - - try { - xmlBuilder = xmlFactory.newDocumentBuilder(); - } catch(javax.xml.parsers.ParserConfigurationException e) { - /* Will (should ?) never happen */ - Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString()); - return; - } - - impl = xmlBuilder.getDOMImplementation(); - /* Starting to generate index */ - xmlDoc = impl.createDocument(null, "main_index", null); - rootElement = xmlDoc.getDocumentElement(); - - /* Adding header to the index */ - Element headerElement = xmlDoc.createElement("header"); - - /* -> title */ - Element subHeaderElement = xmlDoc.createElement("title"); - Text subHeaderText = xmlDoc.createTextNode(indexTitle); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner */ - subHeaderElement = xmlDoc.createElement("owner"); - subHeaderText = xmlDoc.createTextNode(indexOwner); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner email */ - if(indexOwnerEmail != null) { - subHeaderElement = xmlDoc.createElement("email"); - subHeaderText = xmlDoc.createTextNode(indexOwnerEmail); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - } - - - //String[] words = (String[]) urisByWord.keySet().toArray(new String[urisByWord.size()]); - //Arrays.sort(words); - - Element prefixElement = xmlDoc.createElement("prefix"); - //prefixElement.setAttribute("value",match+""); - //this match will be set after processing the TreeMap - - - - //all index files are ready - /* Adding word index */ - Element keywordsElement = xmlDoc.createElement("keywords"); - for(int i = 0;i<indices.size();i++){ - //generateSubIndex(DEFAULT_INDEX_DIR+"index_"+Integer.toHexString(i)+".xml"); - Element subIndexElement = xmlDoc.createElement("subIndex"); -// if(i<=9) -// subIndexElement.setAttribute("key",i+""); -// else -// subIndexElement.setAttribute("key",Integer.toHexString(i)); - subIndexElement.setAttribute("key", (String) indices.elementAt(i)); - //the subindex element key will contain the bits used for matching in that subindex - keywordsElement.appendChild(subIndexElement); - } - - prefixElement.setAttribute("value",match+""); - // make sure that prefix is the first child of root Element - rootElement.appendChild(prefixElement); - rootElement.appendChild(headerElement); - - //rootElement.appendChild(filesElement); - rootElement.appendChild(keywordsElement); - - /* Serialization */ - DOMSource domSource = new DOMSource(xmlDoc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - - try { - serializer = transformFactory.newTransformer(); - } catch(javax.xml.transform.TransformerConfigurationException e) { - Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString()); - return; - } - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) { - Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString()); - return; - } - - if(Logger.shouldLog(Logger.MINOR, this)) - Logger.minor(this, "Spider: indexes regenerated."); - - //the main xml file is generated - //now as each word is generated enter it into the respective subindex - //now the parsing will start and nodes will be added as needed - - - } - private synchronized void generateIndex2() throws Exception{ - // now we the tree map and we need to use the sorted (md5s) to generate the xml indices - if (urisByWord.isEmpty() || urisWithWords.isEmpty()) { - System.out.println("No URIs with words"); - return; - } - FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new FreenetURI[urisWithWords.size()]); - urisToNumbers = new HashMap(); - for (int i = 0; i < uris.length; i++) { - urisToNumbers.put(uris[i], new Integer(i)); - } - indices = new Vector(); - int prefix = 1; - match = 1; - Vector list = new Vector(); - //String str = tMap.firstKey(); - Iterator it = tMap.keySet().iterator(); - FileWriter outp = new FileWriter("indexing"); - outp.write("size = "+tMap.size()+"\n"); - outp.close(); - String str = (String) it.next(); - int i = 0; - while(it.hasNext()) - { - outp = new FileWriter("indexing",true); - String key =(String) it.next(); - outp.write(key + "\n"); - outp.close(); - if(key.substring(0, prefix).equals(str.substring(0, prefix))) - {i++; - list.add(key); - } - else { - generateSubIndex(prefix,list); - str = key; - list = new Vector(); -// int count = list.size(); -// if(count > MAX_ENTRIES){ -// //the index has to be split up -// generateSubIndex(prefix,list); -// } -// else generateXML(list,prefix); -// str = key; -// list = new Vector(); - } - // - // this variable will keep the number of digits to be used - } - - generateSubIndex(prefix,list); - } - private synchronized Vector subVector(Vector list, int begin, int end){ - Vector tmp = new Vector(); - for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i)); - return tmp; - } - - private synchronized void generateSubIndex(int p,Vector list) throws Exception{ - - if(list.size() < MAX_ENTRIES) - { - //the index can be generated from this list - generateXML(list,p); - } - else - { - //this means that prefix needs to be incremented - if(match <= p) match = p+1; - int prefix = p+1; - int i =0; - String str = (String) list.elementAt(i); - int index=0; - while(i<list.size()) - { - String key = (String) list.elementAt(i); - if((key.substring(0, prefix)).equals(str.substring(0, prefix))) - { - //index = i; - i++; - } - else { - //generateXML(subVector(list,index,i-1),prefix); - generateSubIndex(prefix,subVector(list,index,i-1)); - index = i; - str = key; - } - - - } - generateSubIndex(prefix,subVector(list,index,i-1)); - } - } - - - private synchronized void generateXML(Vector list, int prefix) - { - String p = ((String) list.elementAt(0)).substring(0, prefix); - indices.add(p); - File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+p+".xml"); - //indices.add(p); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - /* Initialize xml builder */ - Document xmlDoc = null; - DocumentBuilderFactory xmlFactory = null; - DocumentBuilder xmlBuilder = null; - DOMImplementation impl = null; - Element rootElement = null; - - xmlFactory = DocumentBuilderFactory.newInstance(); - - - try { - xmlBuilder = xmlFactory.newDocumentBuilder(); - } catch(javax.xml.parsers.ParserConfigurationException e) { - /* Will (should ?) never happen */ - Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString()); - return; - } - - - impl = xmlBuilder.getDOMImplementation(); - - /* Starting to generate index */ - - xmlDoc = impl.createDocument(null, "sub_index", null); - rootElement = xmlDoc.getDocumentElement(); - - /* Adding header to the index */ - Element headerElement = xmlDoc.createElement("header"); - - /* -> title */ - Element subHeaderElement = xmlDoc.createElement("title"); - Text subHeaderText = xmlDoc.createTextNode(indexTitle); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner */ - subHeaderElement = xmlDoc.createElement("owner"); - subHeaderText = xmlDoc.createTextNode(indexOwner); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - - /* -> owner email */ - if(indexOwnerEmail != null) { - subHeaderElement = xmlDoc.createElement("email"); - subHeaderText = xmlDoc.createTextNode(indexOwnerEmail); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - } - - - Element filesElement = xmlDoc.createElement("files"); /* filesElement != fileElement */ - - Element EntriesElement = xmlDoc.createElement("entries"); - EntriesElement.setNodeValue(list.size()+""); - EntriesElement.setAttribute("value", list.size()+""); - //all index files are ready - /* Adding word index */ - Element keywordsElement = xmlDoc.createElement("keywords"); - //words to be added - Vector fileid = new Vector(); - for(int i =0;i<list.size();i++) - { - Element wordElement = xmlDoc.createElement("word"); - String str = (String) tMap.get(list.elementAt(i)); - wordElement.setAttribute("v",str ); - FreenetURI[] urisForWord = (FreenetURI[]) urisByWord.get(str); -// - for (int j = 0; j < urisForWord.length; j++) { - FreenetURI uri = urisForWord[j]; - Integer x = (Integer) urisToNumbers.get(uri); - - if (x == null) { - Logger.error(this, "Eh?"); - continue; - } -// - Element uriElement = xmlDoc.createElement("file"); - Element fileElement = xmlDoc.createElement("file"); - uriElement.setAttribute("id", x.toString()); - fileElement.setAttribute("id", x.toString()); - fileElement.setAttribute("key", uri.toString()); -//// /* Position by position */ - HashMap positionsForGivenWord = (HashMap)positionsByWordByURI.get(uri.toString()); - Integer[] positions = (Integer[])positionsForGivenWord.get(str); - - StringBuffer positionList = new StringBuffer(); - - for(int k=0; k < positions.length ; k++) { - if(k!=0) - positionList.append(','); - - positionList.append(positions[k].toString()); - } - - uriElement.appendChild(xmlDoc.createTextNode(positionList.toString())); - wordElement.appendChild(uriElement); -// for(l = 0;l<filesElement.getChildNodes().getLength();l++) -// { Element file = (Element) filesElement.getChildNodes().item(l); -// if(file.getAttribute("id").equals(x.toString())) -// -// break; -// } - -// if(l>=filesElement.getChildNodes().getLength()) -// filesElement.appendChild(fileElement); - if(!fileid.contains(x.toString())) - { - fileid.add(x.toString()); - filesElement.appendChild(fileElement); - } - } - - //Element keywordsElement = (Element) root.getElementsByTagName("keywords").item(0); - keywordsElement.appendChild(wordElement); -// - } -// - - rootElement.appendChild(EntriesElement); - rootElement.appendChild(headerElement); - rootElement.appendChild(filesElement); - rootElement.appendChild(keywordsElement); - - /* Serialization */ - DOMSource domSource = new DOMSource(xmlDoc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - - try { - serializer = transformFactory.newTransformer(); - } catch(javax.xml.transform.TransformerConfigurationException e) { - Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString()); - return; - } - - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) { - Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString()); - return; - } - - if(Logger.shouldLog(Logger.MINOR, this)) - Logger.minor(this, "Spider: indexes regenerated."); - - } - private synchronized void generateIndex() throws Exception{ - String[] words = (String[]) urisByWord.keySet().toArray(new String[urisByWord.size()]); - Arrays.sort(words); - for (int i = 0; i < words.length; i++) { - try{ - - String prefix_match = getIndex(words[i]); - - boolean addedWord = addWord(prefix_match,words[i]); - - if(addedWord == false) - { - split(prefix_match); - regenerateIndex(prefix_match); - prefix_match = getIndex(words[i]); - addWord(prefix_match,words[i]); - } - } - catch(Exception e2){Logger.error(this,"The Word could not be added"+ e2.toString(), e2); } - } - - - } - private void regenerateIndex(String prefix) throws Exception{ - //redistribute the entries in prefix.xml to prefix(0-f).xml - DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); - Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index_"+prefix+".xml"); - Element root = doc.getDocumentElement(); - NodeList wordList = root.getElementsByTagName("word"); - for(int i = 0;i<wordList.getLength();i++){ - Element word = (Element)wordList.item(i); - String value = word.getAttribute("v"); - String prefix_match = getIndex(value); - addWord(prefix_match,value); - } - } - - private String getIndex(String word) throws Exception { - DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); - Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index.xml"); - Element root = doc.getDocumentElement(); - Attr prefix_value = (Attr) (root.getElementsByTagName("prefix").item(0)).getAttributes().getNamedItem("value"); - int prefix = Integer.parseInt(prefix_value.getValue()); - String md5 = MD5(word); - NodeList subindexList = root.getElementsByTagName("subIndex"); - String str = md5.substring(0,prefix); - String prefix_match = search(str,subindexList); - - return prefix_match; - } - - private boolean addWord(String prefix, String str) throws Exception - { - //this word has to be added to the particular subindex - // modify the corresponding index - try{ - DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); - Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index_"+prefix+".xml"); - Element root = doc.getDocumentElement(); - - Element entry = (Element) root.getElementsByTagName("entries").item(0); - - Attr no_entries = (Attr) entry.getAttributes().getNamedItem("value"); - - Element filesElement = (Element) root.getElementsByTagName("files").item(0); - NodeList filesList = filesElement.getElementsByTagName("file"); - if(Integer.parseInt(no_entries.getValue()) >= MAX_ENTRIES) return false; - else - { - //increment the number of entries - entry.setAttribute("value",(Integer.parseInt(no_entries.getValue())+1)+""); - //add the entry - - Element wordElement = doc.createElement("word"); - wordElement.setAttribute("v", str); - - FreenetURI[] urisForWord = (FreenetURI[]) urisByWord.get(str); - - /* URI by URI */ - for (int j = 0; j < urisForWord.length; j++) { - FreenetURI uri = urisForWord[j]; - Integer x = (Integer) urisToNumbers.get(uri); - - if (x == null) { - Logger.error(this, "Eh?"); - continue; - } - - Element uriElement = doc.createElement("file"); - Element fileElement = doc.createElement("file"); - uriElement.setAttribute("id", x.toString()); - fileElement.setAttribute("id", x.toString()); - fileElement.setAttribute("key", uri.toString()); -// /* Position by position */ - HashMap positionsForGivenWord = (HashMap)positionsByWordByURI.get(uri.toString()); - Integer[] positions = (Integer[])positionsForGivenWord.get(str); - - StringBuffer positionList = new StringBuffer(); - - for(int k=0; k < positions.length ; k++) { - if(k!=0) - positionList.append(','); - - positionList.append(positions[k].toString()); - } - - uriElement.appendChild(doc.createTextNode(positionList.toString())); - int l; - for(l = 0;l<filesList.getLength();l++) - { Element file = (Element) filesList.item(l); - if(file.getAttribute("id").equals(x.toString())) - - break; - } - wordElement.appendChild(uriElement); - if(l>=filesList.getLength()) - filesElement.appendChild(fileElement); - } - Element keywordsElement = (Element) root.getElementsByTagName("keywords").item(0); - keywordsElement.appendChild(wordElement); - - - - DOMSource domSource = new DOMSource(doc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - - - serializer = transformFactory.newTransformer(); - - - - File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+prefix+".xml"); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) {} - } - - return true; - } - - catch(Exception e){Logger.error(this,"Word could not be added to the subindex"+ e.toString(), e);} - return false; - } - private void split(String prefix) throws Exception - { - //first we need to split the current subindex into 16 newones - //then read from the original one and append to the new ones - // make the entry in the main index.. - DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); - Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index.xml"); - Element root = doc.getDocumentElement(); - Element prefixElt =(Element) root.getElementsByTagName("prefix").item(0); - int prefix_current = Integer.parseInt(prefixElt.getAttribute("value")); - if (prefix_current <= prefix.length()) - prefixElt.setAttribute("value", (prefix_current+1)+""); - - Element keywordElement = (Element) root.getElementsByTagName("keywords").item(0); - - NodeList subIndexElt = root.getElementsByTagName("subIndex"); - for(int i =0;i<subIndexElt.getLength();i++) - { - Element subIndex = (Element) subIndexElt.item(i); - if((subIndex.getAttribute("key")).equals(prefix)) { - keywordElement.removeChild(subIndex); - break; - } - } - - for(int i = 0;i<16;i++) - { - Element subIndex = doc.createElement("subIndex"); - generateSubIndex(DEFAULT_INDEX_DIR+"index_"+prefix+Integer.toHexString(i)+".xml"); - subIndex.setAttribute("key",prefix.concat(Integer.toHexString(i))); - keywordElement.appendChild(subIndex); - } - - - DOMSource domSource = new DOMSource(doc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - serializer = transformFactory.newTransformer(); - File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml"); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) {} - } - - public String search(String str,NodeList list) throws Exception - { - int prefix = str.length(); - for(int i = 0;i<list.getLength();i++){ - Element subIndex = (Element) list.item(i); - String key = subIndex.getAttribute("key"); - if(key.equals(str)) return key; - } - return search(str.substring(0, prefix-1),list); - } - - - public void handleGet(HTTPRequest request, ToadletContext context) throws IOException, ToadletContextClosedException { - String action = request.getParam("action"); - PageMaker pageMaker = context.getPageMaker(); - if ((action == null) || (action.length() == 0)) { - MultiValueTable responseHeaders = new MultiValueTable(); - responseHeaders.put("Location", "?action=list"); - context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0); - return; - } else if ("list".equals(action)) { - - String listName = request.getParam("listName", null); - HTMLNode pageNode = pageMaker.getPageNode("The XML Spider", context); - HTMLNode contentNode = pageMaker.getContentNode(pageNode); - /* create copies for multi-threaded use */ - if (listName == null) { - Map runningFetches = new HashMap(runningFetchesByURI); - List queued = new ArrayList(queuedURIList); - Set visited = new HashSet(visitedURIs); - Set failed = new HashSet(failedURIs); - contentNode.addChild(createNavbar(pageMaker, runningFetches.size(), queued.size(), visited.size(), failed.size())); - contentNode.addChild(createAddBox(pageMaker, context)); - contentNode.addChild(createList(pageMaker, "Running FetcheIIIs", "running", runningFetches.keySet(), maxShownURIs)); - contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, maxShownURIs)); - contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, maxShownURIs)); - contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, maxShownURIs)); - } else { - contentNode.addChild(createBackBox(pageMaker)); - if ("failed".equals(listName)) { - Set failed = new HashSet(failedURIs); - contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, -1)); - } else if ("visited".equals(listName)) { - Set visited = new HashSet(visitedURIs); - contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, -1)); - } else if ("queued".equals(listName)) { - List queued = new ArrayList(queuedURIList); - contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, -1)); - } else if ("running".equals(listName)) { - Map runningFetches = new HashMap(runningFetchesByURI); - contentNode.addChild(createList(pageMaker, "Running Fetches", "running", runningFetches.keySet(), -1)); - } - } - MultiValueTable responseHeaders = new MultiValueTable(); - byte[] responseBytes = pageNode.generate().getBytes("utf-8"); - context.sendReplyHeaders(200, "OK", responseHeaders, "text/html; charset=utf-8", responseBytes.length); - context.writeData(responseBytes); - } else if ("add".equals(action)) { - String uriParam = request.getParam("key"); - try { - FreenetURI uri = new FreenetURI(uriParam); - synchronized (this) { - failedURIs.remove(uri); - visitedURIs.remove(uri); - } - queueURI(uri); - startSomeRequests(); - } catch (MalformedURLException mue1) { - sendSimpleResponse(context, "URL invalid", "The given URI is not valid."); - return; - } - MultiValueTable responseHeaders = new MultiValueTable(); - responseHeaders.put("Location", "?action=list"); - context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0); - return; - } - } - - /** - * @see freenet.oldplugins.plugin.HttpPlugin#handlePost(freenet.clients.http.HTTPRequestImpl, freenet.clients.http.ToadletContext) - */ - public void handlePost(HTTPRequest request, ToadletContext context) throws IOException { - } - - private void sendSimpleResponse(ToadletContext context, String title, String message) throws ToadletContextClosedException, IOException { - PageMaker pageMaker = context.getPageMaker(); - HTMLNode pageNode = pageMaker.getPageNode(title, context); - HTMLNode contentNode = pageMaker.getContentNode(pageNode); - HTMLNode infobox = contentNode.addChild(pageMaker.getInfobox("infobox-alter", title)); - HTMLNode infoboxContent = pageMaker.getContentNode(infobox); - infoboxContent.addChild("#", message); - byte[] responseBytes = pageNode.generate().getBytes("utf-8"); - context.sendReplyHeaders(200, "OK", new MultiValueTable(), "text/html; charset=utf-8", responseBytes.length); - context.writeData(responseBytes); - } - - private HTMLNode createBackBox(PageMaker pageMaker) { - HTMLNode backbox = pageMaker.getInfobox((String) null); - HTMLNode backContent = pageMaker.getContentNode(backbox); - backContent.addChild("#", "Return to the "); - backContent.addChild("a", "href", "?action=list", "list of all URIs"); - backContent.addChild("#", "."); - return backbox; - } - - private HTMLNode createAddBox(PageMaker pageMaker, ToadletContext ctx) { - HTMLNode addBox = pageMaker.getInfobox("Add a URI"); - HTMLNode formNode = pageMaker.getContentNode(addBox).addChild("form", new String[] { "action", "method" }, new String[] { "", "get" }); - formNode.addChild("input", new String[] { "type", "name", "value" }, new String[] { "hidden", "action", "add" }); - formNode.addChild("input", new String[] { "type", "size", "name", "value" }, new String[] { "text", "40", "key", "" }); - formNode.addChild("input", new String[] { "type", "value" }, new String[] { "submit", "Add URI" }); - return addBox; - } - - private HTMLNode createNavbar(PageMaker pageMaker, int running, int queued, int visited, int failed) { - HTMLNode navbar = pageMaker.getInfobox("navbar", "Page Navigation"); - HTMLNode list = pageMaker.getContentNode(navbar).addChild("ul"); - list.addChild("li").addChild("a", "href", "#running", "Running (" + running + ')'); - list.addChild("li").addChild("a", "href", "#queued", "Queued (" + queued + ')'); - list.addChild("li").addChild("a", "href", "#visited", "Visited (" + visited + ')'); - list.addChild("li").addChild("a", "href", "#failed", "Failed (" + failed + ')'); - return navbar; - } - - private HTMLNode createList(PageMaker pageMaker, String listName, String anchorName, Collection collection, int maxCount) { - HTMLNode listNode = new HTMLNode("div"); - listNode.addChild("a", "name", anchorName); - HTMLNode listBox = pageMaker.getInfobox(listName); - HTMLNode listContent = pageMaker.getContentNode(listBox); - listNode.addChild(listBox); - Iterator collectionItems = collection.iterator(); - int itemCount = 0; - while (collectionItems.hasNext()) { - FreenetURI uri = (FreenetURI) collectionItems.next(); - listContent.addChild("#", uri.toString()); - listContent.addChild("br"); - if (itemCount++ == maxCount) { - listContent.addChild("br"); - listContent.addChild("a", "href", "?action=list&listName=" + anchorName, "Show all\u2026"); - break; - } - } - return listNode; - } - - /** - * @see freenet.oldplugins.plugin.Plugin#getPluginName() - */ - public String getPluginName() { - return pluginName; - } - - /** - * @see freenet.oldplugins.plugin.Plugin#setPluginManager(freenet.oldplugins.plugin.PluginManager) - */ - public void setPluginManager(PluginManager pluginManager) { - - this.core = pluginManager.getClientCore(); - this.ctx = core.makeClient((short) 0).getFetchContext(); - ctx.maxSplitfileBlockRetries = 10; - ctx.maxNonSplitfileRetries = 10; - ctx.maxTempLength = 2 * 1024 * 1024; - ctx.maxOutputLength = 2 * 1024 * 1024; - allowedMIMETypes = new HashSet(); - allowedMIMETypes.add(new String("text/html")); - allowedMIMETypes.add(new String("text/plain")); - allowedMIMETypes.add(new String("application/xhtml+xml")); - // allowedMIMETypes.add(new String("application/zip")); - ctx.allowedMIMETypes = new HashSet(allowedMIMETypes); - // ctx.allowedMIMETypes.add("text/html"); - tProducedIndex = System.currentTimeMillis(); - indexing = true; - } - - - /** - * @see freenet.oldplugins.plugin.Plugin#startPlugin() - */ - public void startPlugin() { - stopped = false; - - Thread starterThread = new Thread("Spider Plugin Starter") { - public void run() { - try{ - Thread.sleep(30 * 1000); // Let the node start up - } catch (InterruptedException e){} - startSomeRequests(); - } - }; - starterThread.setDaemon(true); - starterThread.start(); - } - - /** - * @see freenet.oldplugins.plugin.Plugin#stopPlugin() - */ - public void stopPlugin() { - synchronized (this) { - stopped = true; - queuedURIList.clear(); - } - } - - public void onMajorProgress() { - // Ignore - } - - public void onFetchable(BaseClientPutter state) { - // Ignore - } - private static String convertToHex(byte[] data) { - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < data.length; i++) { - int halfbyte = (data[i] >>> 4) & 0x0F; - int two_halfs = 0; - do { - if ((0 <= halfbyte) && (halfbyte <= 9)) - buf.append((char) ('0' + halfbyte)); - else - buf.append((char) ('a' + (halfbyte - 10))); - halfbyte = data[i] & 0x0F; - } while(two_halfs++ < 1); - } - return buf.toString(); - } - //this function will return the String representation of the MD5 hash for the input string - public static String MD5(String text) throws NoSuchAlgorithmException, UnsupportedEncodingException { - MessageDigest md; - md = MessageDigest.getInstance("MD5"); - byte[] md5hash = new byte[32]; - md.update(text.getBytes("iso-8859-1"), 0, text.length()); - md5hash = md.digest(); - return convertToHex(md5hash); - } - - public void generateSubIndex(String filename){ -//generates the new subIndex - File outputFile = new File(filename); - StreamResult resultStream; - resultStream = new StreamResult(outputFile); - - /* Initialize xml builder */ - Document xmlDoc = null; - DocumentBuilderFactory xmlFactory = null; - DocumentBuilder xmlBuilder = null; - DOMImplementation impl = null; - Element rootElement = null; - - xmlFactory = DocumentBuilderFactory.newInstance(); - - - try { - xmlBuilder = xmlFactory.newDocumentBuilder(); - } catch(javax.xml.parsers.ParserConfigurationException e) { - /* Will (should ?) never happen */ - Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString()); - return; - } - - - impl = xmlBuilder.getDOMImplementation(); - - /* Starting to generate index */ - - xmlDoc = impl.createDocument(null, "sub_index", null); - rootElement = xmlDoc.getDocumentElement(); - - /* Adding header to the index */ - Element headerElement = xmlDoc.createElement("header"); - - /* -> title */ - Element subHeaderElement = xmlDoc.createElement("title"); - Text subHeaderText = xmlDoc.createTextNode(indexTitle); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - /* -> owner */ - subHeaderElement = xmlDoc.createElement("owner"); - subHeaderText = xmlDoc.createTextNode(indexOwner); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - - - /* -> owner email */ - if(indexOwnerEmail != null) { - subHeaderElement = xmlDoc.createElement("email"); - subHeaderText = xmlDoc.createTextNode(indexOwnerEmail); - - subHeaderElement.appendChild(subHeaderText); - headerElement.appendChild(subHeaderElement); - } - - - Element filesElement = xmlDoc.createElement("files"); /* filesElement != fileElement */ - - Element EntriesElement = xmlDoc.createElement("entries"); - EntriesElement.setNodeValue("0"); - EntriesElement.setAttribute("value", "0"); - //all index files are ready - /* Adding word index */ - Element keywordsElement = xmlDoc.createElement("keywords"); - - rootElement.appendChild(EntriesElement); - rootElement.appendChild(headerElement); - rootElement.appendChild(filesElement); - rootElement.appendChild(keywordsElement); - - /* Serialization */ - DOMSource domSource = new DOMSource(xmlDoc); - TransformerFactory transformFactory = TransformerFactory.newInstance(); - Transformer serializer; - - try { - serializer = transformFactory.newTransformer(); - } catch(javax.xml.transform.TransformerConfigurationException e) { - Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString()); - return; - } - - - serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - serializer.setOutputProperty(OutputKeys.INDENT,"yes"); - - /* final step */ - try { - serializer.transform(domSource, resultStream); - } catch(javax.xml.transform.TransformerException e) { - Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString()); - return; - } - - if(Logger.shouldLog(Logger.MINOR, this)) - Logger.minor(this, "Spider: indexes regenerated."); - } - -public void terminate(){ - synchronized (this) { - stopped = true; - queuedURIList.clear(); - } -} - -public void runPlugin(PluginRespirator pr){ - this.pr = pr; - this.core = pr.getNode().clientCore; - this.ctx = core.makeClient((short) 0).getFetchContext(); - ctx.maxSplitfileBlockRetries = 10; - ctx.maxNonSplitfileRetries = 10; - ctx.maxTempLength = 2 * 1024 * 1024; - ctx.maxOutputLength = 2 * 1024 * 1024; - allowedMIMETypes = new HashSet(); - allowedMIMETypes.add(new String("text/html")); - allowedMIMETypes.add(new String("text/plain")); - allowedMIMETypes.add(new String("application/xhtml+xml")); -// allowedMIMETypes.add(new String("application/zip")); - ctx.allowedMIMETypes = new HashSet(allowedMIMETypes); -// ctx.allowedMIMETypes.add("text/html"); - tProducedIndex = System.currentTimeMillis(); - indexing = true; - stopped = false; - - Thread starterThread = new Thread("Spider Plugin Starter") { - public void run() { - try{ - Thread.sleep(30 * 1000); // Let the node start up - } catch (InterruptedException e){} - startSomeRequests(); - } - }; - starterThread.setDaemon(true); - starterThread.start(); -} - - -public void onFoundEdition(long l, USK key){ - FreenetURI uri = key.getURI(); - if(runningFetchesByURI.containsKey(uri)) runningFetchesByURI.remove(uri); - uri = key.getURI().setSuggestedEdition(l); - queueURI(uri); -} - - -}
