XMLSpider

j16sdiz Mon, 08 Dec 2008 04:04:48 -0800

Author: j16sdiz
Date: 2008-12-08 12:04:33 +0000 (Mon, 08 Dec 2008)
New Revision: 24121


Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
java generic

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-12-08 08:50:27 UTC (rev 24120)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-12-08 12:04:33 UTC (rev 24121)
@@ -82,22 +82,21 @@
        /**
         * Stores the found words along with md5
         */
-       public TreeMap tMap = new TreeMap();
+       public TreeMap<String, String> tMap = new TreeMap<String, String>();
        int count;
        // URIs visited, or fetching, or queued. Added once then forgotten 
about.
        /**
         * 
         * Lists the uris that have been vistied by the spider
         */
-       public final HashSet visitedURIs = new HashSet();
-       private final HashSet idsWithWords = new HashSet();
+       public final HashSet<FreenetURI> visitedURIs = new 
HashSet<FreenetURI>();
+       private final HashSet<Integer> idsWithWords = new HashSet<Integer>();
        /**
-        * 
         * Lists the uris that were visited but failed.
         */
-       public final HashSet failedURIs = new HashSet();
+       public final HashSet<FreenetURI> failedURIs = new HashSet<FreenetURI>();
 
-       private final HashSet queuedURISet = new HashSet();
+       private final HashSet<FreenetURI> queuedURISet = new 
HashSet<FreenetURI>();
        /**
         * 
         * Lists the uris that are still queued.
@@ -106,14 +105,15 @@
         * long period, we use 2 retries (to stay off the cooldown queue), and 
we go over the queued
         * list 3 times for each key.
         */
-       public final LinkedList[] queuedURIList = new LinkedList[] { new 
LinkedList(), new LinkedList(), new LinkedList() };
-       private final HashMap runningFetchesByURI = new HashMap();
+       public final LinkedList<FreenetURI>[] queuedURIList = new LinkedList[] 
{ new LinkedList<FreenetURI>(),
+               new LinkedList<FreenetURI>(), new LinkedList<FreenetURI>() };
+       private final HashMap<FreenetURI, ClientGetter> runningFetchesByURI = 
new HashMap<FreenetURI, ClientGetter>();
 
-       private final HashMap idsByWord = new HashMap();
+       private final HashMap<String, Integer[]> idsByWord = new 
HashMap<String, Integer[]>();
 
-       private final HashMap titlesOfIds = new HashMap();
-       private final HashMap uriIds = new HashMap();
-       private final HashMap idUris = new HashMap();
+       private final HashMap<Integer, String> titlesOfIds = new 
HashMap<Integer, String>();
+       private final HashMap<FreenetURI, Integer> uriIds = new 
HashMap<FreenetURI, Integer>();
+       private final HashMap<Integer, FreenetURI> idUris = new 
HashMap<Integer, FreenetURI>();
        
        // Re-enable outlinks/inlinks when we publish them or use them for 
ranking.
        /**
@@ -126,7 +126,7 @@
         *  indexed by the id of page uri.
         */
 //     public final HashMap inlinks = new HashMap();
-       private Vector indices;
+       private Vector<String> indices;
        private int match;
        private Integer id;
        private long time_taken;
@@ -142,7 +142,7 @@
        /**
         * Lists the allowed mime types of the fetched page. 
         */
-       public Set allowedMIMETypes;
+       public Set<String> allowedMIMETypes;
        private static final int MAX_ENTRIES = 2000;
        private static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 4*1024*1024;
        private static int version = 32;
@@ -156,12 +156,18 @@
        private static final String indexTitle= "XMLSpider index";
        private static final String indexOwner = "Freenet";
        private static final String indexOwnerEmail = null;
-       private final HashMap sizeOfURIs = new HashMap(); /* String (URI) -> 
Long */
-       private final HashMap mimeOfURIs = new HashMap(); /* String (URI) -> 
String */
+       private final HashMap<String, Long> sizeOfURIs = new HashMap<String, 
Long>(); /*
+                                                                               
                                                                                
 * String (URI) ->
+                                                                               
                                                                                
 * Long
+                                                                               
                                                                                
 */
+       private final HashMap<String, String> mimeOfURIs = new HashMap<String, 
String>(); /*
+                                                                               
                                                                                
         * String (URI)
+                                                                               
                                                                                
         * -> String
+                                                                               
                                                                                
         */
 //     private final HashMap lastPositionByURI = new HashMap(); /* String 
(URI) -> Integer */ /* Use to determine word position on each uri */
-       private final HashMap lastPositionById = new HashMap();
+       private final HashMap<Integer, Integer> lastPositionById = new 
HashMap<Integer, Integer>();
 //     private final HashMap positionsByWordByURI = new HashMap(); /* String 
(URI) -> HashMap (String (word) -> Integer[] (Positions)) */
-       private final HashMap positionsByWordById = new HashMap();
+       private final HashMap<Integer, HashMap<String, Integer[]>> 
positionsByWordById = new HashMap<Integer, HashMap<String, Integer[]>>();
        // Can have many; this limit only exists to save memory.
        private static final int maxParallelRequests = 100;
        private int maxShownURIs = 15;
@@ -207,7 +213,7 @@
                        queueURI(initialURIs[i]);
                }
 
-               ArrayList toStart = null;
+               ArrayList<ClientGetter> toStart = null;
                synchronized (this) {
                        if (stopped) {
                                return;
@@ -218,7 +224,7 @@
                        if ((running >= maxParallelRequests) || (queued == 0))
                                return;
 
-                       toStart = new ArrayList(Math.min(maxParallelRequests - 
running, queued));
+                       toStart = new 
ArrayList<ClientGetter>(Math.min(maxParallelRequests - running, queued));
 
                        for (int i = running; i < maxParallelRequests; i++) {
                                boolean found = false;
@@ -236,7 +242,7 @@
                }
                for (int i = 0; i < toStart.size(); i++) {
 
-                       ClientGetter g = (ClientGetter) toStart.get(i);
+                       ClientGetter g = toStart.get(i);
                        try {
                                runningFetchesByURI.put(g.getURI(), g);
                                g.start();
@@ -315,7 +321,7 @@
                        synchronized(this) {
                                sizeOfURIs.put(uri.toString(), new 
Long(data.size()));
                                mimeOfURIs.put(uri.toString(), mimeType);
-                               id = (Integer) uriIds.get(uri);
+                               id = uriIds.get(uri);
 //                             inlinks.put(page.id, new Vector());
 //                             outlinks.put(page.id, new Vector());
                        }
@@ -457,7 +463,7 @@
                for(int i = 0;i<indices.size();i++){
 
                        Element subIndexElement = 
xmlDoc.createElement("subIndex");
-                       subIndexElement.setAttribute("key", (String) 
indices.elementAt(i));
+                       subIndexElement.setAttribute("key", 
indices.elementAt(i));
                        //the subindex element key will contain the bits used 
for matching in that subindex
                        keywordsElement.appendChild(subIndexElement);
                }
@@ -516,17 +522,17 @@
                        return;
                }
 
-               indices = new Vector();
+               indices = new Vector<String>();
                int prefix = 1;
                match = 1;
-               Vector list = new Vector();
-               Iterator it = tMap.keySet().iterator();
+               Vector<String> list = new Vector<String>();
+               Iterator<String> it = tMap.keySet().iterator();
 
-               String str = (String) it.next();
+               String str = it.next();
                int i = 0;
                while(it.hasNext())
                {
-                       String key =(String) it.next();
+                       String key = it.next();
                        //create a list of the words to be added in the same 
subindex
                        if(key.substring(0, prefix).equals(str.substring(0, 
prefix))) 
                        {i++;
@@ -536,20 +542,20 @@
                                //generate the appropriate subindex with the 
current list
                                generateSubIndex(prefix,list);
                                str = key;
-                               list = new Vector();
+                               list = new Vector<String>();
                        }
                }
 
                generateSubIndex(prefix,list);
        }
        
-       private synchronized Vector subVector(Vector list, int begin, int end){
-               Vector tmp = new Vector();
+       private synchronized Vector<String> subVector(Vector<String> list, int 
begin, int end) {
+               Vector<String> tmp = new Vector<String>();
                for(int i = begin;i<end+1;i++) tmp.add(list.elementAt(i));
                return tmp;
        }
 
-       private synchronized void generateSubIndex(int p,Vector list) throws 
Exception{
+       private synchronized void generateSubIndex(int p, Vector<String> list) 
throws Exception {
                boolean logMINOR = Logger.shouldLog(Logger.MINOR, this);
                /*
                 * if the list is less than max allowed entries in a file then 
directly generate the xml 
@@ -574,11 +580,11 @@
                        if(match <= p) match = p+1; 
                        int prefix = p+1;
                        int i =0;
-                       String str = (String) list.elementAt(i);
+                       String str = list.elementAt(i);
                        int index=0;
                        while(i<list.size())
                        {
-                               String key = (String) list.elementAt(i);
+                               String key = list.elementAt(i);
                                if((key.substring(0, 
prefix)).equals(str.substring(0, prefix))) 
                                {
                                        i++;
@@ -602,9 +608,9 @@
         * @param prefix number of matching bits of md5
         * @throws Exception
         */
-       public synchronized void generateXML (Vector list, int prefix) throws 
TooBigIndexException, Exception
+       public synchronized void generateXML(Vector<String> list, int prefix) 
throws TooBigIndexException, Exception
        {
-               String p = ((String) list.elementAt(0)).substring(0, prefix);
+               String p = list.elementAt(0).substring(0, prefix);
                indices.add(p);
                File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+p+".xml");
                BufferedOutputStream fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
@@ -647,13 +653,13 @@
 
                /* Adding word index */
                Element keywordsElement = xmlDoc.createElement("keywords");
-               Vector fileid = new Vector();
+               Vector<Integer> fileid = new Vector<Integer>();
                for(int i =0;i<list.size();i++)
                {
                        Element wordElement = xmlDoc.createElement("word");
-                       String str = (String) tMap.get(list.elementAt(i));
+                       String str = tMap.get(list.elementAt(i));
                        wordElement.setAttribute("v",str );
-                       Integer[] idsForWord = (Integer[]) idsByWord.get(str);
+                       Integer[] idsForWord = idsByWord.get(str);
                        for (int j = 0; j < idsForWord.length; j++) {
                                Integer id = idsForWord[j];
                                Integer x = id;
@@ -678,7 +684,7 @@
                                
                                /* Position by position */
 
-                               HashMap positionsForGivenWord = 
(HashMap)positionsByWordById.get(x);
+                               HashMap positionsForGivenWord = 
positionsByWordById.get(x);
                                Integer[] positions = 
(Integer[])positionsForGivenWord.get(str);
                                StringBuilder positionList = new 
StringBuilder();
 
@@ -943,12 +949,12 @@
                ctx.maxNonSplitfileRetries = 10;
                ctx.maxTempLength = 2 * 1024 * 1024;
                ctx.maxOutputLength = 2 * 1024 * 1024;
-               allowedMIMETypes = new HashSet();
+               allowedMIMETypes = new HashSet<String>();
                allowedMIMETypes.add(new String("text/html"));
                allowedMIMETypes.add(new String("text/plain"));
                allowedMIMETypes.add(new String("application/xhtml+xml"));
 
-               ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
+               ctx.allowedMIMETypes = new HashSet<String>(allowedMIMETypes);
 
                tProducedIndex = System.currentTimeMillis();
                stopped = false;
@@ -1009,15 +1015,16 @@
  */
        private synchronized void appendList(String listname, StringBuilder 
out, String stylesheet)
        {
-               Iterator it = (runningFetchesByURI.keySet()).iterator();
+               Iterator<FreenetURI> it = 
(runningFetchesByURI.keySet()).iterator();
                if(listname.equals("running"))
                        it = (runningFetchesByURI.keySet()).iterator();
                if(listname.equals("visited"))
-                       it = (new HashSet(visitedURIs)).iterator();
+                       it = (new HashSet<FreenetURI>(visitedURIs)).iterator();
                if(listname.startsWith("queued"))
-                       it = (new 
ArrayList(queuedURIList[Integer.parseInt(listname.substring("queued".length()))])).iterator();
+                       it = (new 
ArrayList<FreenetURI>(queuedURIList[Integer.parseInt(listname.substring("queued".length()))]))
+                               .iterator();
                if(listname.equals("failed"))
-                       it = (new HashSet(failedURIs)).iterator();
+                       it = (new HashSet<FreenetURI>(failedURIs)).iterator();
                while(it.hasNext())
                        
out.append("<code>"+it.next().toString()+"</code><br/>");
        }
@@ -1032,16 +1039,16 @@
                out.append("Add uri:");
                out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
                out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
-               Set runningFetches;
-               Set visited;
-               Set failed;
+               Set<FreenetURI> runningFetches;
+               Set<FreenetURI> visited;
+               Set<FreenetURI> failed;
                List[] queued = new List[queuedURIList.length];
                synchronized(this) {
-                       visited = new HashSet(visitedURIs);
-                       failed = new HashSet(failedURIs);
+                       visited = new HashSet<FreenetURI>(visitedURIs);
+                       failed = new HashSet<FreenetURI>(failedURIs);
                        for(int i=0;i<queuedURIList.length;i++)
                                queued[i] = new ArrayList(queuedURIList[i]);
-                       runningFetches = new 
HashSet(runningFetchesByURI.keySet());
+                       runningFetches = new 
HashSet<FreenetURI>(runningFetchesByURI.keySet());
                }
                out.append("<p><h3>Running Fetches</h3></p>");
                out.append("<br/>Size :"+runningFetches.size()+"<br/>");
@@ -1085,8 +1092,8 @@
        }
 
 
-       private void appendList(Set  list,StringBuilder out, String stylesheet){
-               Iterator it = list.iterator();
+       private void appendList(Set<FreenetURI> list, StringBuilder out, String 
stylesheet) {
+               Iterator<FreenetURI> it = list.iterator();
                int i = 0;
                while(it.hasNext()){
                        if(i<=maxShownURIs){
@@ -1181,7 +1188,7 @@
                         */
                        String[] words = s.split("[^\\p{L}\\{N}]");
                        Integer lastPosition = null;
-                       lastPosition = (Integer)lastPositionById.get(id);
+                       lastPosition = lastPositionById.get(id);
 
                        if(lastPosition == null)
                                lastPosition = new Integer(1); 
@@ -1212,18 +1219,33 @@
                        if(word.length() < 3)
                                return;
 
-                       Integer[] ids = (Integer[]) idsByWord.get(word);
+                       Integer[] ids = idsByWord.get(word);
                        idsWithWords.add(id);
 
                        /* Word position indexation */
-                       HashMap wordPositionsForOneUri = 
(HashMap)positionsByWordById.get(id); /* For a given URI, take as key a word, 
and gives position */
+                       HashMap<String, Integer[]> wordPositionsForOneUri = 
positionsByWordById.get(id); /*
+                                                                               
                                                                                
                                 * For
+                                                                               
                                                                                
                                 * a
+                                                                               
                                                                                
                                 * given
+                                                                               
                                                                                
                                 * URI
+                                                                               
                                                                                
                                 * ,
+                                                                               
                                                                                
                                 * take
+                                                                               
                                                                                
                                 * as
+                                                                               
                                                                                
                                 * key
+                                                                               
                                                                                
                                 * a
+                                                                               
                                                                                
                                 * word
+                                                                               
                                                                                
                                 * ,
+                                                                               
                                                                                
                                 * and
+                                                                               
                                                                                
                                 * gives
+                                                                               
                                                                                
                                 * position
+                                                                               
                                                                                
                                 */
                        if(wordPositionsForOneUri == null) {
-                               wordPositionsForOneUri = new HashMap();
+                               wordPositionsForOneUri = new HashMap<String, 
Integer[]>();
                                wordPositionsForOneUri.put(word, new Integer[] 
{ new Integer(position) });
                                positionsByWordById.put(id, 
wordPositionsForOneUri);
                        } 
                        else {
-                               Integer[] positions = 
(Integer[])wordPositionsForOneUri.get(word);
+                               Integer[] positions = 
wordPositionsForOneUri.get(word);
                                if(positions == null) {
                                        positions = new Integer[] { new 
Integer(position) };
                                        wordPositionsForOneUri.put(word, 
positions);

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

[freenet-cvs] r24121 - trunk/plugins/XMLSpider

Reply via email to