Author: j16sdiz
Date: 2008-12-08 13:03:12 +0000 (Mon, 08 Dec 2008)
New Revision: 24125

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
minor memory optimization: store md5 of visited uri

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-12-08 12:05:03 UTC (rev 
24124)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-12-08 13:03:12 UTC (rev 
24125)
@@ -87,9 +87,9 @@
        // URIs visited, or fetching, or queued. Added once then forgotten 
about.
        /**
         * 
-        * Lists the uris that have been vistied by the spider
+        * Lists the md5 of uris that have been vistied by the spider
         */
-       public final HashSet<FreenetURI> visitedURIs = new 
HashSet<FreenetURI>();
+       public final HashSet<String> visitedURIMD5s = new HashSet<String>();
        private final HashSet<Integer> idsWithWords = new HashSet<Integer>();
        /**
         * Lists the uris that were visited but failed.
@@ -195,9 +195,9 @@
                        catch(Exception e){}
                }
 
-               if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
+               if ((!visitedURIMD5s.contains(MD5(uri.toString()))) && 
queuedURISet.add(uri)) {
                        queuedURIList[0].addLast(uri);
-                       visitedURIs.add(uri);
+                       visitedURIMD5s.add(MD5(uri.toString()));
                        uriIds.put(uri, id);
                        idUris.put(id, uri);
                        id++;
@@ -802,14 +802,21 @@
        /*
         * calculate the md5 for a given string
         */
-       private static String MD5(String text) throws NoSuchAlgorithmException, 
UnsupportedEncodingException  {
-               MessageDigest md;
-               md = MessageDigest.getInstance("MD5");
-               byte[] md5hash = new byte[32];
-               byte[] b = text.getBytes("UTF-8");
-               md.update(b, 0, b.length);
-               md5hash = md.digest();
-               return convertToHex(md5hash);
+       private static String MD5(String text) {
+               try {
+                       MessageDigest md = MessageDigest.getInstance("MD5");
+                       byte[] md5hash = new byte[32];
+                       byte[] b = text.getBytes("UTF-8");
+                       md.update(b, 0, b.length);
+                       md5hash = md.digest();
+                       return convertToHex(md5hash);
+               } catch (NoSuchAlgorithmException e) {
+                       // impossible
+                       throw new RuntimeException("MD5 not supported", e);
+               } catch (UnsupportedEncodingException e) {
+                       // impossible
+                       throw new RuntimeException("UTF-8 not supported", e);
+               }
        }
 
        public void generateSubIndex(String filename){
@@ -995,8 +1002,8 @@
                        try {
                                FreenetURI uri = new FreenetURI(uriParam);
                                synchronized (this) {
+                                       
visitedURIMD5s.remove(MD5(uri.toString()));
                                        failedURIs.remove(uri);
-                                       visitedURIs.remove(uri);
                                }
                                out.append("<p>URI added :"+uriParam+"</p>");
                                queueURI(uri);
@@ -1012,11 +1019,11 @@
  */
        private synchronized void appendList(String listname, StringBuilder 
out, String stylesheet)
        {
-               Iterator<FreenetURI> it = 
(runningFetchesByURI.keySet()).iterator();
+               Iterator it = (runningFetchesByURI.keySet()).iterator();
                if(listname.equals("running"))
                        it = (runningFetchesByURI.keySet()).iterator();
                if(listname.equals("visited"))
-                       it = (new HashSet<FreenetURI>(visitedURIs)).iterator();
+                       it = (new HashSet<String>(visitedURIMD5s)).iterator();
                if(listname.startsWith("queued"))
                        it = (new 
ArrayList<FreenetURI>(queuedURIList[Integer.parseInt(listname.substring("queued".length()))]))
                                .iterator();
@@ -1037,11 +1044,11 @@
                out.append("<form method=\"GET\"><input type=\"text\" 
name=\"adduri\" /><br/><br/>");
                out.append("<input type=\"submit\" value=\"Add uri\" 
/></form>");
                Set<FreenetURI> runningFetches;
-               Set<FreenetURI> visited;
+               Set<String> visited;
                Set<FreenetURI> failed;
                List[] queued = new List[queuedURIList.length];
                synchronized(this) {
-                       visited = new HashSet<FreenetURI>(visitedURIs);
+                       visited = new HashSet<String>(visitedURIMD5s);
                        failed = new HashSet<FreenetURI>(failedURIs);
                        for(int i=0;i<queuedURIList.length;i++)
                                queued[i] = new ArrayList(queuedURIList[i]);
@@ -1065,7 +1072,7 @@
                        }
                        out.append("<p><a href=\"?list="+"queued"+j+"\">Show 
all</a><br/></p>");
                }
-               out.append("<p><h3>Visited URIs</h3></p>");
+               out.append("<p><h3>Visited URIs MD5</h3></p>");
                out.append("<br/>Size :"+visited.size()+"<br/>");
                appendList(visited,out,stylesheet);
                out.append("<p><a href=\"?list="+"visited"+"\">Show 
all</a><br/></p>");
@@ -1089,8 +1096,8 @@
        }
 
 
-       private void appendList(Set<FreenetURI> list, StringBuilder out, String 
stylesheet) {
-               Iterator<FreenetURI> it = list.iterator();
+       private void appendList(Set<?> list, StringBuilder out, String 
stylesheet) {
+               Iterator<?> it = list.iterator();
                int i = 0;
                while(it.hasNext()){
                        if(i<=maxShownURIs){

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

Reply via email to