Author: toad
Date: 2007-09-20 21:49:17 +0000 (Thu, 20 Sep 2007)
New Revision: 15214

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
14: disable inlinks/outlinks, for now. sync fixes.

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2007-09-20 21:35:55 UTC (rev 15213)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2007-09-20 21:49:17 UTC (rev 15214)
@@ -110,16 +110,18 @@
        private final HashMap titlesOfIds = new HashMap();
        private final HashMap uriIds = new HashMap();
        private final HashMap idUris = new HashMap();
+       
+       // Re-enable outlinks/inlinks when we publish them or use them for ranking.
        /**
         * Lists the outlinks from a particular page, 
         * </br> indexed by the id of page uri
         */
-       public final HashMap outlinks = new HashMap();
+//     public final HashMap outlinks = new HashMap();
        /**
         * Lists the inlinks to a particular page,
         *  indexed by the id of page uri.
         */
-       public final HashMap inlinks = new HashMap();
+//     public final HashMap inlinks = new HashMap();
        private Vector indices;
        private int match;
        private Integer id;
@@ -138,7 +140,7 @@
         */
        public Set allowedMIMETypes;
        private static final int MAX_ENTRIES = 200;
-       private static int version = 13;
+       private static int version = 14;
        private static final String pluginName = "XML spider "+version;
        /**
         * Gives the allowed fraction of total time spent on generating indices with
@@ -254,16 +256,21 @@
                        Bucket data = result.asBucket();
                        String mimeType = cm.getMIMEType();

-                       sizeOfURIs.put(uri.toString(), new Long(data.size()));
-                       mimeOfURIs.put(uri.toString(), mimeType);
-                       PageCallBack page = new PageCallBack((Integer) uriIds.get(uri));
-                       Logger.minor(this, "Successful: "+uri+" : "+page.id);
-                       inlinks.put(page.id, new Vector());
-                       outlinks.put(page.id, new Vector());
+                       Integer id;
+                       synchronized(this) {
+                               sizeOfURIs.put(uri.toString(), new Long(data.size()));
+                               mimeOfURIs.put(uri.toString(), mimeType);
+                               id = (Integer) uriIds.get(uri);
+//                             inlinks.put(page.id, new Vector());
+//                             outlinks.put(page.id, new Vector());
+                       }
                        /*
                         * instead of passing the current object, the pagecallback object for every page is passed to the content filter
-                        * this is to allow inlinks and outlinks be indexed by specific pages
+                        * this has many benefits to efficiency, and allows us to identify trivially which page is being indexed.
+                        * (we CANNOT rely on the base href provided).
                         */
+                       PageCallBack page = new PageCallBack(id);
+                       Logger.minor(this, "Successful: "+uri+" : "+page.id);

                        try {
                                Logger.minor(this, "Filtering "+uri+" : "+page.id);
@@ -1054,39 +1061,42 @@

                        Logger.minor(this, "foundURI "+uri+" on "+id);
                        queueURI(uri);
-                       Integer iduri = (Integer) uriIds.get(uri);
+                       // FIXME re-enable outlinks/inlinks when we can do something useful with them
+//                     synchronized(XMLSpider.this) {
+//                     Integer iduri = (Integer) uriIds.get(uri);
 /*
  * update the outlink information for the current page
  */
-                       if(outlinks.containsKey(id)){
-                               Vector outlink = (Vector) outlinks.get(id);
-                               if(!outlink.contains(iduri))    
-                                       outlink.add(iduri);
-                               outlinks.remove(id);
-                               outlinks.put(id, outlink);
-                       }
-                       else 
-                       {
-                               Vector outlink = new Vector();
-                               outlink.add(iduri);
-                               outlinks.put(id, outlink);
-                       }
+//                     if(outlinks.containsKey(id)){
+//                             Vector outlink = (Vector) outlinks.get(id);
+//                             if(!outlink.contains(iduri))    
+//                                     outlink.add(iduri);
+//                             outlinks.remove(id);
+//                             outlinks.put(id, outlink);
+//                     }
+//                     else 
+//                     {
+//                             Vector outlink = new Vector();
+//                             outlink.add(iduri);
+//                             outlinks.put(id, outlink);
+//                     }
 /*
  * update the inlink information for the new link 
  */
-                       if(inlinks.containsKey(iduri)){
-                               Vector inlink = (Vector) inlinks.get(iduri);
-                               if(!inlink.contains(id)) inlink.add(id);
-                               inlinks.remove(iduri);
-                               inlinks.put(iduri, inlink);
-                       }
-                       else 
-                       {
-                               Vector inlink = new Vector();
-                               inlink.add(id);
-                               inlinks.put(iduri, inlink);
-                       }
-
+//                     if(inlinks.containsKey(iduri)){
+//                             Vector inlink = (Vector) inlinks.get(iduri);
+//                             if(!inlink.contains(id)) inlink.add(id);
+//                             inlinks.remove(iduri);
+//                             inlinks.put(iduri, inlink);
+//                     }
+//                     else 
+//                     {
+//                             Vector inlink = new Vector();
+//                             inlink.add(id);
+//                             inlinks.put(iduri, inlink);
+//                     }
+//                     } // synchronized
+                       
                        startSomeRequests();
                }



Reply via email to