Author: swatig0
Date: 2007-07-13 18:41:27 +0000 (Fri, 13 Jul 2007)
New Revision: 14086

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
USK URIs are now added as their base SSK URIs, to avoid duplicate keys in the index

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2007-07-13 18:37:21 UTC (rev 14085)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2007-07-13 18:41:27 UTC (rev 14086)
@@ -100,14 +100,14 @@
        private Vector list;
        private boolean indexing ;

-       private static final int minTimeBetweenEachIndexRewriting = 10;
+       private static final int minTimeBetweenEachIndexRewriting = 50;
 /**
  * DEFAULT_INDEX_DIR is the directory where the generated indices are stored.
  * Needs to be created before it can be used
  */
        private static final String DEFAULT_INDEX_DIR = "myindex3/";
        public Set allowedMIMETypes;
-       private static final int MAX_ENTRIES = 5;
+       private static final int MAX_ENTRIES = 30;
        private static final String pluginName = "XML spider";
        /**
         * This gives the allowed fraction of total time spent on generating indices
@@ -139,7 +139,16 @@
                if((uri.getKeyType()).equals("USK")){
                        if(uri.getSuggestedEdition() < 0)
                                uri = uri.setSuggestedEdition((-1)* uri.getSuggestedEdition());
+                       try{
+                       uri = (USK.create(uri)).getBaseSSK();
+                       /**
+                        * All uris are added as SSK
+                        * 
+                        */
+                       }
+                       catch(Exception e){}
                }
+               
                if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
                        queuedURIList.addLast(uri);
                        visitedURIs.add(uri);
@@ -355,8 +364,16 @@
                //Integer[] positions = (Integer[]) positionsByWordByURI.get(word);

                urisWithWords.add(uri);
+//     FileWriter outp = new FileWriter("uricheck",true);
+//     outp.write(uri.getDocName()+"\n");
+//     outp.write(uri.getKeyType()+"\n");
+//     outp.write(uri.getMetaString()+"\n");
+//     outp.write(uri.getGuessableKey()+"\n");
+//     outp.write(uri.hashCode()+"\n");
+//     outp.write(uri.getPreferredFilename()+"\n");
+//     
+//     outp.close();

-
                /* Word position indexation */
                HashMap wordPositionsForOneUri = (HashMap)positionsByWordByURI.get(uri.toString()); /* For a given URI, take as key a word, and gives position */

@@ -396,7 +413,7 @@
                //the new word is added here in urisByWord
                tMap.put(MD5(word), word);
                long time_indexing = System.currentTimeMillis();
-               if (tProducedIndex + minTimeBetweenEachIndexRewriting * 10 < System.currentTimeMillis()) {
+               if (tProducedIndex + minTimeBetweenEachIndexRewriting * 1000 < System.currentTimeMillis()) {
                        try {
                                //produceIndex();
                                //check();
@@ -1305,16 +1322,16 @@
        public void startPlugin() {
                stopped = false;

-               Thread starterThread = new Thread("Spider Plugin Starter") {
-                       public void run() {
-                               try{
-                                       Thread.sleep(30 * 1000); // Let the node start up
-                               } catch (InterruptedException e){}
-                               startSomeRequests();
-                       }
-               };
-               starterThread.setDaemon(true);
-               starterThread.start();
+//             Thread starterThread = new Thread("Spider Plugin Starter") {
+//                     public void run() {
+//                             try{
+//                                     Thread.sleep(30 * 1000); // Let the node start up
+//                             } catch (InterruptedException e){}
+//                             startSomeRequests();
+//                     }
+//             };
+//             starterThread.setDaemon(true);
+//             starterThread.start();
        }

        /**
@@ -1487,6 +1504,8 @@
        indexing = true;
        stopped = false;
        count = 0;
+       
+       //startPlugin();
        Thread starterThread = new Thread("Spider Plugin Starter") {
                public void run() {
                        try{
@@ -1612,7 +1631,7 @@
        Iterator it=queued.iterator();
        out.append("<br/>Size :"+runningFetches.size());
        appendList(runningFetches,out,stylesheet);
-       out.append("<p><a href=\"?list="+"running"+"\">Showall running</a><br/></p>");
+       out.append("<p><a href=\"?list="+"running"+"\">Show all</a><br/></p>");
        out.append("<br/>Size :"+queued.size());
        int i = 0;
        while(it.hasNext()){
@@ -1622,13 +1641,13 @@
                else break;
                i++;
        }
-       out.append("<p><a href=\"?list="+"queued"+"\">Showall queued</a><br/></p>");
+       out.append("<p><a href=\"?list="+"queued"+"\">Show all</a><br/></p>");
        out.append("<br/>Size :"+visited.size());
        appendList(visited,out,stylesheet);
-       out.append("<p><a href=\"?list="+"visited"+"\">Showall visited</a><br/></p>");
+       out.append("<p><a href=\"?list="+"visited"+"\">Show all</a><br/></p>");
        out.append("<br/>Size :"+failed.size());
        appendList(failed,out,stylesheet);
-       out.append("<p><a href=\"?list="+"failed"+"\">Showall failed</a><br/></p>");
+       out.append("<p><a href=\"?list="+"failed"+"\">Show all</a><br/></p>");


 }


Reply via email to