Author: toad
Date: 2008-01-28 16:52:13 +0000 (Mon, 28 Jan 2008)
New Revision: 17365

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
XMLSpider: split by uncompressed subindex XML size.

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-01-28 16:39:30 UTC (rev 17364)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-01-28 16:52:13 UTC (rev 17365)
@@ -140,6 +140,7 @@
         */
        public Set allowedMIMETypes;
        private static final int MAX_ENTRIES = 2000;
+       private static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 
256*1024*1024;
        private static int version = 15;
        private static final String pluginName = "XML spider "+version;
        /**
@@ -497,12 +498,15 @@
                 * and iterate till the number of entries per subindex is less than the allowed value
                 */

-               if(list.size() < MAX_ENTRIES)
-               {       
-                       generateXML(list,p);
+               try {
+                       if(list.size() < MAX_ENTRIES)
+                       {       
+                               generateXML(list,p);
+                               return;
+                       }
+               } catch (TooBigIndexException e) {
+                       // Handle below
                }
-               else
-               {
                        //prefix needs to be incremented
                        if(match <= p) match = p+1; 
                        int prefix = p+1;
@@ -523,16 +527,19 @@
                                }
                        }
                        generateSubIndex(prefix,subVector(list,index,i-1));
-               }
        }       

+       private class TooBigIndexException extends Exception {
+               
+       }
+       
        /**
         * generates the xml index with the given list of words with prefix number of matching bits in md5
         * @param list  list of the words to be added in the index
         * @param prefix number of matching bits of md5
         * @throws Exception
         */
-       public synchronized void generateXML (Vector list, int prefix) throws 
Exception
+       public synchronized void generateXML (Vector list, int prefix) throws 
TooBigIndexException, Exception
        {
                String p = ((String) list.elementAt(0)).substring(0, prefix);
                indices.add(p);
@@ -655,6 +662,10 @@
                } finally {
                        fos.close();
                }
+               if(outputFile.length() > MAX_SUBINDEX_UNCOMPRESSED_SIZE) {
+                       outputFile.delete();
+                       throw new TooBigIndexException();
+               }

                if(Logger.shouldLog(Logger.MINOR, this))
                        Logger.minor(this, "Spider: indexes regenerated.");


Reply via email to