Author: j16sdiz
Date: 2009-01-03 06:41:14 +0000 (Sat, 03 Jan 2009)
New Revision: 24891

Modified:
   trunk/plugins/XMLSpider/IndexWriter.java
Log:
Shortcut for the case where the index size is too big

Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java    2009-01-03 06:41:05 UTC (rev 24890)
+++ trunk/plugins/XMLSpider/IndexWriter.java    2009-01-03 06:41:14 UTC (rev 24891)
@@ -251,12 +251,12 @@
                Config config = perstRoot.getConfig();
                
                File outputFile = new File(config.getIndexDir() + "index_" + 
prefix + ".xml");
-               BufferedOutputStream fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
-               StreamResult resultStream = new StreamResult(fos);
+               BufferedOutputStream fos = null;
 
                IterableIterator<Term> termIterator = 
perstRoot.getTermIterator(prefix, prefix + "g");
 
                int count = 0;
+               int estimateSize = 0;
                try {
                        /* Initialize xml builder */
                        Document xmlDoc = null;
@@ -293,13 +293,19 @@
                        Element keywordsElement = 
xmlDoc.createElement("keywords");
                        Vector<Long> fileid = new Vector<Long>();
                        for (Term term : termIterator) {
-                               count++;
-                               
                                Element wordElement = 
xmlDoc.createElement("word");
                                wordElement.setAttribute("v", term.getWord());
 
+                               count++;
+                               estimateSize += 12;
+                               estimateSize += term.getWord().length();
+                               
+                               if ((count > 1 && estimateSize > 
config.getIndexSubindexMaxSize())
+                                       || (count > 
config.getIndexMaxEntries())) {
+                                       return false;
+                               }
+
                                Set<Page> pages = term.getPages();
-
                                for (Page page : pages) {
                                        TermPosition termPos = 
page.getTermPosition(term, false);
                                        if (termPos == null) continue;
@@ -331,9 +337,18 @@
                                                        }
                                                        
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
                                                        
wordElement.appendChild(uriElement);
+                                                       
+                                                       estimateSize += 13;
+                                                       estimateSize += 
positionList.length();
+                                               
                                                        if 
(!fileid.contains(page.getId())) {
                                                                
fileid.add(page.getId());
                                                                
filesElement.appendChild(fileElement);
+                                                               
+                                                               estimateSize += 
15;
+                                                               estimateSize += 
filesElement.getAttribute("id").length();
+                                                               estimateSize += 
filesElement.getAttribute("key").length();
+                                                               estimateSize += 
filesElement.getAttribute("title").length();
                                                        }
                                                }
                                        }
@@ -355,7 +370,7 @@
                        DOMSource domSource = new DOMSource(xmlDoc);
                        TransformerFactory transformFactory = 
TransformerFactory.newInstance();
                        Transformer serializer;
-
+                       
                        try {
                                serializer = transformFactory.newTransformer();
                        } catch 
(javax.xml.transform.TransformerConfigurationException e) {
@@ -363,6 +378,10 @@
                        }
                        serializer.setOutputProperty(OutputKeys.ENCODING, 
"UTF-8");
                        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
+
+                       fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
+                       StreamResult resultStream = new StreamResult(fos);
+                       
                        /* final step */
                        try {
                                serializer.transform(domSource, resultStream);

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

Reply via email to