Author: j16sdiz
Date: 2009-01-03 06:41:14 +0000 (Sat, 03 Jan 2009)
New Revision: 24891
Modified:
trunk/plugins/XMLSpider/IndexWriter.java
Log:
Add a shortcut for the case where the index size is too big.
Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java 2009-01-03 06:41:05 UTC (rev 24890)
+++ trunk/plugins/XMLSpider/IndexWriter.java 2009-01-03 06:41:14 UTC (rev 24891)
@@ -251,12 +251,12 @@
Config config = perstRoot.getConfig();
File outputFile = new File(config.getIndexDir() + "index_" +
prefix + ".xml");
- BufferedOutputStream fos = new BufferedOutputStream(new
FileOutputStream(outputFile));
- StreamResult resultStream = new StreamResult(fos);
+ BufferedOutputStream fos = null;
IterableIterator<Term> termIterator =
perstRoot.getTermIterator(prefix, prefix + "g");
int count = 0;
+ int estimateSize = 0;
try {
/* Initialize xml builder */
Document xmlDoc = null;
@@ -293,13 +293,19 @@
Element keywordsElement =
xmlDoc.createElement("keywords");
Vector<Long> fileid = new Vector<Long>();
for (Term term : termIterator) {
- count++;
-
Element wordElement =
xmlDoc.createElement("word");
wordElement.setAttribute("v", term.getWord());
+ count++;
+ estimateSize += 12;
+ estimateSize += term.getWord().length();
+
+ if ((count > 1 && estimateSize >
config.getIndexSubindexMaxSize())
+ || (count >
config.getIndexMaxEntries())) {
+ return false;
+ }
+
Set<Page> pages = term.getPages();
-
for (Page page : pages) {
TermPosition termPos =
page.getTermPosition(term, false);
if (termPos == null) continue;
@@ -331,9 +337,18 @@
}
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
wordElement.appendChild(uriElement);
+
+ estimateSize += 13;
+ estimateSize +=
positionList.length();
+
if
(!fileid.contains(page.getId())) {
fileid.add(page.getId());
filesElement.appendChild(fileElement);
+
+ estimateSize +=
15;
+ estimateSize +=
filesElement.getAttribute("id").length();
+ estimateSize +=
filesElement.getAttribute("key").length();
+ estimateSize +=
filesElement.getAttribute("title").length();
}
}
}
@@ -355,7 +370,7 @@
DOMSource domSource = new DOMSource(xmlDoc);
TransformerFactory transformFactory =
TransformerFactory.newInstance();
Transformer serializer;
-
+
try {
serializer = transformFactory.newTransformer();
} catch
(javax.xml.transform.TransformerConfigurationException e) {
@@ -363,6 +378,10 @@
}
serializer.setOutputProperty(OutputKeys.ENCODING,
"UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
+
+ fos = new BufferedOutputStream(new
FileOutputStream(outputFile));
+ StreamResult resultStream = new StreamResult(fos);
+
/* final step */
try {
serializer.transform(domSource, resultStream);
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs