Author: j16sdiz
Date: 2009-01-03 06:41:25 +0000 (Sat, 03 Jan 2009)
New Revision: 24892
Modified:
trunk/plugins/XMLSpider/IndexWriter.java
Log:
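More optimization: cache the config size/entry limits in local constants, track seen file ids in a HashSet instead of a Vector, build each <file> element only the first time a page is seen, and include the term's page count in the size estimate.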
Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java 2009-01-03 06:41:14 UTC (rev 24891)
+++ trunk/plugins/XMLSpider/IndexWriter.java 2009-01-03 06:41:25 UTC (rev 24892)
@@ -9,6 +9,7 @@
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
+import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
@@ -248,7 +249,9 @@
* @throws IOException
*/
private boolean generateXML(PerstRoot perstRoot, String prefix) throws
IOException {
- Config config = perstRoot.getConfig();
+ final Config config = perstRoot.getConfig();
+ final long MAX_SIZE = config.getIndexSubindexMaxSize();
+ final int MAX_ENTRIES = config.getIndexMaxEntries();
File outputFile = new File(config.getIndexDir() + "index_" +
prefix + ".xml");
BufferedOutputStream fos = null;
@@ -291,7 +294,7 @@
/* Adding word index */
Element keywordsElement =
xmlDoc.createElement("keywords");
- Vector<Long> fileid = new Vector<Long>();
+ Set<Long> fileid = new HashSet<Long>();
for (Term term : termIterator) {
Element wordElement =
xmlDoc.createElement("word");
wordElement.setAttribute("v", term.getWord());
@@ -299,13 +302,14 @@
count++;
estimateSize += 12;
estimateSize += term.getWord().length();
+
+ Set<Page> pages = term.getPages();
- if ((count > 1 && estimateSize >
config.getIndexSubindexMaxSize())
- || (count >
config.getIndexMaxEntries())) {
+ if ((count > 1 && (estimateSize + pages.size()
* 13) > MAX_SIZE) || //
+ (count > MAX_ENTRIES)) {
return false;
}
- Set<Page> pages = term.getPages();
for (Page page : pages) {
TermPosition termPos =
page.getTermPosition(term, false);
if (termPos == null) continue;
@@ -318,12 +322,7 @@
* the files mentioned
in the entire subindex
*/
Element uriElement =
xmlDoc.createElement("file");
- Element fileElement =
xmlDoc.createElement("file");
uriElement.setAttribute("id", Long.toString(page.getId()));
-
fileElement.setAttribute("id", Long.toString(page.getId()));
-
fileElement.setAttribute("key", page.getURI());
-
fileElement.setAttribute("title", page.getPageTitle() != null ?
page.getPageTitle() : page
- .getURI());
/* Position by position
*/
int[] positions =
termPos.positions;
@@ -343,6 +342,13 @@
if
(!fileid.contains(page.getId())) {
fileid.add(page.getId());
+
+ Element
fileElement = xmlDoc.createElement("file");
+
fileElement.setAttribute("id", Long.toString(page.getId()));
+
fileElement.setAttribute("key", page.getURI());
+
fileElement.setAttribute("title", page.getPageTitle() != null ?
page.getPageTitle()
+ :
page.getURI());
+
filesElement.appendChild(fileElement);
estimateSize +=
15;
@@ -392,7 +398,7 @@
Closer.close(fos);
}
- if (outputFile.length() > config.getIndexSubindexMaxSize() &&
count > 1) {
+ if (outputFile.length() > MAX_SIZE && count > 1) {
outputFile.delete();
return false;
}
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs