On Fri, Dec 19, 2008 at 12:06 AM, <j16sdiz at freenetproject.org> wrote:
> Author: j16sdiz
> Date: 2008-12-18 16:06:13 +0000 (Thu, 18 Dec 2008)
> New Revision: 24519
>
> Modified:
> trunk/plugins/XMLSpider/XMLSpider.java
> Log:
> pass Term directory, no getTermByMd5
Typo in the log message above: it should read "directly", not "directory".
this make index faster ~30%
>
> Modified: trunk/plugins/XMLSpider/XMLSpider.java
> ===================================================================
> --- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-18 09:12:23 UTC (rev
> 24518)
> +++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-18 16:06:13 UTC (rev
> 24519)
> @@ -619,11 +619,12 @@
> int prefix = (int) ((Math.log(termSet.size()) -
> Math.log(MAX_ENTRIES)) / Math.log(16)) - 1;
> if (prefix <= 0) prefix = 1;
> match = 1;
> - Vector<String> list = new Vector<String>();
> + Vector<Term> list = new Vector<Term>();
>
> - String str = termSet.get(0).md5;
> + Term term0 = termSet.get(0);
> + String str = term0.md5;
> String currentPrefix = str.substring(0, prefix);
> - list.add(str);
> + list.add(term0);
>
> int i = 0;
> for (Term term : termSet)
> @@ -632,15 +633,15 @@
> //create a list of the words to be added in the same
> subindex
> if (key.startsWith(currentPrefix))
> {i++;
> - list.add(key);
> + list.add(term);
> }
> else {
> //generate the appropriate subindex with the
> current list
> generateSubIndex(prefix,list);
> str = key;
> currentPrefix = str.substring(0, prefix);
> - list = new Vector<String>();
> - list.add(key);
> + list = new Vector<Term>();
> + list.add(term);
> }
> }
>
> @@ -648,7 +649,7 @@
> }
>
>
> - private void generateSubIndex(int p, List<String> list) throws
> Exception {
> + private void generateSubIndex(int p, List<Term> list) throws
> Exception {
> boolean logMINOR = Logger.shouldLog(Logger.MINOR, this);
> /*
> * if the list is less than max allowed entries in a file then
> directly generate the xml
> @@ -676,10 +677,11 @@
> match = p + 1;
> int prefix = p + 1;
> int i = 0;
> - String str = list.get(i);
> + String str = list.get(i).md5;
> int index = 0;
> while (i < list.size()) {
> - String key = list.get(i);
> + Term term = list.get(i);
> + String key = term.md5;
> if ((key.substring(0, prefix)).equals(str.substring(0,
> prefix)))
> {
> i++;
> @@ -702,9 +704,9 @@
> * @param prefix number of matching bits of md5
> * @throws Exception
> */
> - protected void generateXML(List<String> list, int prefix) throws
> TooBigIndexException, Exception
> + protected void generateXML(List<Term> list, int prefix) throws
> TooBigIndexException, Exception
> {
> - String p = list.get(0).substring(0, prefix);
> + String p = list.get(0).md5.substring(0, prefix);
> indices.add(p);
> File outputFile = new
> File(DEFAULT_INDEX_DIR+"index_"+p+".xml");
> BufferedOutputStream fos = new BufferedOutputStream(new
> FileOutputStream(outputFile));
> @@ -750,7 +752,7 @@
> Vector<Long> fileid = new Vector<Long>();
> for (int i = 0; i < list.size(); i++) {
> Element wordElement =
> xmlDoc.createElement("word");
> - Term term = getTermByMd5(list.get(i));
> + Term term = list.get(i);
> wordElement.setAttribute("v", term.word);
>
> Query query = db.query();
>
> _______________________________________________
> cvs mailing list
> cvs at freenetproject.org
> http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs
>