No, not really... just an optimization I found when hunting ;) I'm now working on an AutomatonTermsEnum that uses the flex API directly, to test its performance.
One of the major challenges with flex is the 4-way testing required. I.e., you can have a non-flex or flex index, and then you can access it via the non-flex or flex API. All 4 combinations are allowed, and must work (for back-compat). I'm most concerned about performance of the flex API on top of a flex index, since that's the future, but not hurting performance of the other 3 is also important. Mike On Sun, Nov 22, 2009 at 10:22 AM, Robert Muir <rcm...@gmail.com> wrote: > was this why i saw strange benchmark results? > > On Sun, Nov 22, 2009 at 9:52 AM, <mikemcc...@apache.org> wrote: >> >> Author: mikemccand >> Date: Sun Nov 22 14:52:02 2009 >> New Revision: 883088 >> >> URL: http://svn.apache.org/viewvc?rev=883088&view=rev >> Log: >> LUCENE-1458 (on flex branch): small optimization to terms dict cache: >> don't store redundant TermRef >> >> Modified: >> >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java >> >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java >> >> Modified: >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java >> URL: >> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=883088&r1=883087&r2=883088&view=diff >> >> ============================================================================== >> --- >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java >> (original) >> +++ >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java >> Sun Nov 22 14:52:02 2009 >> @@ -36,6 +36,8 @@ >> copy(text); >> } >> >> + // nocommit: we could do this w/ UnicodeUtil w/o requiring >> + // allocation of new bytes[]? 
>> public void copy(String text) { >> try { >> bytes = text.getBytes("UTF-8"); >> >> Modified: >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java >> URL: >> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=883088&r1=883087&r2=883088&view=diff >> >> ============================================================================== >> --- >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java >> (original) >> +++ >> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java >> Sun Nov 22 14:52:02 2009 >> @@ -304,6 +304,7 @@ >> public SeekStatus seek(TermRef term) throws IOException { >> ReuseLRUCache<TermRef, CacheEntry> cache = null; >> CacheEntry entry = null; >> + TermRef entryKey = null; >> >> if (docs.canCaptureState()) { >> final ThreadResources resources = getThreadResources(); >> @@ -312,7 +313,7 @@ >> entry = cache.get(term); >> if (entry != null) { >> docFreq = entry.freq; >> - bytesReader.term.copy(entry.term); >> + bytesReader.term.copy(term); >> docs.setState(entry, docFreq); >> termUpto = entry.termUpTo; >> // nocommit -- would be better to do this lazy? 
>> @@ -384,16 +385,17 @@ >> entry = cache.eldest; >> cache.eldest = null; >> docs.captureState(entry); >> - entry.term.copy(bytesReader.term); >> + entryKey = cache.eldestKey; >> + entryKey.copy(bytesReader.term); >> } else { >> entry = docs.captureState(null); >> - entry.term = (TermRef) bytesReader.term.clone(); >> + entryKey = (TermRef) bytesReader.term.clone(); >> } >> entry.freq = docFreq; >> entry.termUpTo = termUpto; >> entry.filePointer = in.getFilePointer(); >> >> - cache.put(entry.term, entry); >> + cache.put(entryKey, entry); >> } >> return SeekStatus.FOUND; >> } else if (cmp > 0) { >> @@ -517,9 +519,8 @@ >> >> // nocommit -- scrutinize API >> public static class CacheEntry { >> - int termUpTo; >> - TermRef term; // nocommit -- really needed? >> - long filePointer; >> + int termUpTo; // ord for this term >> + long filePointer; // fp into the terms >> dict primary file (_X.tis) >> >> // nocommit -- belongs in Pulsing's CacheEntry class: >> public int freq; >> @@ -563,6 +564,7 @@ >> private final static float LOADFACTOR = 0.75f; >> private int cacheSize; >> V eldest; >> + K eldestKey; >> >> /** >> * Creates a last-recently-used cache with the specified size. >> @@ -580,6 +582,7 @@ >> boolean remove = size() > ReuseLRUCache.this.cacheSize; >> if (remove) { >> this.eldest = eldest.getValue(); >> + this.eldestKey = eldest.getKey(); >> } >> return remove; >> } >> >> > > > > -- > Robert Muir > rcm...@gmail.com > --------------------------------------------------------------------- To unsubscribe, e-mail: java-dev-unsubscr...@lucene.apache.org For additional commands, e-mail: java-dev-h...@lucene.apache.org