Was this why I saw strange benchmark results? On Sun, Nov 22, 2009 at 9:52 AM, <mikemcc...@apache.org> wrote:
> Author: mikemccand > Date: Sun Nov 22 14:52:02 2009 > New Revision: 883088 > > URL: http://svn.apache.org/viewvc?rev=883088&view=rev > Log: > LUCENE-1458 (on flex branch): small optimization to terms dict cache: don't > store redundant TermRef > > Modified: > > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java > > > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java > > Modified: > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java > URL: > http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=883088&r1=883087&r2=883088&view=diff > > ============================================================================== > --- > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java > (original) > +++ > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java > Sun Nov 22 14:52:02 2009 > @@ -36,6 +36,8 @@ > copy(text); > } > > + // nocommit: we could do this w/ UnicodeUtil w/o requiring > + // allocation of new bytes[]? 
> public void copy(String text) { > try { > bytes = text.getBytes("UTF-8"); > > Modified: > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java > URL: > http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=883088&r1=883087&r2=883088&view=diff > > ============================================================================== > --- > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java > (original) > +++ > lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java > Sun Nov 22 14:52:02 2009 > @@ -304,6 +304,7 @@ > public SeekStatus seek(TermRef term) throws IOException { > ReuseLRUCache<TermRef, CacheEntry> cache = null; > CacheEntry entry = null; > + TermRef entryKey = null; > > if (docs.canCaptureState()) { > final ThreadResources resources = getThreadResources(); > @@ -312,7 +313,7 @@ > entry = cache.get(term); > if (entry != null) { > docFreq = entry.freq; > - bytesReader.term.copy(entry.term); > + bytesReader.term.copy(term); > docs.setState(entry, docFreq); > termUpto = entry.termUpTo; > // nocommit -- would be better to do this lazy? 
> @@ -384,16 +385,17 @@ > entry = cache.eldest; > cache.eldest = null; > docs.captureState(entry); > - entry.term.copy(bytesReader.term); > + entryKey = cache.eldestKey; > + entryKey.copy(bytesReader.term); > } else { > entry = docs.captureState(null); > - entry.term = (TermRef) bytesReader.term.clone(); > + entryKey = (TermRef) bytesReader.term.clone(); > } > entry.freq = docFreq; > entry.termUpTo = termUpto; > entry.filePointer = in.getFilePointer(); > > - cache.put(entry.term, entry); > + cache.put(entryKey, entry); > } > return SeekStatus.FOUND; > } else if (cmp > 0) { > @@ -517,9 +519,8 @@ > > // nocommit -- scrutinize API > public static class CacheEntry { > - int termUpTo; > - TermRef term; // nocommit -- really needed? > - long filePointer; > + int termUpTo; // ord for this term > + long filePointer; // fp into the terms > dict primary file (_X.tis) > > // nocommit -- belongs in Pulsing's CacheEntry class: > public int freq; > @@ -563,6 +564,7 @@ > private final static float LOADFACTOR = 0.75f; > private int cacheSize; > V eldest; > + K eldestKey; > > /** > * Creates a last-recently-used cache with the specified size. > @@ -580,6 +582,7 @@ > boolean remove = size() > ReuseLRUCache.this.cacheSize; > if (remove) { > this.eldest = eldest.getValue(); > + this.eldestKey = eldest.getKey(); > } > return remove; > } > > > -- Robert Muir rcm...@gmail.com