Author: yonik
Date: Tue Jun 16 15:34:07 2009
New Revision: 785258
URL: http://svn.apache.org/viewvc?rev=785258&view=rev
Log:
SOLR-1220: use doubling strategy for keeping track of the number of each term
in UnInvertedField
Modified:
lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
Modified:
lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java?rev=785258&r1=785257&r2=785258&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java Tue Jun 16 15:34:07 2009
@@ -223,9 +223,9 @@
int termNum = te.getTermNumber();
if (termNum >= maxTermCounts.length) {
- // resize, but conserve memory by not doubling
- // resize at end??? we waste a maximum of 16K (average of 8K)
- int[] newMaxTermCounts = new int[maxTermCounts.length+4096];
+ // resize by doubling - for very large number of unique terms, expanding
+ // by 4K and resultant GC will dominate uninvert times. Resize at end if material
+ int[] newMaxTermCounts = new int[maxTermCounts.length*2];
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
maxTermCounts = newMaxTermCounts;
}
@@ -332,6 +332,14 @@
numTermsInField = te.getTermNumber();
te.close();
+ // free space if outrageously wasteful (tradeoff memory/cpu)
+
+ if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
+ int[] newMaxTermCounts = new int[numTermsInField];
+ System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
+ maxTermCounts = newMaxTermCounts;
+ }
+
long midPoint = System.currentTimeMillis();
if (termInstances == 0) {