Now using lucene-4.0.0-alpha
As the index size increased, the indexes no longer fit into the I/O cache,
so I dropped the biggest index (50,000,000 entries) from the search test.
$ java misc.LuceneKeyValuePerformance write
WRITING
#entries seconds entries/s
1000 0,40 2520
2000 0,29 6863
5000 0,28 18019
10000 0,27 36821
20000 0,23 85280
50000 0,43 115547
100000 0,74 134535
200000 1,46 136860
500000 3,41 146479
1000000 7,00 142820
2000000 13,44 148835
5000000 33,66 148540
10000000 65,66 152298
20000000 141,29 141556
50000000 336,12 148756
$ java misc.LuceneKeyValuePerformance writereverse
WRITING
#entries seconds entries/s
50000000 344,62 145085
20000000 143,08 139786
10000000 66,67 149985
5000000 34,61 144448
2000000 13,39 149358
1000000 6,82 146696
500000 3,50 142745
200000 1,41 141548
100000 0,74 135208
50000 0,39 128984
20000 0,21 94763
10000 0,18 56561
5000 0,17 29339
2000 0,07 27589
1000 0,07 15303
$ du -sk test-lucene-* | sort -n
96 test-lucene-1000
168 test-lucene-2000
388 test-lucene-5000
748 test-lucene-10000
1548 test-lucene-20000
3964 test-lucene-50000
7992 test-lucene-100000
16648 test-lucene-200000
42652 test-lucene-500000
85976 test-lucene-1000000
178600 test-lucene-2000000
456508 test-lucene-5000000
919860 test-lucene-10000000
1925988 test-lucene-20000000
4925280 test-lucene-50000000
$ # sync; echo 3 > /proc/sys/vm/drop_caches
$ free -m
total used free shared buffers cached
Mem: 9970 1465 8505 0 0 64
-/+ buffers/cache: 1399 8571
Swap: 11443 273 11170
$ cat test-lucene-*/* > /dev/null
$ free -m
total used free shared buffers cached
Mem: 9970 5022 4948 0 2 3621
-/+ buffers/cache: 1399 8571
Swap: 11443 273 11170
$ java misc.LuceneKeyValuePerformance read
START JIT WARMUP
1000 1000 0,43 2345
1000 1000 0,11 8812
END JIT WARMUP
READING
#searches #entries seconds Searches/s
1000 1000 0,09 11213
1000 2000 0,08 12771
1000 5000 0,07 15057
1000 10000 0,06 16708
1000 20000 0,04 22594
1000 50000 0,03 32633
1000 100000 0,03 34326
1000 200000 0,06 16163
1000 500000 0,05 19555
1000 1000000 0,09 11266
1000 2000000 0,08 13254
1000 5000000 0,10 10191
1000 10000000 0,11 8734
1000 20000000 0,12 8535
10000 1000 0,16 64349
10000 2000 0,21 48223
10000 5000 0,05 183837
10000 10000 0,04 225421
10000 20000 0,05 211630
10000 50000 0,05 211818
10000 100000 0,06 165344
10000 200000 0,09 108970
10000 500000 0,11 89131
10000 1000000 0,18 55930
10000 2000000 0,16 63920
10000 5000000 0,22 46078
10000 10000000 0,30 33393
10000 20000000 0,29 34944
100000 1000 0,51 195464
100000 2000 0,47 214451
100000 5000 0,49 204164
100000 10000 0,49 202677
100000 20000 0,51 197485
100000 50000 0,52 191373
100000 100000 0,54 185766
100000 200000 0,74 134885
100000 500000 0,97 102861
100000 1000000 1,32 75534
100000 2000000 1,37 72925
100000 5000000 1,88 53283
100000 10000000 2,10 47636
100000 20000000 2,49 40194
1000000 1000 4,58 218414
1000000 2000 4,51 221609
1000000 5000 4,76 210279
1000000 10000 4,73 211334
1000000 20000 4,82 207257
1000000 50000 4,98 200731
1000000 100000 4,96 201668
1000000 200000 7,24 138184
1000000 500000 9,51 105160
1000000 1000000 12,81 78062
1000000 2000000 12,99 76965
1000000 5000000 17,90 55878
1000000 10000000 19,91 50219
1000000 20000000 23,03 43423
REPEAT THE FIRST TEST SEQUENCE
1000 1000 0,00 203849
1000 2000 0,01 169528
1000 5000 0,01 165931
1000 10000 0,01 189977
1000 20000 0,01 179946
1000 50000 0,01 165042
1000 100000 0,01 158967
1000 200000 0,01 103597
1000 500000 0,01 68551
1000 1000000 0,02 47994
1000 2000000 0,02 48608
1000 5000000 0,03 30297
1000 10000000 0,03 29105
1000 20000000 0,04 25194
FINISHED
$ free -m
total used free shared buffers cached
Mem: 9970 5110 4860 0 13 3665
-/+ buffers/cache: 1431 8539
Swap: 11443 268 11175
package misc;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class LuceneKeyValuePerformance {
/**
 * Index sizes (number of key/value documents) exercised by the benchmark.
 * Each size is written to / read from its own directory named
 * "test-lucene-" followed by the size.
 */
private static final int[] allNumEntries = {
1000,
2000,
5000,
10000,
20000,
50000,
100000,
200000,
500000,
1000000,
2000000,
5000000,
10000000,
20000000
// The 50,000,000-entry index is currently disabled:
//50000000
};
/**
 * Numbers of key lookups performed against each index in the read test;
 * every value here is combined with every entry of allNumEntries.
 */
private static final int[] allNumSearches = {
1000,
10000,
100000,
1000000
};
/** Creates an instance; the class holds no instance state, all members are static. */
public LuceneKeyValuePerformance() {
}
public static void main(final String[] args) throws IOException {
if (args.length == 0) {
throw new IllegalArgumentException("usage: java
LuceneKeyValuePerformance (read | write)");
}
final String mode = args[0];
if ("write".equals(mode)) {
writingTest(allNumEntries);
}
else if ("writereverse".equals(mode)) {
writingTest(reverse(allNumEntries));
}
else if ("read".equals(mode)) {
readingTest(allNumEntries, allNumSearches);
}
}
/**
 * Returns a new array holding the elements of {@code array} in reverse order.
 * The argument itself is left untouched.
 *
 * @param array the array to mirror
 * @return a freshly allocated, reversed copy of {@code array}
 */
private static int[] reverse(final int[] array) {
    final int length = array.length;
    final int[] mirrored = new int[length];
    for (int src = 0, dst = length - 1; src < length; src++, dst--) {
        mirrored[dst] = array[src];
    }
    return mirrored;
}
/**
 * Runs the lookup benchmark: two warm-up passes on the first index, then
 * every combination of search count and index size, and finally the first
 * search count against every index once more.
 *
 * @param allNumEntries  sizes of the indexes to query (one directory per size)
 * @param allNumSearches numbers of lookups to run against each index
 * @throws IOException if an index cannot be opened or read
 */
private static void readingTest(final int[] allNumEntries, final int[] allNumSearches)
        throws IOException, CorruptIndexException {
    System.out.println("START JIT WARMUP");
    // singleIndexReadingTest takes (numSearches, countEntries); the original call
    // passed the two arguments swapped, which only worked because both are 1000.
    singleIndexReadingTest(allNumSearches[0], allNumEntries[0]);
    singleIndexReadingTest(allNumSearches[0], allNumEntries[0]);
    System.out.println("END JIT WARMUP");
    System.out.println("READING");
    System.out.printf("%-10s %-10s %-10s %-10s\n", "#searches", "#entries",
            "seconds", "Searches/s");
    for (final int numSearches : allNumSearches) {
        for (final int numEntries : allNumEntries) {
            singleIndexReadingTest(numSearches, numEntries);
        }
    }
    System.out.println("REPEAT THE FIRST TEST SEQUENCE");
    for (final int numEntries : allNumEntries) {
        singleIndexReadingTest(allNumSearches[0], numEntries);
    }
    System.out.println("FINISHED");
}
/**
 * Builds one index per requested size and prints the time each build took.
 *
 * @param allNumEntries index sizes to build, in the order they are built;
 *                      each goes to the directory "test-lucene-" + size
 * @throws IOException if an index directory cannot be opened or written
 */
private static void writingTest(final int[] allNumEntries)
        throws IOException, CorruptIndexException, LockObtainFailedException {
    System.out.println("WRITING");
    System.out.printf("%-10s %-10s %10s\n", "#entries", "seconds",
            "entries/s");
    for (final int numEntries : allNumEntries) {
        final Directory directory = FSDirectory.open(new File("test-lucene-" + numEntries));
        final double duration;
        try {
            duration = writing(directory, numEntries);
        } finally {
            // Release the directory even if the build fails; it was never closed before.
            directory.close();
        }
        System.out.printf("%10d %10.2f %10d\n", numEntries, duration,
                (int) (numEntries / duration));
    }
}
/**
 * Runs {@code numSearches} lookups against the index stored in
 * "test-lucene-" + {@code countEntries} and prints one result row.
 *
 * @param numSearches  number of lookups to perform
 * @param countEntries number of entries in the index; also selects the directory
 * @throws IOException if the index cannot be opened or read
 */
private static void singleIndexReadingTest(final int numSearches, final int countEntries)
        throws IOException, CorruptIndexException {
    final Directory directory = FSDirectory.open(new File("test-lucene-" + countEntries));
    final double duration;
    try {
        duration = reading(directory, countEntries, numSearches);
    } finally {
        // Release the directory even if the run fails; it was never closed before.
        directory.close();
    }
    System.out.printf("%10d %10d %10.2f %10d\n", numSearches, countEntries,
            duration, (int) (numSearches / duration));
}
/**
 * Creates (or overwrites) an index in {@code directory} containing
 * {@code countEntries} documents, each with a stored "key" and "value"
 * field, and measures how long that takes. The measured span includes
 * creating and closing the writer.
 *
 * @param directory    target directory for the index
 * @param countEntries number of documents to add; keys are makeKey(0..countEntries-1)
 * @return elapsed wall-clock time in seconds
 * @throws IOException if the index cannot be written
 */
private static double writing(final Directory directory, final int countEntries)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    final long startTime = System.nanoTime();
    final IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_40, new KeywordAnalyzer());
    config.setOpenMode(OpenMode.CREATE);
    final IndexWriter writer = new IndexWriter(directory, config);
    try {
        for (int k = 0; k < countEntries; k++) {
            final String key = makeKey(k);
            // Value is the key repeated three times behind a fixed prefix.
            final String value = new StringBuilder(64)
                    .append("value::").append(key).append(key).append(key).toString();
            final Document doc = new Document();
            doc.add(new StringField("key", key, Store.YES));
            doc.add(new StringField("value", value, Store.YES));
            writer.addDocument(doc);
        }
    } finally {
        // Always close the writer so it does not stay open if adding a document fails.
        writer.close();
    }
    final long nanoDuration = System.nanoTime() - startTime;
    // Divide by a double literal: the former float literal (1e9f) forced the
    // division into single precision.
    return nanoDuration / 1e9;
}
private static double reading(final Directory directory, final int
countEntries, final int searchEntries) throws CorruptIndexException,
IOException {
final FileWriter fancySink = new FileWriter(new File("fancySink"));
final long startTime = System.nanoTime();
final DirectoryReader reader = DirectoryReader.open(directory);
final IndexSearcher searcher = new IndexSearcher(reader);
int random = 0;
for (int k = 0; k < searchEntries; k++) {
final String key = makeKey(random);
final Query query = new TermQuery(new Term("key", key));
final TopDocs topDocs = searcher.search(query, 1);
final int docId = topDocs.scoreDocs[0].doc;
final Document doc = searcher.doc(docId);
random = (random + 9973) % countEntries; //advance in larger steps,
9973 is prime
// ensure that we read the "value" and there is no fancy
optimization
final String str = doc.get("value");
fancySink.append(str.length() > 1 ? str.charAt(1) : '-');
}
reader.close();
//searcher.close();
fancySink.close();
final long endTime = System.nanoTime();
final long nanoDuration = endTime - startTime;
final double duration = nanoDuration / 1e9f;
return duration;
}
/**
 * Builds the key string for entry number {@code i}: the decimal form of
 * {@code i} wrapped between the letters "x" and "y".
 *
 * @param i entry number
 * @return the key, e.g. "x42y" for 42
 */
private static String makeKey(final int i) {
    return "x" + i + "y";
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]