I can share the program; it is attached.

----------------------------------------------------------------------

A short summary:

* There must be 2 "caches" in the system: The performance degrades
  significantly beyond 1000 and again beyond 200000 documents
  in the index.

* JIT is very significant

* Index start-up is very significant, although the index is in the
  IO cache and the query is the simplest possible.

* The performance of index size 20000000 is strange.
  Worse performance than 50000000

----------------------------------------------------------------------
Environment:

lucene-3.5.0

$ cat /proc/sys/kernel/osrelease 
2.6.32-5-amd64

$ cat /proc/cpuinfo | tail ...
processor       : 3
vendor_id       : GenuineIntel
cpu family      : 6
model           : 42
model name      : Intel(R) Core(TM) i5-2500 CPU @ 3.30GHz
stepping        : 7
cpu MHz         : 1600.000
cache size      : 6144 KB
physical id     : 0
siblings        : 4
core id         : 3
cpu cores       : 4
apicid          : 6
initial apicid  : 6
fpu             : yes
fpu_exception   : yes
cpuid level     : 13
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm 
constant_tsc arch_perfmon pebs bts rep_good xtopology nonstop_tsc aperfmperf 
pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm sse4_1 
sse4_2 x2apic popcnt aes xsave avx lahf_lm ida arat tpr_shadow vnmi 
flexpriority ept vpid
bogomips        : 6584.99
clflush size    : 64
cache_alignment : 64
address sizes   : 36 bits physical, 48 bits virtual
power management:
----------------------------------------------------------------------

Now some results for writing:

$ java misc.LuceneKeyValuePerformance write
WRITING
#entries   seconds     entries/s
      1000       0,28       3607
      2000       0,26       7612
      5000       0,26      19176
     10000       0,20      50419
     20000       0,16     122044
     50000       0,26     191989
    100000       0,38     259987
    200000       0,72     276975
    500000       1,87     267547
   1000000       3,00     333436
   2000000       6,25     320167
   5000000      14,22     351645
  10000000      27,77     360082
  20000000      53,00     377385
  50000000     155,36     321839
 100000000     262,72     380636

# Now writing the indexes in the opposite order.
# That shows the JIT startup effect.

$ java misc.LuceneKeyValuePerformance writereverse
WRITING
#entries   seconds     entries/s
  20000000      56,39     354686
  10000000      26,90     371752
   5000000      13,60     367781
   2000000       6,17     324106
   1000000       2,97     336962
    500000       1,58     315545
    200000       0,59     336558
    100000       0,32     308275
     50000       0,25     198007
     20000       0,14     147298
     10000       0,17      57645
      5000       0,10      50244
      2000       0,12      16364
      1000       0,12       8587

Stefan
package misc;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


public class LuceneKeyValuePerformance {

    private static final int[] allNumEntries = {
        1000,
        2000,
        5000,
        10000,
        20000,
        50000,
        100000,
        200000,
        500000,
        1000000,
        2000000,
        5000000,
        10000000,
        20000000,
        50000000
    };
    private static final int[] allNumSearches = {
        1000,
        10000,
        100000,
        1000000
    };

    public LuceneKeyValuePerformance() {
    }

    public static void main(final String[] args) throws IOException {
        if (args.length == 0) {
            throw new IllegalArgumentException("usage: java 
LuceneKeyValuePerformance (read | write)");
        }
        final String mode = args[0];
        if ("write".equals(mode)) {
            writingTest(allNumEntries);
        }
        else if ("writereverse".equals(mode)) {
            writingTest(reverse(allNumEntries));
        }
        else if ("read".equals(mode)) {
            readingTest(allNumEntries, allNumSearches);
        }
    }

    private static int[] reverse(final int[] array) {
        final int[] a = Arrays.copyOf(array, array.length);
        for (int i = 0; i < a.length / 2; i++) {
            final int i2 = a.length - i - 1;
            final int tmp = a[i];
            a[i] = a[i2];
            a[i2] = tmp;

        }
        return a;
    }

    private static void readingTest(final int[] allNumEntries, final int[] 
allNumSearches) throws IOException, CorruptIndexException {
        System.out.println("START JIT WARMUP");
        singleIndexReadingTest(allNumEntries[0], allNumSearches[0]);
        singleIndexReadingTest(allNumEntries[0], allNumSearches[0]);
        System.out.println("END   JIT WARMUP");
        System.out.println("READING");
        System.out.printf("%-10s %-10s %-10s %-10s\n", "#searches", "#entries", 
"seconds", "Searches/s");
        for (final int numSearches: allNumSearches) {
            for (final int numEntries: allNumEntries) {
                singleIndexReadingTest(numSearches, numEntries);
            }
        }
        System.out.println("REPEAT THE FIRST TEST SEQUENCE");
        for (final int numEntries: allNumEntries) {
            singleIndexReadingTest(allNumSearches[0], numEntries);
        }
        System.out.println("FINISHED");
    }

    private static void writingTest(final int[] allNumEntries) throws 
IOException, CorruptIndexException, LockObtainFailedException {
        System.out.println("WRITING");
        System.out.printf("%-10s %-10s %10s\n", "#entries", "seconds", 
"entries/s");
        for (final int numEntries: allNumEntries) {
            final Directory directory = FSDirectory.open(new 
File("test-lucene-" + numEntries));
            final double duration = writing(directory, numEntries);
            System.out.printf("%10d %10.2f %10d\n", numEntries, duration, (int) 
(numEntries / duration));
        }
    }

    private static void singleIndexReadingTest(final int numSearches, final int 
countEntries) throws IOException, CorruptIndexException {
        final Directory directory = FSDirectory.open(new File("test-lucene-" + 
countEntries));
        final double duration = reading(directory, countEntries, numSearches);
        System.out.printf("%10d %10d %10.2f %10d\n", numSearches, countEntries, 
duration, (int) (numSearches / duration));
    }

    private static double writing(final Directory directory, final int 
countEntries) throws CorruptIndexException, LockObtainFailedException, 
IOException {
        final long startTime = System.nanoTime();
        final IndexWriterConfig config = new 
IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer());
        config.setOpenMode(OpenMode.CREATE);
        final IndexWriter writer = new IndexWriter(directory, config);
        for (int k = 0; k < countEntries; k++) {
            final String key = makeKey(k);
            final String value = new 
StringBuilder(64).append("value::").append(key).append(key).append(key).toString();
            final Document doc = new Document();
            doc.add(new Field("key", key, Store.YES, 
Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("value", value, Store.YES, Index.NO));
            writer.addDocument(doc);
        }
        writer.close();

        final long endTime = System.nanoTime();
        final long nanoDuration = endTime - startTime;
        final double duration = nanoDuration / 1e9f;
        return duration;
    }

    private static double reading(final Directory directory, final int 
countEntries, final int searchEntries) throws CorruptIndexException, 
IOException {
        final FileWriter fancySink = new FileWriter(new File("fancySink"));
        final long startTime = System.nanoTime();
        final IndexReader reader = IndexReader.open(directory, true);
        final IndexSearcher searcher = new IndexSearcher(reader);

        int random = 0;
        for (int k = 0; k < searchEntries; k++) {
            final String key = makeKey(random);
            final Query query = new TermQuery(new Term("key", key));
            final TopDocs topDocs = searcher.search(query, 1);
            final int docId = topDocs.scoreDocs[0].doc;
            final Document doc = searcher.doc(docId);
            random = (random + 9973) % countEntries; //advance in larger steps, 
9973 is prime

            // ensure that we read the "value" and there is no fancy 
optimization
            final String str = doc.get("value");
            fancySink.append(str.length() > 1 ? str.charAt(1) : '-');
        }
        reader.close();
        searcher.close();
        fancySink.close();

        final long endTime = System.nanoTime();
        final long nanoDuration = endTime - startTime;
        final double duration = nanoDuration / 1e9f;
        return duration;
    }

    private static String makeKey(final int i) {
        return new 
StringBuilder(16).append("x").append(i).append("y").toString();
    }

}

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to