Adding a microbenchmark of create and query performance.
Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/1f597bb7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/1f597bb7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/1f597bb7 Branch: refs/heads/develop Commit: 1f597bb707fb8482a3808e085c86d3c6b9d25fcf Parents: d623cf6 Author: Dan Smith <[email protected]> Authored: Fri Oct 16 14:53:58 2015 -0700 Committer: Dan Smith <[email protected]> Committed: Fri Oct 16 14:54:50 2015 -0700 ---------------------------------------------------------------------- ...IndexRepositoryImplJUnitPerformanceTest.java | 422 +++++++++++++++++++ 1 file changed, 422 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/1f597bb7/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java ---------------------------------------------------------------------- diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java new file mode 100644 index 0000000..8641254 --- /dev/null +++ b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java @@ -0,0 +1,422 @@ +package com.gemstone.gemfire.cache.lucene.internal.repository; + +import static org.junit.Assert.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.RAMDirectory; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.carrotsearch.randomizedtesting.generators.RandomStrings; +import com.gemstone.gemfire.DataSerializable; +import com.gemstone.gemfire.DataSerializer; +import com.gemstone.gemfire.cache.Cache; +import com.gemstone.gemfire.cache.CacheFactory; +import com.gemstone.gemfire.cache.PartitionAttributesFactory; +import com.gemstone.gemfire.cache.Region; +import com.gemstone.gemfire.cache.RegionShortcut; +import com.gemstone.gemfire.cache.asyncqueue.AsyncEventQueue; +import com.gemstone.gemfire.cache.lucene.LuceneIndex; +import com.gemstone.gemfire.cache.lucene.LuceneQuery; +import com.gemstone.gemfire.cache.lucene.LuceneQueryProvider; +import com.gemstone.gemfire.cache.lucene.LuceneQueryResults; +import com.gemstone.gemfire.cache.lucene.LuceneService; +import com.gemstone.gemfire.cache.lucene.LuceneServiceProvider; +import com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImpl; +import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory; +import com.gemstone.gemfire.cache.lucene.internal.distributed.TopEntriesCollector; +import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey; +import com.gemstone.gemfire.cache.lucene.internal.filesystem.File; +import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer; +import com.gemstone.gemfire.cache.query.QueryException; +import com.gemstone.gemfire.test.junit.categories.PerformanceTest; + +/** + * Microbenchmark of the IndexRepository to compare an + * IndexRepository built on top of cache with a + * stock lucene IndexWriter with a RAMDirectory. + */ +@Category(PerformanceTest.class) +public class IndexRepositoryImplJUnitPerformanceTest { + + private static final int NUM_WORDS = 1000; + private static int[] COMMIT_INTERVAL = new int[] {100, 1000, 5000}; + private static int NUM_ENTRIES = 500_000; + private static int NUM_QUERIES = 500_000; + + private StandardAnalyzer analyzer = new StandardAnalyzer(); + + @Test + public void testIndexRepository() throws Exception { + + + doTest("IndexRepository", new TestCallbacks() { + + private Cache cache; + private IndexRepositoryImpl repo; + private IndexWriter writer; + + @Override + public void addObject(String key, String text) throws Exception { + repo.create(key, new TestObject(text)); + } + + @Override + public void commit() throws Exception { + repo.commit(); + } + + @Override + public void init() throws Exception { + cache = new CacheFactory().set("mcast-port", "0") + .set("log-level", "error") + .create(); + Region<String, File> fileRegion = cache.<String, File>createRegionFactory(RegionShortcut.REPLICATE).create("files"); + Region<ChunkKey, byte[]> chunkRegion = cache.<ChunkKey, byte[]>createRegionFactory(RegionShortcut.REPLICATE).create("chunks"); + + RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion); + + + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + String[] indexedFields= new String[] {"text"}; + HeterogenousLuceneSerializer mapper = new HeterogenousLuceneSerializer(indexedFields); + repo = new IndexRepositoryImpl(fileRegion, writer, mapper); + } + + @Override + public void cleanup() throws IOException { + writer.close(); + cache.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws IOException { + TopEntriesCollector collector = new TopEntriesCollector(); + repo.query(query, 100, collector); + return collector.size(); + } + }); + } + + /** + * Test our full lucene index implementation + * @throws Exception + */ + @Test + public void testLuceneIndex() throws Exception { + + + doTest("LuceneIndex", new TestCallbacks() { + + private Cache cache; + private Region<String, TestObject> region; + private LuceneService service; + + @Override + public void addObject(String key, String text) throws Exception { + region.create(key, new TestObject(text)); + } + + @Override + public void commit() throws Exception { + //NA + } + + @Override + public void init() throws Exception { + cache = new CacheFactory().set("mcast-port", "0") + .set("log-level", "warning") + .create(); + service = LuceneServiceProvider.get(cache); + service.createIndex("index", "/region", "text"); + region = cache.<String, TestObject>createRegionFactory(RegionShortcut.PARTITION) + .setPartitionAttributes(new PartitionAttributesFactory<>().setTotalNumBuckets(1).create()) + .create("region"); + } + + @Override + public void cleanup() throws IOException { + cache.close(); + } + + @Override + public void waitForAsync() throws Exception { + AsyncEventQueue aeq = cache.getAsyncEventQueue(LuceneServiceImpl.getUniqueIndexName("index", "/region")); + + //We will be at most 10 ms off + while(aeq.size() > 0) { + Thread.sleep(10); + } + } + + @Override + public int query(final Query query) throws Exception { + LuceneQuery<Object, Object> luceneQuery = service.createLuceneQueryFactory().create("index", "/region", new LuceneQueryProvider() { + + @Override + public Query getQuery(LuceneIndex index) throws QueryException { + return query; + } + }); + + LuceneQueryResults<Object, Object> results = luceneQuery.search(); + return results.size(); + } + }); + } + + @Test + public void testLuceneWithRegionDirectory() throws Exception { + doTest("RegionDirectory", new TestCallbacks() { + + private IndexWriter writer; + private SearcherManager searcherManager; + + @Override + public void init() throws Exception { + RegionDirectory dir = new RegionDirectory(new ConcurrentHashMap<String, File>(), new ConcurrentHashMap<ChunkKey, byte[]>()); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + searcherManager = new SearcherManager(writer, true, null); + } + + @Override + public void addObject(String key, String text) throws Exception { + Document doc = new Document(); + doc.add(new TextField("key", key, Store.YES)); + doc.add(new TextField("text", text, Store.NO)); + writer.addDocument(doc); + } + + @Override + public void commit() throws Exception { + writer.commit(); + searcherManager.maybeRefresh(); + } + + @Override + public void cleanup() throws Exception { + writer.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws Exception { + IndexSearcher searcher = searcherManager.acquire(); + try { + return searcher.count(query); + } finally { + searcherManager.release(searcher); + } + } + + }); + + } + + @Test + public void testLucene() throws Exception { + doTest("Lucene", new TestCallbacks() { + + private IndexWriter writer; + private SearcherManager searcherManager; + + @Override + public void init() throws Exception { + RAMDirectory dir = new RAMDirectory(); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + searcherManager = new SearcherManager(writer, true, null); + } + + @Override + public void addObject(String key, String text) throws Exception { + Document doc = new Document(); + doc.add(new TextField("key", key, Store.YES)); + doc.add(new TextField("text", text, Store.NO)); + writer.addDocument(doc); + } + + @Override + public void commit() throws Exception { + writer.commit(); + searcherManager.maybeRefresh(); + } + + @Override + public void cleanup() throws Exception { + writer.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws Exception { + IndexSearcher searcher = searcherManager.acquire(); + try { + return searcher.count(query); + } finally { + searcherManager.release(searcher); + } + } + + }); + + } + + private void doTest(String testName, TestCallbacks callbacks) throws Exception { + + //Create some random words. We need to be careful + //to make sure we get NUM_WORDS distinct words here + Set<String> wordSet = new HashSet<String>(); + Random rand = new Random(); + while(wordSet.size() < NUM_WORDS) { + int length = rand.nextInt(12) + 3; + char[] text = new char[length]; + for(int i = 0; i < length; i++) { + text[i] = (char) (rand.nextInt(26) + 97); + } + wordSet.add(new String(text)); + } + List<String> words = new ArrayList<String>(wordSet.size()); + words.addAll(wordSet); + + + + //warm up + writeRandomWords(callbacks, words, rand, NUM_ENTRIES / 10, NUM_QUERIES / 10, COMMIT_INTERVAL[0]); + + //Do the actual test + + for(int i = 0; i < COMMIT_INTERVAL.length; i++) { + Results results = writeRandomWords(callbacks, words, rand, NUM_ENTRIES, NUM_QUERIES / 10, COMMIT_INTERVAL[i]); + + System.out.println(testName + " writes(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.writeTime)); + System.out.println(testName + " queries(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.queryTime)); + } + } + + private Results writeRandomWords(TestCallbacks callbacks, List<String> words, + Random rand, int numEntries, int numQueries, int commitInterval) throws Exception { + Results results = new Results(); + callbacks.init(); + int[] counts = new int[words.size()]; + long start = System.nanoTime(); + try { + for(int i =0; i < numEntries; i++) { + int word1 = rand.nextInt(words.size()); + int word2 = rand.nextInt(words.size()); + counts[word1]++; + counts[word2]++; + String value = words.get(word1) + " " + words.get(word2); + callbacks.addObject("key" + i, value); + + if(i % commitInterval == 0 && i != 0) { + callbacks.commit(); + } + } + callbacks.commit(); + callbacks.waitForAsync(); + long end = System.nanoTime(); + results.writeTime = end - start; + + + start = System.nanoTime(); + for(int i=0; i < numQueries; i++) { + int wordIndex = rand.nextInt(words.size()); + String word = words.get(wordIndex); + Query query = new TermQuery(new Term("text", word)); + int size = callbacks.query(query); +// int size = callbacks.query(parser.parse(word)); + //All of my tests sometimes seem to be missing a couple of words, including the stock lucene +// assertEquals("Error on query " + i + " word=" + word, counts[wordIndex], size); + } + end = System.nanoTime(); + results.queryTime = end - start; + + return results; + } finally { + callbacks.cleanup(); + } + } + + private static class TestObject implements DataSerializable { + private String text; + + public TestObject() { + + } + + public TestObject(String text) { + super(); + this.text = text; + } + + @Override + public void toData(DataOutput out) throws IOException { + DataSerializer.writeString(text, out); + } + + @Override + public void fromData(DataInput in) + throws IOException, ClassNotFoundException { + text = DataSerializer.readString(in); + } + + @Override + public String toString() { + return text; + } + + + } + + private interface TestCallbacks { + public void init() throws Exception; + public int query(Query query) throws Exception; + public void addObject(String key, String text) throws Exception; + public void commit() throws Exception; + public void waitForAsync() throws Exception; + public void cleanup() throws Exception; + } + + private static class Results { + long writeTime; + long queryTime; + } +}
