Hm, i'll have to fix that... Any thoughts on a Trie implementation? Would it be able to compete?
atle On Wed, Jun 23, 2010 at 11:04 AM, Mattias Persson <matt...@neotechnology.com> wrote: > I think the lucene test is flawed since it never returns any results in > lookup method. That's why it's so fast :) > > 2010/6/22 Atle Prange <atle.pra...@gmail.com> > >> Started a new thread since the old got a bit long, if you want to >> catch up read the thread "The event framework has landed". >> >> Okay, i changed the tests to reflect a bit more realistic usage. >> >> The tests first inserts 1M entries to create a base of data. After >> that it makes reads and writes 1000 entries a thousand times. >> >> BabuDB: >> First million: 4s >> 1000 inserts, 4ms >> 1000 lookups: 30ms >> >> Lucene: >> First million entries took 1 ms. This shows the async behavior of Lucene. >> 1000 inserts: about 4 seconds (!) >> 1000 lookups: under 1 ms. >> >> (All numbers extremely approximated, and the numbers can only be seen >> as relative performance indicators) >> >> >> This is what i excpected. Lucene is optimized towards collecting large >> amount of data batchwise, and then handle many searches. (Correct me >> if i am wrong) >> BabuDB "just writes" data and "just reads" them later on. >> >> The test can of course be flawed. >> >> BabuDB test: >> >> >> >> package org.ogrm.test; >> >> import java.io.File; >> import java.io.IOException; >> import java.util.Iterator; >> import java.util.Map.Entry; >> >> import org.apache.commons.lang.math.RandomUtils; >> import org.xtreemfs.babudb.BabuDB; >> import org.xtreemfs.babudb.BabuDBException; >> import org.xtreemfs.babudb.BabuDBFactory; >> import org.xtreemfs.babudb.config.BabuDBConfig; >> import org.xtreemfs.babudb.log.DiskLogger.SyncMode; >> import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup; >> import org.xtreemfs.babudb.lsmdb.Database; >> >> public class TestBabuDb { >> >> private static Database db; >> >> public static void main( String[] args ) throws Exception { >> deleteFileOrDirectory( new File( "babudb" ) ); >> BabuDB babuDb = BabuDBFactory.createBabuDB( new >> BabuDBConfig( >> "babudb/db", "babudb/log", 1, 1024 * 1024 * 20, >> 10, SyncMode.ASYNC, 0, 0, false, 512, 1024 * >> 1024 * 100 ) ); >> db = babuDb.getDatabaseManager().createDatabase( "test", 1 >> ); >> int init = 1000000; >> int num = 1000; >> int base = 0; >> int iterations = 1000; >> insert( init, base ); >> base = init; >> for (int i = 0; i < iterations; i++) { >> lookup( num, base ); >> insert( num, base ); >> base = base + num; >> } >> >> db.shutdown(); >> babuDb.shutdown(); >> } >> >> private static byte[] fastToBytes( long value ) throws IOException { >> byte[] array = new byte[8]; >> for (int i = 0; i < 8; i++) { >> array[7 - i] = (byte) (value >>> (i * 8)); >> } >> return array; >> } >> >> private static long fastToLong( byte[] array ) throws IOException { >> long value = 0; >> for (int i = 0; i < array.length; i++) { >> value <<= 8; >> value ^= (long) array[i] & 0xFF; >> } >> return value; >> } >> >> private static byte[] lookupKey( String key, Object value ) { >> return String.valueOf( key + "|" + value + "|" ).getBytes(); >> } >> >> private static byte[] key( long id, String key, Object value ) { >> return String.valueOf( key + "|" + value + "|" + id >> ).getBytes(); >> } >> >> private static void lookup( int num, int start ) throws Exception { >> long t = System.currentTimeMillis(); >> for (int i = start; i < (start + num); i++) { >> Iterator<Entry<byte[], byte[]>> entries = >> db.prefixLookup( 0, >> lookupKey( "key", "value" + i ), null ).get(); >> while (entries.hasNext()) { >> Entry<byte[], byte[]> entry = >> entries.next(); >> fastToLong( entry.getValue() ); >> } >> } >> System.out.println( num + " lookups:" + >> (System.currentTimeMillis() - t) ); >> } >> >> private static void insert( int num, int start ) throws Exception { >> long t = System.currentTimeMillis(); >> BabuDBInsertGroup group = db.createInsertGroup(); >> >> for (int i = start; i < (num + start); i++) { >> long id = i; >> group.addInsert( 0, key( id, "key", "value" + i % >> 10000 ), >> fastToBytes( id ) ); >> } >> db.insert( group, null ).get(); >> System.out.println( "insert time (" + num + "):" + >> (System.currentTimeMillis() - t) ); >> } >> >> public static void deleteFileOrDirectory( File file ) { >> if (!file.exists()) { >> return; >> } >> >> if (file.isDirectory()) { >> for (File child : file.listFiles()) { >> deleteFileOrDirectory( child ); >> } >> file.delete(); >> } else { >> file.delete(); >> } >> } >> >> private static long randomId() { >> return RandomUtils.nextLong(); >> } >> } >> >> >> TestLucene >> >> >> package org.ogrm.test; >> >> import java.io.File; >> import java.io.IOException; >> >> import org.apache.lucene.analysis.KeywordAnalyzer; >> import org.apache.lucene.document.Document; >> import org.apache.lucene.document.Field; >> import org.apache.lucene.document.Field.Index; >> import org.apache.lucene.document.Field.Store; >> import org.apache.lucene.index.IndexReader; >> import org.apache.lucene.index.IndexWriter; >> import org.apache.lucene.index.Term; >> import org.apache.lucene.index.IndexWriter.MaxFieldLength; >> import org.apache.lucene.search.IndexSearcher; >> import org.apache.lucene.search.Query; >> import org.apache.lucene.search.ScoreDoc; >> import org.apache.lucene.search.TermQuery; >> import org.apache.lucene.search.TopDocs; >> import org.apache.lucene.store.Directory; >> import org.apache.lucene.store.FSDirectory; >> >> public class TestLucene { >> private static IndexWriter writer; >> private static IndexSearcher searcher; >> >> public static void main( String[] args ) throws Exception { >> File path = new File( "lcn" ); >> deleteFileOrDirectory( path ); >> Directory dir = FSDirectory.open( path ); >> writer = new IndexWriter( dir, new KeywordAnalyzer(), >> MaxFieldLength.UNLIMITED ); >> writer.setMaxBufferedDocs( 100000 ); >> >> IndexReader reader = writer.getReader(); >> searcher = new IndexSearcher( reader ); >> >> int init = 1000000; >> int num = 1000; >> int base = 0; >> int iterations = 1000; >> insert( init, base ); >> base = init; >> for (int i = 0; i < iterations; i++) { >> lookup( num, base ); >> insert( num, base ); >> base = base + num; >> } >> } >> >> public static void deleteFileOrDirectory( File file ) { >> if (!file.exists()) { >> return; >> } >> >> if (file.isDirectory()) { >> for (File child : file.listFiles()) { >> deleteFileOrDirectory( child ); >> } >> file.delete(); >> } else { >> file.delete(); >> } >> } >> >> private static void insert( int num, int base ) throws Exception { >> long t = System.currentTimeMillis(); >> for (int i = num; i < (num + base); i++) { >> Document doc = new Document(); >> doc.add( new Field( "_id_", fastToBytes( i ), >> Store.YES ) ); >> doc.add( new Field( "key", "value" + i % 10000, >> Store.NO, >> Index.NOT_ANALYZED ) ); >> writer.addDocument( doc ); >> } >> writer.commit(); >> System.out.println( num + " inserts in " + >> (System.currentTimeMillis() - t) ); >> } >> >> private static void lookup( int num, int base ) throws Exception { >> long t = System.currentTimeMillis(); >> for (int i = 0; i < 100; i++) { >> Query query = new TermQuery( new Term( "key", >> "value" + (i + 2500000) ) ); >> TopDocs docs = searcher.search( query, 100 ); >> for (ScoreDoc scoreDoc : docs.scoreDocs) { >> Document doc = searcher.doc( scoreDoc.doc ); >> fastToLong( doc.getBinaryValue( "_id_" ) ); >> } >> } >> System.out.println( num + " get " + >> (System.currentTimeMillis() - t) ); >> } >> >> private static byte[] fastToBytes( long value ) throws IOException { >> byte[] array = new byte[8]; >> for (int i = 0; i < 8; i++) { >> array[7 - i] = (byte) (value >>> (i * 8)); >> } >> return array; >> } >> >> private static long fastToLong( byte[] array ) throws IOException { >> long value = 0; >> for (int i = 0; i < array.length; i++) { >> value <<= 8; >> value ^= (long) array[i] & 0xFF; >> } >> return value; >> } >> } >> >> >> >> -atle >> _______________________________________________ >> Neo4j mailing list >> User@lists.neo4j.org >> https://lists.neo4j.org/mailman/listinfo/user >> > > > > -- > Mattias Persson, [matt...@neotechnology.com] > Hacker, Neo Technology > www.neotechnology.com > _______________________________________________ > Neo4j mailing list > User@lists.neo4j.org > https://lists.neo4j.org/mailman/listinfo/user > _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user