I guess i have to brush the dust of my tree knowledge then... -atle
On Thu, 2010-06-24 at 09:43 +0200, Mattias Persson wrote: > 2010/6/23 Atle Prange <atle.pra...@gmail.com> > > > Hm, i'll have to fix that... > > > > Any thoughts on a Trie implementation? Would it be able to compete? > > > > I have no idea on performance or what would be the best approach. I though > your alphabet-relationship-types approach sounded quite interesting. Or as a > b-tree or some other ??-tree. > > > > atle > > > > > > > > > > On Wed, Jun 23, 2010 at 11:04 AM, Mattias Persson > > <matt...@neotechnology.com> wrote: > > > I think the lucene test is flawed since it never returns any results in > > > lookup method. That's why it's so fast :) > > > > > > 2010/6/22 Atle Prange <atle.pra...@gmail.com> > > > > > >> Started a new thread since the old got a bit long, if you want to > > >> catch up read the thread "The event framework has landed". > > >> > > >> Okay, i changed the tests to reflect a bit more realistic usage. > > >> > > >> The tests first inserts 1M entries to create a base of data. After > > >> that it makes reads and writes 1000 entries a thousand times. > > >> > > >> BabuDB: > > >> First million: 4s > > >> 1000 inserts, 4ms > > >> 1000 lookups: 30ms > > >> > > >> Lucene: > > >> First million entries took 1 ms. This shows the async behavior of > > Lucene. > > >> 1000 inserts: about 4 seconds (!) > > >> 1000 lookups: under 1 ms. > > >> > > >> (All numbers extremely approximated, and the numbers can only be seen > > >> as relative performance indicators) > > >> > > >> > > >> This is what i excpected. Lucene is optimized towards collecting large > > >> amount of data batchwise, and then handle many searches. (Correct me > > >> if i am wrong) > > >> BabuDB "just writes" data and "just reads" them later on. > > >> > > >> The test can of course be flawed. > > >> > > >> BabuDB test: > > >> > > >> > > >> > > >> package org.ogrm.test; > > >> > > >> import java.io.File; > > >> import java.io.IOException; > > >> import java.util.Iterator; > > >> import java.util.Map.Entry; > > >> > > >> import org.apache.commons.lang.math.RandomUtils; > > >> import org.xtreemfs.babudb.BabuDB; > > >> import org.xtreemfs.babudb.BabuDBException; > > >> import org.xtreemfs.babudb.BabuDBFactory; > > >> import org.xtreemfs.babudb.config.BabuDBConfig; > > >> import org.xtreemfs.babudb.log.DiskLogger.SyncMode; > > >> import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup; > > >> import org.xtreemfs.babudb.lsmdb.Database; > > >> > > >> public class TestBabuDb { > > >> > > >> private static Database db; > > >> > > >> public static void main( String[] args ) throws Exception { > > >> deleteFileOrDirectory( new File( "babudb" ) ); > > >> BabuDB babuDb = BabuDBFactory.createBabuDB( new > > >> BabuDBConfig( > > >> "babudb/db", "babudb/log", 1, 1024 * 1024 * 20, > > >> 10, SyncMode.ASYNC, 0, 0, false, 512, > > 1024 * > > >> 1024 * 100 ) ); > > >> db = babuDb.getDatabaseManager().createDatabase( "test", > > 1 > > >> ); > > >> int init = 1000000; > > >> int num = 1000; > > >> int base = 0; > > >> int iterations = 1000; > > >> insert( init, base ); > > >> base = init; > > >> for (int i = 0; i < iterations; i++) { > > >> lookup( num, base ); > > >> insert( num, base ); > > >> base = base + num; > > >> } > > >> > > >> db.shutdown(); > > >> babuDb.shutdown(); > > >> } > > >> > > >> private static byte[] fastToBytes( long value ) throws > > IOException { > > >> byte[] array = new byte[8]; > > >> for (int i = 0; i < 8; i++) { > > >> array[7 - i] = (byte) (value >>> (i * 8)); > > >> } > > >> return array; > > >> } > > >> > > >> private static long fastToLong( byte[] array ) throws IOException > > { > > >> long value = 0; > > >> for (int i = 0; i < array.length; i++) { > > >> value <<= 8; > > >> value ^= (long) array[i] & 0xFF; > > >> } > > >> return value; > > >> } > > >> > > >> private static byte[] lookupKey( String key, Object value ) { > > >> return String.valueOf( key + "|" + value + "|" > > ).getBytes(); > > >> } > > >> > > >> private static byte[] key( long id, String key, Object value ) { > > >> return String.valueOf( key + "|" + value + "|" + id > > >> ).getBytes(); > > >> } > > >> > > >> private static void lookup( int num, int start ) throws Exception > > { > > >> long t = System.currentTimeMillis(); > > >> for (int i = start; i < (start + num); i++) { > > >> Iterator<Entry<byte[], byte[]>> entries = > > >> db.prefixLookup( 0, > > >> lookupKey( "key", "value" + i ), null ).get(); > > >> while (entries.hasNext()) { > > >> Entry<byte[], byte[]> entry = > > >> entries.next(); > > >> fastToLong( entry.getValue() ); > > >> } > > >> } > > >> System.out.println( num + " lookups:" + > > >> (System.currentTimeMillis() - t) ); > > >> } > > >> > > >> private static void insert( int num, int start ) throws Exception > > { > > >> long t = System.currentTimeMillis(); > > >> BabuDBInsertGroup group = db.createInsertGroup(); > > >> > > >> for (int i = start; i < (num + start); i++) { > > >> long id = i; > > >> group.addInsert( 0, key( id, "key", "value" + i % > > >> 10000 ), > > >> fastToBytes( id ) ); > > >> } > > >> db.insert( group, null ).get(); > > >> System.out.println( "insert time (" + num + "):" + > > >> (System.currentTimeMillis() - t) ); > > >> } > > >> > > >> public static void deleteFileOrDirectory( File file ) { > > >> if (!file.exists()) { > > >> return; > > >> } > > >> > > >> if (file.isDirectory()) { > > >> for (File child : file.listFiles()) { > > >> deleteFileOrDirectory( child ); > > >> } > > >> file.delete(); > > >> } else { > > >> file.delete(); > > >> } > > >> } > > >> > > >> private static long randomId() { > > >> return RandomUtils.nextLong(); > > >> } > > >> } > > >> > > >> > > >> TestLucene > > >> > > >> > > >> package org.ogrm.test; > > >> > > >> import java.io.File; > > >> import java.io.IOException; > > >> > > >> import org.apache.lucene.analysis.KeywordAnalyzer; > > >> import org.apache.lucene.document.Document; > > >> import org.apache.lucene.document.Field; > > >> import org.apache.lucene.document.Field.Index; > > >> import org.apache.lucene.document.Field.Store; > > >> import org.apache.lucene.index.IndexReader; > > >> import org.apache.lucene.index.IndexWriter; > > >> import org.apache.lucene.index.Term; > > >> import org.apache.lucene.index.IndexWriter.MaxFieldLength; > > >> import org.apache.lucene.search.IndexSearcher; > > >> import org.apache.lucene.search.Query; > > >> import org.apache.lucene.search.ScoreDoc; > > >> import org.apache.lucene.search.TermQuery; > > >> import org.apache.lucene.search.TopDocs; > > >> import org.apache.lucene.store.Directory; > > >> import org.apache.lucene.store.FSDirectory; > > >> > > >> public class TestLucene { > > >> private static IndexWriter writer; > > >> private static IndexSearcher searcher; > > >> > > >> public static void main( String[] args ) throws Exception { > > >> File path = new File( "lcn" ); > > >> deleteFileOrDirectory( path ); > > >> Directory dir = FSDirectory.open( path ); > > >> writer = new IndexWriter( dir, new KeywordAnalyzer(), > > >> MaxFieldLength.UNLIMITED ); > > >> writer.setMaxBufferedDocs( 100000 ); > > >> > > >> IndexReader reader = writer.getReader(); > > >> searcher = new IndexSearcher( reader ); > > >> > > >> int init = 1000000; > > >> int num = 1000; > > >> int base = 0; > > >> int iterations = 1000; > > >> insert( init, base ); > > >> base = init; > > >> for (int i = 0; i < iterations; i++) { > > >> lookup( num, base ); > > >> insert( num, base ); > > >> base = base + num; > > >> } > > >> } > > >> > > >> public static void deleteFileOrDirectory( File file ) { > > >> if (!file.exists()) { > > >> return; > > >> } > > >> > > >> if (file.isDirectory()) { > > >> for (File child : file.listFiles()) { > > >> deleteFileOrDirectory( child ); > > >> } > > >> file.delete(); > > >> } else { > > >> file.delete(); > > >> } > > >> } > > >> > > >> private static void insert( int num, int base ) throws Exception > > { > > >> long t = System.currentTimeMillis(); > > >> for (int i = num; i < (num + base); i++) { > > >> Document doc = new Document(); > > >> doc.add( new Field( "_id_", fastToBytes( i ), > > >> Store.YES ) ); > > >> doc.add( new Field( "key", "value" + i % 10000, > > >> Store.NO, > > >> Index.NOT_ANALYZED ) ); > > >> writer.addDocument( doc ); > > >> } > > >> writer.commit(); > > >> System.out.println( num + " inserts in " + > > >> (System.currentTimeMillis() - t) ); > > >> } > > >> > > >> private static void lookup( int num, int base ) throws Exception > > { > > >> long t = System.currentTimeMillis(); > > >> for (int i = 0; i < 100; i++) { > > >> Query query = new TermQuery( new Term( "key", > > >> "value" + (i + 2500000) ) ); > > >> TopDocs docs = searcher.search( query, 100 ); > > >> for (ScoreDoc scoreDoc : docs.scoreDocs) { > > >> Document doc = searcher.doc( scoreDoc.doc > > ); > > >> fastToLong( doc.getBinaryValue( "_id_" ) > > ); > > >> } > > >> } > > >> System.out.println( num + " get " + > > >> (System.currentTimeMillis() - t) ); > > >> } > > >> > > >> private static byte[] fastToBytes( long value ) throws > > IOException { > > >> byte[] array = new byte[8]; > > >> for (int i = 0; i < 8; i++) { > > >> array[7 - i] = (byte) (value >>> (i * 8)); > > >> } > > >> return array; > > >> } > > >> > > >> private static long fastToLong( byte[] array ) throws IOException > > { > > >> long value = 0; > > >> for (int i = 0; i < array.length; i++) { > > >> value <<= 8; > > >> value ^= (long) array[i] & 0xFF; > > >> } > > >> return value; > > >> } > > >> } > > >> > > >> > > >> > > >> -atle > > >> _______________________________________________ > > >> Neo4j mailing list > > >> User@lists.neo4j.org > > >> https://lists.neo4j.org/mailman/listinfo/user > > >> > > > > > > > > > > > > -- > > > Mattias Persson, [matt...@neotechnology.com] > > > Hacker, Neo Technology > > > www.neotechnology.com > > > _______________________________________________ > > > Neo4j mailing list > > > User@lists.neo4j.org > > > https://lists.neo4j.org/mailman/listinfo/user > > > > > _______________________________________________ > > Neo4j mailing list > > User@lists.neo4j.org > > https://lists.neo4j.org/mailman/listinfo/user > > > > > _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user