Re: [Neo4j] Index Performance(Was: Event framework has landed)

Atle Prange Wed, 30 Jun 2010 00:12:12 -0700

Yes, but isn't that what you have to do in practice when using Lucene
as an index for values in Neo4j? Data could be written at any time,
so you have to open a new reader every time you do a search, at
least when the index for the value you are searching for has changed.


How is that implemented in neo4j? Do you open a new reader whenever
you do a search on a dirty index?

-atle





On Tue, Jun 29, 2010 at 9:22 AM, Mattias Persson
<matt...@neotechnology.com> wrote:
> I think that calling writer.getReader() and closing it after each lookup
> causes it to be slow. Basically, only create a new reader/searcher if the
> underlying writer has written something since the last time you used the
> reader/searcher.
>
> 2010/6/28 Atle Prange <atle.pra...@gmail.com>
>
>> I looked over the Lucene test. I am no Lucene expert, so when I
>> finally managed to actually find the documents I indexed, it was so
>> slow that I cannot imagine I am doing things right:
>>
>> package org.ogrm.test;
>>
>> import java.io.File;
>> import java.io.IOException;
>>
>> import org.apache.lucene.analysis.KeywordAnalyzer;
>> import org.apache.lucene.document.Document;
>> import org.apache.lucene.document.Field;
>> import org.apache.lucene.document.Field.Index;
>> import org.apache.lucene.document.Field.Store;
>> import org.apache.lucene.index.IndexReader;
>> import org.apache.lucene.index.IndexWriter;
>> import org.apache.lucene.index.Term;
>> import org.apache.lucene.index.IndexWriter.MaxFieldLength;
>> import org.apache.lucene.search.IndexSearcher;
>> import org.apache.lucene.search.Query;
>> import org.apache.lucene.search.ScoreDoc;
>> import org.apache.lucene.search.TermQuery;
>> import org.apache.lucene.search.TopDocs;
>> import org.apache.lucene.store.Directory;
>> import org.apache.lucene.store.FSDirectory;
>>
>> public class TestLucene {
>>        private static IndexWriter writer;
>>        private static IndexSearcher searcher;
>>         private static IndexReader reader;
>>
>>        public static void main( String[] args ) throws Exception {
>>                File path = new File( "lcn" );
>>                deleteFileOrDirectory( path );
>>                Directory dir = FSDirectory.open( path );
>>                writer = new IndexWriter( dir, new KeywordAnalyzer(),
>> MaxFieldLength.UNLIMITED );
>>                writer.setMaxBufferedDocs( 100000 );
>>
>>
>>
>>                 int init = 1000;
>>                 int num = 1000;
>>                int base = 0;
>>                int iterations = 1000;
>>                insert( init, base );
>>                base = init;
>>                 Thread.sleep( 1000 );
>>                 for (int i = 0; i < iterations; i++) {
>>                         lookup( num / 10, base );
>>                         insert( num, base );
>>                        base = base + num;
>>                }
>>        }
>>
>>        public static void deleteFileOrDirectory( File file ) {
>>                if (!file.exists()) {
>>                        return;
>>                }
>>
>>                if (file.isDirectory()) {
>>                        for (File child : file.listFiles()) {
>>                                deleteFileOrDirectory( child );
>>                        }
>>                        file.delete();
>>                } else {
>>                        file.delete();
>>                }
>>        }
>>
>>        private static void insert( int num, int base ) throws Exception {
>>                long t = System.currentTimeMillis();
>>                for (int i = num; i < (num + base); i++) {
>>                        Document doc = new Document();
>>                        doc.add( new Field( "_id_", fastToBytes( i ),
>> Store.YES ) );
>>                        doc.add( new Field( "key", "value" + (i % 10000),
>> Store.NO,
>> Index.NOT_ANALYZED ) );
>>                        writer.addDocument( doc );
>>                }
>>                writer.commit();
>>                System.out.println( num + " inserts in " +
>> (System.currentTimeMillis() - t) + " total "+ writer.numDocs());
>>         }
>>
>>        private static void lookup( int num, int base ) throws Exception {
>>         reader = writer.getReader();
>>
>>                searcher = new IndexSearcher( reader );
>>                 long t = System.currentTimeMillis();
>>                int hits = 0;
>>                 for (int i = num; i < (num + base); i++) {
>>                         Query query = new TermQuery( new Term( "key",
>> "value" + (i % 10000) ) );
>>                         TopDocs docs = searcher.search( query, 100 );
>>                         hits += docs.totalHits;
>>                         for (ScoreDoc scoreDoc : docs.scoreDocs) {
>>                                Document doc = searcher.doc( scoreDoc.doc );
>>                                 long id = fastToLong( doc.getBinaryValue(
>> "_id_" ) );
>>                        }
>>                }
>>                reader.close();
>>                System.out.println( num + " hits:" +hits+" time: " +
>> (System.currentTimeMillis() - t) );
>>        }
>>
>>        private static byte[] fastToBytes( long value ) throws IOException {
>>                byte[] array = new byte[8];
>>                for (int i = 0; i < 8; i++) {
>>                        array[7 - i] = (byte) (value >>> (i * 8));
>>                }
>>                return array;
>>        }
>>
>>        private static long fastToLong( byte[] array ) throws IOException {
>>                long value = 0;
>>                for (int i = 0; i < array.length; i++) {
>>                        value <<= 8;
>>                        value ^= (long) array[i] & 0xFF;
>>                }
>>                return value;
>>        }
>> }
>>
>>
>> On Thu, Jun 24, 2010 at 11:29 AM, Atle Prange <atle.pra...@gmail.com>
>> wrote:
>> > I guess I have to brush the dust off my tree knowledge then...
>> >
>> > -atle
>> >
>> > On Thu, 2010-06-24 at 09:43 +0200, Mattias Persson wrote:
>> >> 2010/6/23 Atle Prange <atle.pra...@gmail.com>
>> >>
>> >> > Hm, i'll have to fix that...
>> >> >
>> >> > Any thoughts on a Trie implementation? Would it be able to compete?
>> >> >
>> >> > I have no idea on performance or what would be the best approach. I
>> thought
>> >> your alphabet-relationship-types approach sounded quite interesting. Or
>> as a
>> >> b-tree or some other ??-tree.
>> >> >
>> >> > atle
>> >> >
>> >> >
>> >> >
>> >> >
>> >> > On Wed, Jun 23, 2010 at 11:04 AM, Mattias Persson
>> >> > <matt...@neotechnology.com> wrote:
>> >> > > I think the Lucene test is flawed since it never returns any results
>> >> > > in the lookup method. That's why it's so fast :)
>> >> > >
>> >> > > 2010/6/22 Atle Prange <atle.pra...@gmail.com>
>> >> > >
>> >> > >> Started a new thread since the old got a bit long, if you want to
>> >> > >> catch up read the thread "The event framework has landed".
>> >> > >>
>> >> > >> Okay, i changed the tests to reflect a bit more realistic usage.
>> >> > >>
>> >> > >> The test first inserts 1M entries to create a base of data. After
>> >> > >> that, it reads and writes 1000 entries a thousand times.
>> >> > >>
>> >> > >> BabuDB:
>> >> > >> First million: 4s
>> >> > >> 1000 inserts, 4ms
>> >> > >> 1000 lookups: 30ms
>> >> > >>
>> >> > >> Lucene:
>> >> > >> First million entries took 1 ms. This shows the async behavior of
>> >> > Lucene.
>> >> > >> 1000 inserts: about 4 seconds (!)
>> >> > >> 1000 lookups: under 1 ms.
>> >> > >>
>> >> > >> (All numbers extremely approximated, and the numbers can only be
>> seen
>> >> > >> as relative performance indicators)
>> >> > >>
>> >> > >>
>> >> > >> This is what I expected. Lucene is optimized towards collecting
>> large
>> >> > >> amount of data batchwise, and then handle many searches. (Correct
>> me
>> >> > >> if i am wrong)
>> >> > >> BabuDB "just writes" data and "just reads" them later on.
>> >> > >>
>> >> > >> The test can of course be flawed.
>> >> > >>
>> >> > >> BabuDB test:
>> >> > >>
>> >> > >>
>> >> > >>
>> >> > >> package org.ogrm.test;
>> >> > >>
>> >> > >> import java.io.File;
>> >> > >> import java.io.IOException;
>> >> > >> import java.util.Iterator;
>> >> > >> import java.util.Map.Entry;
>> >> > >>
>> >> > >> import org.apache.commons.lang.math.RandomUtils;
>> >> > >> import org.xtreemfs.babudb.BabuDB;
>> >> > >> import org.xtreemfs.babudb.BabuDBException;
>> >> > >> import org.xtreemfs.babudb.BabuDBFactory;
>> >> > >> import org.xtreemfs.babudb.config.BabuDBConfig;
>> >> > >> import org.xtreemfs.babudb.log.DiskLogger.SyncMode;
>> >> > >> import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup;
>> >> > >> import org.xtreemfs.babudb.lsmdb.Database;
>> >> > >>
>> >> > >> public class TestBabuDb {
>> >> > >>
>> >> > >>        private static Database db;
>> >> > >>
>> >> > >>        public static void main( String[] args ) throws Exception {
>> >> > >>                deleteFileOrDirectory( new File( "babudb" ) );
>> >> > >>                BabuDB babuDb = BabuDBFactory.createBabuDB( new
>> >> > >> BabuDBConfig(
>> >> > >> "babudb/db", "babudb/log", 1, 1024 * 1024 * 20,
>> >> > >>                                10, SyncMode.ASYNC, 0, 0, false,
>> 512,
>> >> > 1024 *
>> >> > >> 1024 * 100 ) );
>> >> > >>                db = babuDb.getDatabaseManager().createDatabase(
>> "test",
>> >> > 1
>> >> > >> );
>> >> > >>                int init = 1000000;
>> >> > >>                int num = 1000;
>> >> > >>                int base = 0;
>> >> > >>                int iterations = 1000;
>> >> > >>                insert( init, base );
>> >> > >>                base = init;
>> >> > >>                for (int i = 0; i < iterations; i++) {
>> >> > >>                        lookup( num, base );
>> >> > >>                        insert( num, base );
>> >> > >>                        base = base + num;
>> >> > >>                }
>> >> > >>
>> >> > >>                db.shutdown();
>> >> > >>                babuDb.shutdown();
>> >> > >>        }
>> >> > >>
>> >> > >>        private static byte[] fastToBytes( long value ) throws
>> >> > IOException {
>> >> > >>                byte[] array = new byte[8];
>> >> > >>                for (int i = 0; i < 8; i++) {
>> >> > >>                        array[7 - i] = (byte) (value >>> (i * 8));
>> >> > >>                }
>> >> > >>                return array;
>> >> > >>        }
>> >> > >>
>> >> > >>        private static long fastToLong( byte[] array ) throws
>> IOException
>> >> > {
>> >> > >>                long value = 0;
>> >> > >>                for (int i = 0; i < array.length; i++) {
>> >> > >>                        value <<= 8;
>> >> > >>                        value ^= (long) array[i] & 0xFF;
>> >> > >>                }
>> >> > >>                return value;
>> >> > >>        }
>> >> > >>
>> >> > >>        private static byte[] lookupKey( String key, Object value )
>> {
>> >> > >>                return String.valueOf( key + "|" + value + "|"
>> >> > ).getBytes();
>> >> > >>        }
>> >> > >>
>> >> > >>        private static byte[] key( long id, String key, Object value
>> ) {
>> >> > >>                return String.valueOf( key + "|" + value + "|" + id
>> >> > >> ).getBytes();
>> >> > >>        }
>> >> > >>
>> >> > >>        private static void lookup( int num, int start ) throws
>> Exception
>> >> > {
>> >> > >>                long t = System.currentTimeMillis();
>> >> > >>                for (int i = start; i < (start + num); i++) {
>> >> > >>                        Iterator<Entry<byte[], byte[]>> entries =
>> >> > >> db.prefixLookup( 0,
>> >> > >> lookupKey( "key", "value" + i ), null ).get();
>> >> > >>                        while (entries.hasNext()) {
>> >> > >>                                Entry<byte[], byte[]> entry =
>> >> > >> entries.next();
>> >> > >>                                fastToLong( entry.getValue() );
>> >> > >>                        }
>> >> > >>                }
>> >> > >>                System.out.println( num + " lookups:" +
>> >> > >> (System.currentTimeMillis() - t) );
>> >> > >>        }
>> >> > >>
>> >> > >>        private static void insert( int num, int start ) throws
>> Exception
>> >> > {
>> >> > >>                long t = System.currentTimeMillis();
>> >> > >>                BabuDBInsertGroup group = db.createInsertGroup();
>> >> > >>
>> >> > >>                for (int i = start; i < (num + start); i++) {
>> >> > >>                        long id = i;
>> >> > >>                        group.addInsert( 0, key( id, "key", "value"
>> + i %
>> >> > >> 10000 ),
>> >> > >> fastToBytes( id ) );
>> >> > >>                }
>> >> > >>                db.insert( group, null ).get();
>> >> > >>                System.out.println( "insert time (" + num + "):" +
>> >> > >> (System.currentTimeMillis() - t) );
>> >> > >>        }
>> >> > >>
>> >> > >>        public static void deleteFileOrDirectory( File file ) {
>> >> > >>                if (!file.exists()) {
>> >> > >>                        return;
>> >> > >>                }
>> >> > >>
>> >> > >>                if (file.isDirectory()) {
>> >> > >>                        for (File child : file.listFiles()) {
>> >> > >>                                deleteFileOrDirectory( child );
>> >> > >>                        }
>> >> > >>                        file.delete();
>> >> > >>                } else {
>> >> > >>                        file.delete();
>> >> > >>                }
>> >> > >>        }
>> >> > >>
>> >> > >>        private static long randomId() {
>> >> > >>                return RandomUtils.nextLong();
>> >> > >>        }
>> >> > >> }
>> >> > >>
>> >> > >>
>> >> > >> TestLucene
>> >> > >>
>> >> > >>
>> >> > >> package org.ogrm.test;
>> >> > >>
>> >> > >> import java.io.File;
>> >> > >> import java.io.IOException;
>> >> > >>
>> >> > >> import org.apache.lucene.analysis.KeywordAnalyzer;
>> >> > >> import org.apache.lucene.document.Document;
>> >> > >> import org.apache.lucene.document.Field;
>> >> > >> import org.apache.lucene.document.Field.Index;
>> >> > >> import org.apache.lucene.document.Field.Store;
>> >> > >> import org.apache.lucene.index.IndexReader;
>> >> > >> import org.apache.lucene.index.IndexWriter;
>> >> > >> import org.apache.lucene.index.Term;
>> >> > >> import org.apache.lucene.index.IndexWriter.MaxFieldLength;
>> >> > >> import org.apache.lucene.search.IndexSearcher;
>> >> > >> import org.apache.lucene.search.Query;
>> >> > >> import org.apache.lucene.search.ScoreDoc;
>> >> > >> import org.apache.lucene.search.TermQuery;
>> >> > >> import org.apache.lucene.search.TopDocs;
>> >> > >> import org.apache.lucene.store.Directory;
>> >> > >> import org.apache.lucene.store.FSDirectory;
>> >> > >>
>> >> > >> public class TestLucene {
>> >> > >>        private static IndexWriter writer;
>> >> > >>        private static IndexSearcher searcher;
>> >> > >>
>> >> > >>        public static void main( String[] args ) throws Exception {
>> >> > >>                File path = new File( "lcn" );
>> >> > >>                deleteFileOrDirectory( path );
>> >> > >>                Directory dir = FSDirectory.open( path );
>> >> > >>                writer = new IndexWriter( dir, new
>> KeywordAnalyzer(),
>> >> > >> MaxFieldLength.UNLIMITED );
>> >> > >>                writer.setMaxBufferedDocs( 100000 );
>> >> > >>
>> >> > >>                IndexReader reader = writer.getReader();
>> >> > >>                searcher = new IndexSearcher( reader );
>> >> > >>
>> >> > >>                int init = 1000000;
>> >> > >>                int num = 1000;
>> >> > >>                int base = 0;
>> >> > >>                int iterations = 1000;
>> >> > >>                insert( init, base );
>> >> > >>                base = init;
>> >> > >>                for (int i = 0; i < iterations; i++) {
>> >> > >>                        lookup( num, base );
>> >> > >>                        insert( num, base );
>> >> > >>                        base = base + num;
>> >> > >>                }
>> >> > >>        }
>> >> > >>
>> >> > >>        public static void deleteFileOrDirectory( File file ) {
>> >> > >>                if (!file.exists()) {
>> >> > >>                        return;
>> >> > >>                }
>> >> > >>
>> >> > >>                if (file.isDirectory()) {
>> >> > >>                        for (File child : file.listFiles()) {
>> >> > >>                                deleteFileOrDirectory( child );
>> >> > >>                        }
>> >> > >>                        file.delete();
>> >> > >>                } else {
>> >> > >>                        file.delete();
>> >> > >>                }
>> >> > >>        }
>> >> > >>
>> >> > >>        private static void insert( int num, int base ) throws
>> Exception
>> >> > {
>> >> > >>                long t = System.currentTimeMillis();
>> >> > >>                for (int i = num; i < (num + base); i++) {
>> >> > >>                        Document doc = new Document();
>> >> > >>                        doc.add( new Field( "_id_", fastToBytes( i
>> ),
>> >> > >> Store.YES ) );
>> >> > >>                        doc.add( new Field( "key", "value" + i %
>> 10000,
>> >> > >> Store.NO,
>> >> > >> Index.NOT_ANALYZED ) );
>> >> > >>                        writer.addDocument( doc );
>> >> > >>                }
>> >> > >>                writer.commit();
>> >> > >>                System.out.println( num + " inserts in " +
>> >> > >> (System.currentTimeMillis() - t) );
>> >> > >>        }
>> >> > >>
>> >> > >>        private static void lookup( int num, int base ) throws
>> Exception
>> >> > {
>> >> > >>                long t = System.currentTimeMillis();
>> >> > >>                for (int i = 0; i < 100; i++) {
>> >> > >>                        Query query = new TermQuery( new Term(
>> "key",
>> >> > >> "value" + (i + 2500000) ) );
>> >> > >>                        TopDocs docs = searcher.search( query, 100
>> );
>> >> > >>                        for (ScoreDoc scoreDoc : docs.scoreDocs) {
>> >> > >>                                Document doc = searcher.doc(
>> scoreDoc.doc
>> >> > );
>> >> > >>                                fastToLong( doc.getBinaryValue(
>> "_id_" )
>> >> > );
>> >> > >>                        }
>> >> > >>                }
>> >> > >>                System.out.println( num + " get " +
>> >> > >> (System.currentTimeMillis() - t) );
>> >> > >>        }
>> >> > >>
>> >> > >>        private static byte[] fastToBytes( long value ) throws
>> >> > IOException {
>> >> > >>                byte[] array = new byte[8];
>> >> > >>                for (int i = 0; i < 8; i++) {
>> >> > >>                        array[7 - i] = (byte) (value >>> (i * 8));
>> >> > >>                }
>> >> > >>                return array;
>> >> > >>        }
>> >> > >>
>> >> > >>        private static long fastToLong( byte[] array ) throws
>> IOException
>> >> > {
>> >> > >>                long value = 0;
>> >> > >>                for (int i = 0; i < array.length; i++) {
>> >> > >>                        value <<= 8;
>> >> > >>                        value ^= (long) array[i] & 0xFF;
>> >> > >>                }
>> >> > >>                return value;
>> >> > >>        }
>> >> > >> }
>> >> > >>
>> >> > >>
>> >> > >>
>> >> > >> -atle
>> >> > >> _______________________________________________
>> >> > >> Neo4j mailing list
>> >> > >> User@lists.neo4j.org
>> >> > >> https://lists.neo4j.org/mailman/listinfo/user
>> >> > >>
>> >> > >
>> >> > >
>> >> > >
>> >> > > --
>> >> > > Mattias Persson, [matt...@neotechnology.com]
>> >> > > Hacker, Neo Technology
>> >> > > www.neotechnology.com
>> >> > > _______________________________________________
>> >> > > Neo4j mailing list
>> >> > > User@lists.neo4j.org
>> >> > > https://lists.neo4j.org/mailman/listinfo/user
>> >> > >
>> >> > _______________________________________________
>> >> > Neo4j mailing list
>> >> > User@lists.neo4j.org
>> >> > https://lists.neo4j.org/mailman/listinfo/user
>> >> >
>> >>
>> >>
>> >>
>> >
>> >
>> >
>> _______________________________________________
>> Neo4j mailing list
>> User@lists.neo4j.org
>> https://lists.neo4j.org/mailman/listinfo/user
>>
>
>
>
> --
> Mattias Persson, [matt...@neotechnology.com]
> Hacker, Neo Technology
> www.neotechnology.com
> _______________________________________________
> Neo4j mailing list
> User@lists.neo4j.org
> https://lists.neo4j.org/mailman/listinfo/user
>
_______________________________________________
Neo4j mailing list
User@lists.neo4j.org
https://lists.neo4j.org/mailman/listinfo/user

Re: [Neo4j] Index Performance(Was: Event framework has landed)

Reply via email to