Yep, your use case is a good fit for using indexes... looping through 300k
nodes just to find one particular node isn't very efficient. Take a look at
http://components.neo4j.org/neo4j.py/ for how to use indexing in the Neo4j
Python bindings.

2010/10/5 Francois Kassis <francois_kas...@hotmail.com>

> Hi all,
> I am trying to retrieve data from a neo4j database using the python bindings.
> I have over 300000 nodes, all joined to a master node by OFTYPE
> relationships.
>
> I used the following to traverse it:
>
> #!/usr/bin/env python
> # -*- coding: UTF-8 -*-
> # Traversal
> import neo4j
>
>
> def get_OFTYPE(ar_node):#, ar_filter):
>    return OFTYPE(ar_node)
>
> class OFTYPE(neo4j.Traversal):
> #    my_pos_filter = ""
> #
> #    def __init__(self, start, pos_filter=""):
> #        # set an internal variable
> #        self.my_pos_filter = pos_filter
> #        neo4j.Traversal.__init__(self, start)
>
>    types = [
>        neo4j.Incoming.OFTYPE,
>        ]
>    order = neo4j.BREADTH_FIRST
>    stop = neo4j.StopAtDepth(1)
>
>    def isReturnable(self, position):
>        return (not position.is_start
> #                and position.label == self.my_pos_filter
>                and position.last_relationship.type == 'OFTYPE')
>
>
>
> and I call the above with:
>
> #!/usr/bin/env python
> # -*- coding: UTF-8 -*-
>
> import argparse
> import neo4j
> import ConfigParser
> import os, sys, datetime, string
> import neoentity_traverse_test
> from neo4j.util import Subreference
>
> def main():
>    ls_current_script_path = sys.path[0] # os.getcwd()
>    ls_current_script_filename = sys.argv[0]
>
>    config = ConfigParser.RawConfigParser()
>    config.read(ls_current_script_path + '/' + 'neoentity.cfg')
>
>    parser = argparse.ArgumentParser(description='Initialize neo4j database
> for mediasharks root enteties. NOTE: if the database '\
>                                     'does not exists, it will simply be
> created.')
>    parser.add_argument('--neodbpath', dest='neodbpath',
> metavar='NEODB-PATH',
>                        default=config.get('database', 'database_path'),
>                        help='a directory where neodb should be created or
> opened.')
>    parser.add_argument('--classpath', dest='kernalclasspath',
> metavar='KERNAL-CLASSPATH',
>                        default=config.get('neo4j', 'kernalclasspath'),
>                        help='the path toneo4j kernal path.')
>    parser.add_argument('--jvm', dest='jvmclasspath',
> metavar='JVM-CLASSPATH',
>                        default=config.get('jvm', 'jvmpath'),
>                        help='the path toneo4j kernal path.')
>
>    args = parser.parse_args()
>    pytest(args.neodbpath, args.kernalclasspath, args.jvmclasspath)
>
>
> def pytest(arg_neodb_path, arg_kernal_classpath, arg_jvm_classpath):
>    print "====================================================="
>    print "initializing neo4j db using parameters:"
>    print "database-path = " + arg_neodb_path
>    print "kernel-path = " + arg_kernal_classpath
>    print "jvm-path = " + arg_jvm_classpath
>    print "====================================================="
>
>    #initialize variables
>    ls_message = ""
>
>    #create new neo database
>    graphdb = neo4j.GraphDatabase(arg_neodb_path,
> classpath=arg_kernal_classpath, jvm=arg_jvm_classpath)
>
>    #start new transaction
>    try:
>        tx = graphdb.transaction.begin()
>
>        rootindex = graphdb.index("root_index", create=True)
>        subbrandnode = rootindex["subbrand"]
>        referencenode = graphdb.node[0]
>
>
>        li_index = 0
>        ls_filter = "PEPSI"
>        for node in neoentity_traverse_test.get_OFTYPE(subbrandnode):#,
> ls_filter):
>            ls_result = node["label"]
>            if ls_result.startswith(ls_filter):
>                li_index = li_index + 1
>                print ls_result
>        print li_index
>
>    except:
>        tx.failure()
>        print "Error occurred, exiting..."
>        raise
>    else:
>        tx.success()
>    finally:
>        tx.finish()
>
>    #saving current transactions and closing current database
>    graphdb.shutdown()
>
>
> if __name__ == '__main__':
>    main()
>
> The problem is that it's taking too much time. How can I improve performance,
> and how can I use or call the Lucene indexer from within Python?
> Thanks in advance.
>
> Francois.
> _______________________________________________
> Neo4j mailing list
> User@lists.neo4j.org
> https://lists.neo4j.org/mailman/listinfo/user
>



-- 
Mattias Persson, [matt...@neotechnology.com]
Hacker, Neo Technology
www.neotechnology.com
_______________________________________________
Neo4j mailing list
User@lists.neo4j.org
https://lists.neo4j.org/mailman/listinfo/user

Reply via email to