Yep, your use case is a good fit for using indexes... looping through 300k nodes just to find one particular node isn't very efficient. Take a look at http://components.neo4j.org/neo4j.py/ for how to use indexing in the Neo4j Python bindings.
2010/10/5 Francois Kassis <francois_kas...@hotmail.com> > Hi all, > I am trying to retrieve data from neo4j database using python version. > I have over 300000 nodes all joined to a master node by a OFTYPE > relationships. > > I used the following to traverse it: > > #!/usr/bin/env python > # -*- coding: UTF-8 -*- > # Traversal > import neo4j > > > def get_OFTYPE(ar_node):#, ar_filter): > return OFTYPE(ar_node) > > class OFTYPE(neo4j.Traversal): > # my_pos_filter = "" > # > # def __init__(self, start, pos_filter=""): > # # set an internal variable > # self.my_pos_filter = pos_filter > # neo4j.Traversal.__init__(self, start) > > types = [ > neo4j.Incoming.OFTYPE, > ] > order = neo4j.BREADTH_FIRST > stop = neo4j.StopAtDepth(1) > > def isReturnable(self, position): > return (not position.is_start > # and position.label == self.my_pos_filter > and position.last_relationship.type == 'OFTYPE') > > > > and call the above by: > > #!/usr/bin/env python > # -*- coding: UTF-8 -*- > > import argparse > import neo4j > import ConfigParser > import os, sys, datetime, string > import neoentity_traverse_test > from neo4j.util import Subreference > > def main(): > ls_current_script_path = sys.path[0] # os.getcwd() > ls_current_script_filename = sys.argv[0] > > config = ConfigParser.RawConfigParser() > config.read(ls_current_script_path + '/' + 'neoentity.cfg') > > parser = argparse.ArgumentParser(description='Initialize neo4j database > for mediasharks root enteties. 
NOTE: if the database '\ > 'does not exists, it will simply be > created.') > parser.add_argument('--neodbpath', dest='neodbpath', > metavar='NEODB-PATH', > default=config.get('database', 'database_path'), > help='a directory where neodb should be created or > opened.') > parser.add_argument('--classpath', dest='kernalclasspath', > metavar='KERNAL-CLASSPATH', > default=config.get('neo4j', 'kernalclasspath'), > help='the path toneo4j kernal path.') > parser.add_argument('--jvm', dest='jvmclasspath', > metavar='JVM-CLASSPATH', > default=config.get('jvm', 'jvmpath'), > help='the path toneo4j kernal path.') > > args = parser.parse_args() > pytest(args.neodbpath, args.kernalclasspath, args.jvmclasspath) > > > def pytest(arg_neodb_path, arg_kernal_classpath, arg_jvm_classpath): > print "=====================================================" > print "initializing neo4j db using parameters:" > print "database-path = " + arg_neodb_path > print "kernel-path = " + arg_kernal_classpath > print "jvm-path = " + arg_jvm_classpath > print "=====================================================" > > #initialize variables > ls_message = "" > > #create new neo database > graphdb = neo4j.GraphDatabase(arg_neodb_path, > classpath=arg_kernal_classpath, jvm=arg_jvm_classpath) > > #start new transaction > try: > tx = graphdb.transaction.begin() > > rootindex = graphdb.index("root_index", create=True) > subbrandnode = rootindex["subbrand"] > referencenode = graphdb.node[0] > > > li_index = 0 > ls_filter = "PEPSI" > for node in neoentity_traverse_test.get_OFTYPE(subbrandnode):#, > ls_filter): > ls_result = node["label"] > if ls_result.startswith(ls_filter): > li_index = li_index + 1 > print ls_result > print li_index > > except: > tx.failure() > print "Error occurred, exiting..." 
> raise > else: > tx.success() > finally: > tx.finish() > > #saving current transactions and closing current database > graphdb.shutdown() > > > if __name__ == '__main__': > main() > > The problem is it's taking too much time. how can I improve performance and > how can I use or call the lucene indexer from within python. > THX in advance. > > Francois. > _______________________________________________ > Neo4j mailing list > User@lists.neo4j.org > https://lists.neo4j.org/mailman/listinfo/user > -- Mattias Persson, [matt...@neotechnology.com] Hacker, Neo Technology www.neotechnology.com _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user