Hello.
I wanted to try splitting my index up into two slices and indexing each in
separate threads to see if it would run faster on a dual-proc box, but my
script began segfaulting as soon as threading was added. This is the first
time I've ever used threads in Python, so I might be doing something
obviously stupid.
Anyway, I pared down the script to a minimal test case that still yields a
segfault. Here is the code:
---
#!/usr/bin/python
import os
import sys
import threading
import PyLucene
class Indexer(object):
def __init__(self, index_dir):
self.index_dir = index_dir
if not os.path.exists(index_dir):
os.mkdir(index_dir)
def run(self):
worker1 = Worker(self.index_dir + '/1', 1)
worker2 = Worker(self.index_dir + '/2', 2)
worker1.start()
worker2.start()
while (worker1.isAlive() or worker2.isAlive()):
pass
class Worker(threading.Thread):
def __init__(self, index_dir, worker_id):
threading.Thread.__init__(self)
self.index_dir = index_dir
self.worker_id = worker_id
if not os.path.exists(index_dir):
os.mkdir(index_dir)
def run(self):
print 'woo hoo: ' + self.index_dir
self.store = PyLucene.FSDirectory.getDirectory(self.index_dir, True)
self.store.close()
if __name__ == '__main__':
if len(sys.argv) < 2:
print "Usage: python " + __file__ + " <index_dir>"
sys.exit(1)
print 'PyLucene', PyLucene.VERSION, 'Lucene', PyLucene.LUCENE_VERSION
indexer = Indexer(sys.argv[1])
indexer.run()
---
The output is as follows:
[EMAIL PROTECTED] ~/bin]$ lucene_segfault_demo /tmp
PyLucene 2.1.0-1 Lucene 2.1.0-509013
woo hoo: /tmp/1
Segmentation fault
Any ideas?
-ofer
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev