Hello, Andi.

>> In general, for a large Java heap, you are better off running
>> a recent gcj.  And ideally the collector should be built with
>> -DLARGE_CONFIG, which I don't think it is by default.

AV> I imagine you are running a recent gcj already since you solved the 4gb file
AV> limit, aren't you?

Yes. I build the latest gcj (4.0 now) from CVS.

AV> To build a gcj with LARGE_CONFIG, see PyLucene/INSTALL.

Well, I already built gcj with this option.

Also, I have attached a file which can reproduce the OutOfMemory bug on a big
index. I use Queue() to initiate the search in the PythonThread. Maybe
Queue() affects things?
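
To rule Queue() out, here is a rough, untested sketch (same index path, field
and query as in the attached script) that runs the same search straight from
PythonThread.run(), with no Queue at all:

from PyLucene import *

directory = FSDirectory.getDirectory('../index/index03', False)
searcher = IndexSearcher(directory)
analyzer = StandardAnalyzer()
sort = Sort()

class DirectSearchThread(PythonThread):
    def __init__(self, id):
        PythonThread.__init__(self, name='DirectSearch-%s' % id)
        self.id = id

    def run(self):
        # parse and run the query in this thread, no Queue involved
        query = QueryParser("content", analyzer).parseQuery("good")
        topFieldDocs = searcher.search(query, None, 10, sort)
        for fDoc in topFieldDocs.scoreDocs:
            fDoc = fDoc.toFieldDoc()
            print self.id, fDoc.doc, fDoc.score

threads = [DirectSearchThread(i) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    # assuming PythonThread supports join() like threading.Thread
    t.join()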

Tomorrow I am going to port my example to Java.

Yura Smolsky.


#!/usr/bin/python2.4

from PyLucene import *
from Queue import Queue, Empty, Full
import time

class LuceneSearcher:
    def __init__(self, path):
        self.searcher = None  # no searcher open yet
        self.path = path
        self._reloadIndex()
        self.analyzer = StandardAnalyzer()
        self.queryParser = QueryParser("content", self.analyzer)
        self.sort = Sort()
        #self.sort.setSort([SortField("modified", SortField.STRING, True)]) 
        
    """ just store for all variables of searcher """
    def _reloadIndex(self):
        if self.searcher:
            self.searcher.close()
        self.directory = FSDirectory.getDirectory(self.path, False)
        self.searcher = IndexSearcher(self.directory)

class LuceneWorkerThread(PythonThread):
    def __init__(self, luceneSearcher, qIn, qOut, id):
        PythonThread.__init__(self, name='LuceneWorker-%s' % id)
        self.id = id
        self.luceneSearcher = luceneSearcher
        self.queueIn = qIn
        self.queueOut = qOut

    def search(self, query):
        query = self.luceneSearcher.queryParser.parseQuery(query)
        topFieldDocs = self.luceneSearcher.searcher.search(
            query, None, 10, self.luceneSearcher.sort)
        for fDoc in topFieldDocs.scoreDocs:
            fDoc = fDoc.toFieldDoc()
            print fDoc.doc, fDoc.score, fDoc.fields
        return topFieldDocs

    def run(self):
        while True:
            #print self.id
            method, args = self.queueIn.get()
            if method == 'search':
                result = self.search(args)
            else:
                result = None
            self.queueOut.put(result)


# main
luceneSearcher = LuceneSearcher('../index/index03')

# for big indexes, numberThreads > 5..10 will cause OutOfMemory
numberThreads = 10

queueWorkers = Queue()
for i in range(numberThreads):
    luceneWorker = LuceneWorkerThread(luceneSearcher, Queue(), Queue(), i)
    luceneWorker.setDaemon(1)
    luceneWorker.start()
    queueWorkers.put(luceneWorker)
        
lw = queueWorkers.get()
# hand one query to a worker to trigger the search
lw.queueIn.put(('search', 'good'))
# wait for the response
print lw.queueOut.get()
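
And a hypothetical extension of the driver above (not part of the attached
script) that pushes the same query to every remaining worker before collecting
the replies, so all the searches run concurrently:

# hypothetical: dispatch to every remaining worker first, then collect,
# so the searches overlap instead of running one at a time
workers = []
while not queueWorkers.empty():
    workers.append(queueWorkers.get())

for worker in workers:
    worker.queueIn.put(('search', 'good'))

for worker in workers:
    # blocks until this worker has answered
    print worker.queueOut.get()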
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev
