Hello, pylucene-dev.

I use ParallelMultiSearcher, and I manually construct the variables passed
to the search and searchSorted methods of a Searchable implementation.
I get this exception when I use a sort with ParallelMultiSearcher:

scoreDoc: 261
scoreDoc: 211
... skipped ...
scoreDoc: 142
scoreDoc: 144
Exception in thread "MultiSearcher thread #1" java.lang.ClassCastException: 
org.apache.lucene.search.ScoreDoc cannot be cast to
org.apache.lucene.search.FieldDoc
   at 0x012ce0ae (Unknown Source)
   ... skipped
hits: 18

If I do not use a sort, or if I call searchSorted directly on the Searchable
implementation, then everything is OK.
I do not understand this. What is corrupting the data?
Maybe the searchSorted method of the Searchable implementation generates
"bad" PyLucene objects?

Thanks in advance.

Yura Smolsky
#!/usr/bin/python2.4

import sys, re, threading, feedfinder, urlparse
import myCORBA, ServerCarcass, Servers, Servers__POA
import os, time         
from Queue import Queue, Empty, Full
from PyLucene import *

# CORBA Server. it contains IndexSearcher inside and dispatch messages from
# outside to IndexSearcher
class RemoteSearcher_i(Servers__POA.RemoteSearcher,
                       ServerCarcass.ServerCarcass_i):
    """CORBA servant that forwards requests to a local IndexSearcher.

    Lucene results are copied into the plain ``Servers.c*`` structures
    (cScoreDoc, cTopDocs, cSortField, cTopFieldDocs, cField, cDocument)
    before they are returned.
    """
    URI = "Servers.RemoteSearcher"

    def __init__(self, num, path):
        """Open the index at *path* and set up the query parser and sort.

        num  -- server number; its string form is appended to ``URI``.
        path -- index location passed to ``FSDirectory.getDirectory``.
        """
        self.URI += str(num)
        ServerCarcass.ServerCarcass_i.__init__(self)

        # No searcher yet; _reloadIndex() only closes a previous one if set.
        self.searcher = None
        self.path = path
        self._reloadIndex()

        self.analyzer = SnowballAnalyzer("English",
                                         StopAnalyzer.ENGLISH_STOP_WORDS)
        self.queryParser = QueryParser("content", self.analyzer)
        self.sort = Sort()
        # Sorted searches use the "modified" field, compared as a string,
        # in reverse order.  A second SortField(None, SortField.SCORE) key
        # was left disabled here.
        self.sort.setSort([SortField("modified", SortField.STRING, True)])
        # NOTE(review): 1 is presumably the AND default operator -- confirm
        # against the QueryParser constants of the PyLucene version in use.
        self.queryParser.setOperator(1)

    def _reloadIndex(self):
        """(Re)open the directory and searcher for ``self.path``.

        The new searcher is opened before the previous one is closed, so
        if opening raises, the previous searcher is still the one in
        ``self.searcher`` and has not been closed.
        """
        directory = FSDirectory.getDirectory(self.path, False)
        searcher = IndexSearcher(directory)

        previous = self.searcher
        self.directory = directory
        self.searcher = searcher
        if previous:
            previous.close()

    def __del__(self):
        """Intentionally does nothing."""
        pass

    def search(self, queryText, n):
        """Parse *queryText*, run it for at most *n* hits, return a cTopDocs.

        Each hit is returned as ``cScoreDoc(doc, score)``.
        """
        query = self.queryParser.parseQuery(queryText)
        topDocs = self.searcher.search(query, None, n)
        scoreDocs = [Servers.cScoreDoc(hit.doc, hit.score)
                     for hit in topDocs.scoreDocs]
        return Servers.cTopDocs(topDocs.totalHits, scoreDocs)

    def searchSorted(self, queryText, n):
        """Like search(), but ordered by ``self.sort``; returns cTopFieldDocs.

        Hits are returned as ``cScoreDoc(doc, score)`` and the sort keys as
        ``cSortField(field, type, reverse)``.
        """
        query = self.queryParser.parseQuery(queryText)
        topFieldDocs = self.searcher.search(query, None, n, self.sort)

        scoreDocs = [Servers.cScoreDoc(hit.doc, hit.score)
                     for hit in topFieldDocs.scoreDocs]
        sortFields = [Servers.cSortField(key.getField(), key.getType(),
                                         key.getReverse())
                      for key in topFieldDocs.fields]
        return Servers.cTopFieldDocs(topFieldDocs.totalHits, scoreDocs,
                                     sortFields)

    def docFreq(self, termField, termText):
        """Return the searcher's docFreq for Term(termField, termText)."""
        return self.searcher.docFreq(Term(termField, termText))

    def maxDoc(self):
        """Return the searcher's maxDoc() value."""
        return self.searcher.maxDoc()

    def get(self, i):
        """Return document *i* as a cDocument of (name, string value) fields."""
        doc = self.searcher.doc(i)
        nFields = [Servers.cField(field.name(), field.stringValue())
                   for field in doc.fields()]
        return Servers.cDocument(doc.getBoost(), nFields)

    def reload(self):
        """Reopen the index; see _reloadIndex()."""
        self._reloadIndex()

    def dump(self):
        """Return a one-line status string: ``"<maxDoc> docs"``."""
        return "%s docs" % self.maxDoc()

    def _stop(self):
        """Log that the servant has stopped."""
        # self._log is not set in this class; presumably provided by
        # ServerCarcass_i -- confirm.
        self._log.info("stopped")
        
    

class Server(myCORBA.CORBAServerController):
    """Server controller whose servant is a RemoteSearcher_i."""

    def setUp(self):
        """Create the servant and store it in ``self.objServant``.

        ``num`` is read from module scope (it is only read here, so no
        ``global`` declaration is required); the index path is fixed.
        """
        index_path = '../index/index03/'
        self.objServant = RemoteSearcher_i(num, index_path)
        
if __name__=='__main__':
    import sys
    if not '-n' in sys.argv:
        print 'please, specify number of remote search server'
        sys.exit()
    num = sys.argv[sys.argv.index('-n')+1]
    print 'working with #%s' % num
        
    
    server = Server(Servers.RemoteSearcher, "Servers.RemoteSearcher"+str(num))
    server.processCommandLine(sys.argv)
import sys
import myCORBA, Servers
from PyLucene import *

class RemoteSearcherClient(Searchable):
    """Searchable implementation backed by a remote CORBA RemoteSearcher.

    PyLucene requests are reduced to simple values (query text, hit count,
    term field/text) and sent to the remote IndexSearcher.  Once the answer
    is received, it is converted back into PyLucene objects and returned.
    """

    def __init__(self, num='1'):
        """Connect to the remote object named "Servers.RemoteSearcher" + num.

        num -- server number as a string (it is concatenated to the name).
        """
        self.searcher = myCORBA.WrappedClient(Servers.RemoteSearcher,
                                              "Servers.RemoteSearcher" + num)

    def reload(self):
        """Ask the remote side to reload its index."""
        self.searcher.reload()

    def close(self):
        """Do nothing; the remote searcher is not closed from the client."""
        pass

    def docFreq(self, term):
        """Return the remote docFreq for *term* (sent as field and text)."""
        return self.searcher.docFreq(term.field(), term.text())

    def maxDoc(self):
        """Return the remote maxDoc() value."""
        return self.searcher.maxDoc()

    def searchAll(self, query, filter, hitCollector):
        """Not supported by this wrapper."""
        raise NotImplementedError

    def search(self, query, filter, n):
        """Run *query* remotely for at most *n* hits and return a TopDocs.

        Only the query's string form and *n* are sent; *filter* is not
        forwarded.
        """
        queryText = unicode(query.toString())
        pyTopDocs = self.searcher.search(queryText, int(n))

        scoreDocs = [ScoreDoc(hit.doc, hit.score)
                     for hit in pyTopDocs.scoreDocs]
        return TopDocs(pyTopDocs.totalHits, scoreDocs)

    def searchSorted(self, query, filter, n, sort):
        """Run *query* remotely with sorting and return a TopFieldDocs.

        Only the query's string form and *n* are sent; *filter* and *sort*
        are not forwarded.  The sort fields in the result are rebuilt from
        the ones in the remote reply.
        """
        queryText = unicode(query.toString())
        pyTopFieldDocs = self.searcher.searchSorted(queryText, int(n))

        # NOTE(review): these are plain ScoreDoc objects.  The reported
        # "ScoreDoc cannot be cast to FieldDoc" failure under
        # ParallelMultiSearcher suggests the sorted merge expects FieldDoc
        # entries; building those would need per-hit sort values, which the
        # reply's scoreDocs (doc, score) do not carry -- confirm.
        scoreDocs = [ScoreDoc(hit.doc, hit.score)
                     for hit in pyTopFieldDocs.scoreDocs]

        sortFields = []
        for remoteField in pyTopFieldDocs.sortFields:
            # The reverse flag is compared with 1 to get a real boolean;
            # the received structure itself is left untouched.
            reverse = (remoteField.reverse == 1)
            sortFields.append(SortField(remoteField.field, remoteField.type,
                                        reverse))

        return TopFieldDocs(pyTopFieldDocs.totalHits, scoreDocs, sortFields)

    def doc(self, i):
        """Not supported by this wrapper.

        The previous body read ``self.local``, an attribute this class never
        assigns, so it could only fail with AttributeError.  It now fails
        the same explicit way as rewrite() and explain().
        """
        raise NotImplementedError("RemoteSearcherClient.doc is not implemented")

    def rewrite(self, original):
        """Not supported by this wrapper."""
        raise NotImplementedError

    def explain(self, query, doc):
        """Not supported by this wrapper."""
        raise NotImplementedError
        

###### this stuff works
remoteS = RemoteSearcherClient('1')
sort = Sort()
sort.setSort([SortField("modified", SortField.STRING, True)])
query = QueryParser.parse("good AND bad", "content", StandardAnalyzer())
docs = remoteS.searchSorted(query, None, 10, sort)
for scoreDoc in docs.scoreDocs:
    print "scoreDoc:", scoreDoc.doc

###### and this does not
remoteS = RemoteSearcherClient('1')
# create MultiSearcher with one Searcher
parallel = ParallelMultiSearcher([remoteS])

sort = Sort()
sort.setSort([SortField("modified", SortField.STRING, True)])
query = QueryParser.parse("good AND bad", "content", StandardAnalyzer())

hits = parallel.search(query, sort)
print "hits:", hits.length()

#for hit in range(hits.length()):
#    print hits.score(hit)
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev

Reply via email to