Andi Vajda wrote:
> 
> This was indeed very broken and had never been used nor tested.
> I made a number of fixes and changes today that got it to work:
> 
>   - added cloning support to PythonInputStream
>   - added isClone argument to PythonInputStream.close()
>   - because InputStream expects subclasses to set its length variable upon
>     construction, a lengthInternal() method was added to the python protocol
>     to return the length of the input file or source thus opened
>   - fixed NUL char bug in PythonInputStream.readInternal() that caused reads
>     to be short when the data read contained NUL chars
>   - added missing seekInternal() method to PythonOutputStream
>   - made PythonOutputStream close() call OutputStream close() first
> 
> It seems that I also got it to work with the Lucene index compound file
> format (the default) with the exception that
> PythonDirectory.deleteFile() sometimes is asked to delete non-existent
> files. I was not able, today, to solve that problem though.
> 
> The new version of fdir.py is attached. I'd like to integrate it into a
> unit test at some point.
> 
> Fixes are checked in.
> 
> Andi.. 

Hi Andi,

Thanks for the fixes. I recompiled the latest trunk and was able to use
fdir with manindex and mansearch successfully. The unit tests were a
different story: I frobbed fdir into a unit test (attached), using the
test_PyLucene base class, but ran into a number of failures and a
possible regression.

The underlying issue behind most of the failures is that indexing just
one document in a PythonDirectory doesn't actually get anything written
to the index; it seems to take about 10 documents to force the
directory to be written to.
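
To make that concrete, the failing cases boil down to something like the
hypothetical test method below. The openStore/getWriter/closeStore helpers
are the ones from the test_PyLucene base class; the PyLucene
Document/Field/StandardAnalyzer names and the Field.Store/Field.Index
constants are my assumptions about what trunk exposes, so adjust them to
whatever test_indexDocument actually uses.

    def test_singleDocumentIsWritten(self):
        # index exactly one document through a PythonFileDirectory,
        # close everything, then look at what reached the store
        store = self.openStore()
        writer = None
        try:
            analyzer = PyLucene.StandardAnalyzer()
            writer = self.getWriter(store, analyzer, True)
            doc = PyLucene.Document()
            doc.add(PyLucene.Field("all", "foo bar baz",
                                   PyLucene.Field.Store.YES,
                                   PyLucene.Field.Index.TOKENIZED))
            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)
        # what I see: with a single document the store stays effectively
        # empty; with ~10 documents added to the same writer it does not
        self.assert_(os.listdir(self.STORE_DIR))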

Another issue is that simulating multiple incremental index runs
(opening the Python directory, creating a writer and analyzer, indexing
a doc, then closing the writer and directory, all in a loop) causes the
process to abort.
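
The TODOtest_IncrementalLoop method at the bottom of the attached file
drives this through test_indexDocument; stripped down (same caveats as
above, and with the document construction elided), each run of the loop
is essentially:

    for i in range(100):
        store = self.openStore()            # a fresh PythonFileDirectory
        analyzer = PyLucene.StandardAnalyzer()
        writer = self.getWriter(store, analyzer, create=(i == 0))
        writer.addDocument(doc)             # doc built as in the sketch above
        self.closeStore(store, writer)      # aborts after a variable number
                                            # of iterations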

I'm also seeing a regression in a few of the other tests that don't use
PythonDirectory (test_Analyzers.py is one). The exact API call varies,
but the error message is always the same:

SystemError: NULL result without error in PyObject_Call

Thanks,

-kapil
Index: test_PyLucene.py
===================================================================
--- test_PyLucene.py	(revision 194)
+++ test_PyLucene.py	(working copy)
@@ -37,7 +37,7 @@
     def openStore(self):
         raise NotImplemented
 
-    def closeStore(self, store):
+    def closeStore(self, store, *args):
         pass
 
     def getWriter(self, store, analyzer, create=False):
@@ -285,7 +285,7 @@
     def closeStore(self, store, *args):
         
         for arg in args:
-            arg.close()
+            if arg: arg.close()
         store.close()
 
 
import os, sys, unittest, shutil, weakref
from threading import RLock
import test_PyLucene 

"""
The Directory Implementation here is for testing purposes only, not meant
as an example of writing one, the implementation here suffers from a lack
of safety when dealing with concurrent modifications as it does away with 
the file locking in the default lucene fsdirectory implementation.
"""

DEBUG = False

class DebugWrapper( object ):

    def __init__(self, obj ):
        self.obj = obj

    def __getattr__(self, name):
        print self.obj.__class__.__name__, self.obj.name, name
        sys.stdout.flush()
        return getattr(self.obj, name )
        
class DebugFactory( object ):
    
    def __init__(self, klass):
        self.klass = klass
        
    def __call__(self, *args, **kw):
        instance = self.klass(*args, **kw)
        return DebugWrapper( instance )


class PythonDirLock( object ):
    # only safe for a single process
    
    def __init__(self, name, path, lock ):
        self.name = name
        self.lock_file = path
        self.lock = lock

    def isLocked(self):
        return self.lock.locked()

    def obtainTimeout( self, timeout ):
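        # note: threading.RLock.acquire() has no timeout argument in Python 2;
        # a non-zero timeout here is just treated as the blocking flag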
        return self.lock.acquire( timeout )

    def obtain( self ):
        return self.lock.acquire()

    def release( self ):
        return self.lock.release()

class PythonFileStream( object ):

    def __init__(self, name, fh, size=0L ):
        self.name = name
        self.fh = fh
        self.size = size    # length when used as an input file
        self._length = 0L   # bytes written so far when used as an output file;
                            # kept under a private name so it does not shadow
                            # the length() method below

    def close(self, isClone=False):
        if isClone:
            return
        self.fh.close()

    def readInternal( self, length, pos ):
        self.fh.seek(pos)
        return self.fh.read( length )

    def seekInternal( self, pos ):
        self.fh.seek( pos )

    def flushBuffer( self, buffer ):
        self.fh.write( buffer )
        self.fh.flush()
        self._length += len(buffer)

    def lengthInternal( self ):
        return self.size

    def length( self ):
        return self._length

        
class PythonFileDirectory( object ):

    def __init__(self, path ):
        self.name = path
        assert os.path.isdir( path )
        self.path = path
        self._locks = {}
        self._streams = []  # streams handed out by createFile/openFile

    def close(self):
        for stream in tuple(self._streams):
            stream.close()
        del self._streams[:]

    def createFile(self, name ):
        file_path = os.path.join( self.path, name )
        fh = open( file_path, "wb" )
        stream = PythonFileStream( name, fh )
        self._streams.append( stream )
        return stream

    def deleteFile( self, name ):
        if self.fileExists(name):
            os.unlink( os.path.join( self.path, name ) )

    def fileExists( self, name ):
        return os.path.exists( os.path.join( self.path, name ) )

    def fileLength( self, name ):
        file_path = os.path.join( self.path, name )
        return os.path.getsize( file_path )

    def fileModified( self, name ):
        file_path = os.path.join( self.path, name )
        return os.path.getmtime( file_path )

    def list(self):
        return os.listdir( self.path )

    def makeLock( self, name ):
        lock = self._locks.setdefault( name, RLock() )
        return PythonDirLock( name, os.path.join( self.path, name ), lock )

    def openFile( self, name ):
        file_path = os.path.join( self.path, name )
        fh = open( file_path, 'rb' )
        stream = PythonFileStream( name, fh, os.path.getsize(file_path) )
        self._streams.append( stream )
        return stream

    def renameFile(self, fname, tname):
        return os.rename( os.path.join( self.path, fname ),
                          os.path.join( self.path, tname ) )

    def touchFile( self, name):
        file_path = os.path.join( self.path, name )
        fh = open( file_path, 'r+b')
        c = fh.read(1)
        fh.seek(0)
        fh.write(c)
        fh.close()


if DEBUG:
    _globals = globals()
    _globals['PythonFileDirectory'] = DebugFactory( PythonFileDirectory )
    _globals['PythonFileStream'] = DebugFactory( PythonFileStream )
    _globals['PythonDirLock'] = DebugFactory( PythonDirLock )
    del _globals

class PythonDirectoryTests( unittest.TestCase,
                            test_PyLucene.Test_PyLuceneBase ):

    STORE_DIR = "testrepo"

    def setUp( self ):
        if not os.path.exists( self.STORE_DIR ):
            os.mkdir( self.STORE_DIR )

    def tearDown( self ):
        return 
        if os.path.exists(self.STORE_DIR):
            shutil.rmtree(self.STORE_DIR)

    def openStore( self ):
        return PythonFileDirectory( self.STORE_DIR )

    def TODOtest_IncrementalLoop( self ):
        # this test aborts after indexing a variable
        # number of documents
        print "Testing Indexing Incremental Looping"
        for i in range(100):
            print "indexing ", i
            sys.stdout.flush()
            self.test_indexDocument()
                       

if __name__ == "__main__":
    import sys
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()

