Andi Vajda wrote:
>
> This was indeed very broken and had never been used nor tested.
> I made a number of fixes and changes today that got it to work:
>
> - added cloning support to PythonInputStream
> - added isClone argument to PythonInputStream.close()
> - because InputStream expects subclasses to set its length variable upon
> construction, a lengthInternal() method was added to the python
> protocol
> to return the length of the input file or source thus opened
> - fixed NUL char bug in PythonInputStream.readInternal() that caused
> reads
> to be short when the data read contained NUL chars
> - added missing seekInternal() method to PythonOutputStream
> - made PythonOutputStream close() call OutputStream close() first
>
> It seems that I also got it to work with the Lucene index compound file
> format (the default) with the exception that
> PythonDirectory.deleteFile() sometimes is asked to delete non-existent
> files. I was not able, today, to solve that problem though.
>
> The new version of fdir.py is attached. I'd like to integrate it into a
> unit test at some point.
>
> Fixes are checked in.
>
> Andi..
hi andi,
thanks for the fixes, i recompiled the latest trunk and was able to use
fdir w/ manindex and mansearch successfully. the unit tests were a
different story, i frobbed fdir into a unit test, attached, using
test_pylucene base, but ran into a number of failures, and a possible
regression.
the underlying issue for most of the failures is a problem where just
indexing one document in a pythondirectory, doesn't actually get written
to the index, it seems like one needs about 10 documents, to force the
directory to be written to.
another issue is that simulating multiple incremental indexes, i.e. opening
the python directory, creating a writer, analyzer, indexing a doc,
closing the writer and directory, in a loop will cause an aborted process.
i'm also seeing some regression in a few of the other tests, not using
pythondirectory, ( test_Analyzers.py is one), the exact api call varies,
but the error message is the same.
SystemError: NULL result without error in PyObject_Call
thanks,
-kapil
Index: test_PyLucene.py
===================================================================
--- test_PyLucene.py (revision 194)
+++ test_PyLucene.py (working copy)
@@ -37,7 +37,7 @@
def openStore(self):
raise NotImplemented
- def closeStore(self, store):
+ def closeStore(self, store, *args):
pass
def getWriter(self, store, analyzer, create=False):
@@ -285,7 +285,7 @@
def closeStore(self, store, *args):
for arg in args:
- arg.close()
+ if arg: arg.close()
store.close()
import os, sys, unittest, shutil, weakref
from threading import RLock
import test_PyLucene
"""
The Directory Implementation here is for testing purposes only, not meant
as an example of writing one, the implementation here suffers from a lack
of safety when dealing with concurrent modifications as it does away with
the file locking in the default lucene fsdirectory implementation.
"""
# When true, the directory, stream and lock classes defined in this module are
# rebound to DebugFactory wrappers, so every attribute access on their
# instances is printed to stdout.
DEBUG = False
class DebugWrapper(object):
    """Proxy that reports each attribute lookup forwarded to a wrapped object.

    Any lookup that is not satisfied by the wrapper itself prints the wrapped
    object's class name, its ``name`` attribute and the requested attribute
    name, flushes stdout, and then returns the attribute taken from the
    wrapped object.
    """

    def __init__(self, obj):
        # The wrapped object needs a ``name`` attribute: it is printed on
        # every forwarded lookup.
        self.obj = obj

    def __getattr__(self, name):
        target = self.obj
        # Built as one string so the line reads the same whether ``print`` is
        # the statement or the function.
        print("%s %s %s" % (target.__class__.__name__, target.name, name))
        sys.stdout.flush()
        return getattr(target, name)
class DebugFactory(object):
    """Callable stand-in for a class that hands back DebugWrapper-ed instances."""

    def __init__(self, klass):
        # The real class to instantiate on each call.
        self.klass = klass

    def __call__(self, *args, **kw):
        # Build the real instance with the caller's arguments, then wrap it.
        return DebugWrapper(self.klass(*args, **kw))
class PythonDirLock(object):
    """Lock handed out by the directory; a thin wrapper around a threading lock.

    Only safe for a single process: the lock object lives in memory, and this
    class merely stores ``path`` as ``lock_file`` without touching the
    filesystem.
    """

    def __init__(self, name, path, lock):
        self.name = name
        self.lock_file = path
        self.lock = lock

    def isLocked(self):
        """Return True when the underlying lock is currently held.

        ``threading.RLock`` objects do not offer ``locked()`` on most Python
        versions, so when it is missing the state is probed instead of
        raising AttributeError.
        """
        locked = getattr(self.lock, "locked", None)
        if locked is not None:
            return locked()
        # A re-entrant lock owned by the calling thread would let the probe
        # below succeed, so ownership has to be checked first.
        is_owned = getattr(self.lock, "_is_owned", None)
        if is_owned is not None and is_owned():
            return True
        # Non-blocking probe: success means nobody held the lock.
        if self.lock.acquire(False):
            self.lock.release()
            return False
        return True

    def obtainTimeout(self, timeout):
        """Acquire the lock, forwarding ``timeout`` to ``acquire()``."""
        # NOTE(review): the first positional argument of acquire() is the
        # blocking flag, so a non-zero ``timeout`` blocks until the lock is
        # free rather than timing out. Left unchanged because the unit of
        # ``timeout`` is not visible from here -- confirm against the caller.
        return self.lock.acquire(timeout)

    def obtain(self):
        """Acquire the lock, blocking until it is available."""
        return self.lock.acquire()

    def release(self):
        """Release the lock and return whatever the underlying release() returns."""
        return self.lock.release()
class PythonFileStream(object):
    """File-backed stream usable as either an input or an output stream.

    ``size`` is the byte count reported by ``lengthInternal()`` when the
    stream is read from; the number of bytes written through
    ``flushBuffer()`` is reported by ``length()``.
    """

    def __init__(self, name, fh, size=0):
        self.name = name
        self.fh = fh
        self.size = size  # when used as input file
        # Kept under a private name: an instance attribute called ``length``
        # would shadow the length() method and make it uncallable.
        self._length = 0  # when used as output file

    def close(self, isClone=False):
        """Close the underlying file unless this stream is a clone."""
        if isClone:
            return
        self.fh.close()

    def readInternal(self, length, pos):
        """Return up to ``length`` units read starting at offset ``pos``."""
        self.fh.seek(pos)
        return self.fh.read(length)

    def seekInternal(self, pos):
        """Move the underlying file position to ``pos``."""
        self.fh.seek(pos)

    def flushBuffer(self, buffer):
        """Write ``buffer`` to the file, flush it, and count what was written."""
        self.fh.write(buffer)
        self.fh.flush()
        self._length += len(buffer)

    def lengthInternal(self):
        """Return the size given at construction (input-side length)."""
        return self.size

    def length(self):
        """Return the number of bytes written so far via flushBuffer()."""
        return self._length
class PythonFileDirectory(object):
    """Directory implementation that stores each file under one filesystem path.

    For testing only: locks are in-memory threading locks, so nothing guards
    against modification from another process.
    """

    def __init__(self, path):
        self.name = path
        assert os.path.isdir(path), "not a directory: %r" % (path,)
        self.path = path
        self._locks = {}
        # Streams opened through this directory, held weakly so that close()
        # can shut the ones still alive without keeping any of them alive.
        self._streams = weakref.WeakValueDictionary()

    def _join(self, name):
        # Absolute-or-relative location of ``name`` inside this directory.
        return os.path.join(self.path, name)

    def _track(self, stream):
        # Remember ``stream`` for close(); the entry vanishes with the stream.
        self._streams[id(stream)] = stream
        return stream

    def close(self):
        """Close every stream opened through this directory that is still alive."""
        for stream in list(self._streams.values()):
            stream.close()
        self._streams.clear()

    def createFile(self, name):
        """Create (or truncate) ``name`` and return an output stream for it."""
        # Binary mode: the data written is raw bytes, and text mode would
        # translate line endings on some platforms.
        fh = open(self._join(name), "wb")
        return self._track(PythonFileStream(name, fh))

    def deleteFile(self, name):
        """Remove ``name``; a file that does not exist is silently ignored."""
        file_path = self._join(name)
        try:
            os.unlink(file_path)
        except OSError:
            # Only a missing file is tolerated; anything else is a real error.
            if os.path.exists(file_path):
                raise

    def fileExists(self, name):
        """Return True when ``name`` exists in this directory."""
        return os.path.exists(self._join(name))

    def fileLength(self, name):
        """Return the size of ``name`` in bytes."""
        return os.path.getsize(self._join(name))

    def fileModified(self, name):
        """Return the modification time of ``name`` as reported by getmtime()."""
        return os.path.getmtime(self._join(name))

    def list(self):
        """Return the names of all entries in this directory."""
        return os.listdir(self.path)

    def makeLock(self, name):
        """Return a PythonDirLock sharing one in-memory lock per ``name``."""
        lock = self._locks.setdefault(name, RLock())
        return PythonDirLock(name, self._join(name), lock)

    def openFile(self, name):
        """Open ``name`` for reading and return an input stream for it."""
        file_path = self._join(name)
        fh = open(file_path, "rb")
        return self._track(
            PythonFileStream(name, fh, os.path.getsize(file_path)))

    def renameFile(self, fname, tname):
        """Rename ``fname`` to ``tname`` inside this directory."""
        return os.rename(self._join(fname), self._join(tname))

    def touchFile(self, name):
        """Set the access and modification times of ``name`` to now."""
        # 'rw' is not a valid open() mode; updating the timestamps directly
        # also works for empty files and leaves the content untouched.
        os.utime(self._join(name), None)
if DEBUG:
    # Swap each class for a factory whose instances print every attribute
    # access; code that looks the names up afterwards gets the wrapped form.
    PythonFileDirectory = DebugFactory(PythonFileDirectory)
    PythonFileStream = DebugFactory(PythonFileStream)
    PythonDirLock = DebugFactory(PythonDirLock)
class PythonDirectoryTests(unittest.TestCase,
                           test_PyLucene.Test_PyLuceneBase):
    """Test case that uses a PythonFileDirectory under STORE_DIR as its store."""

    STORE_DIR = "testrepo"

    def setUp(self):
        # Make sure the store directory is there before each test.
        if os.path.exists(self.STORE_DIR):
            return
        os.mkdir(self.STORE_DIR)

    def tearDown(self):
        # NOTE(review): cleanup is switched off by this early return, so the
        # store directory is left on disk after every test and the removal
        # below never runs -- confirm whether that is still wanted.
        return
        if os.path.exists(self.STORE_DIR):
            shutil.rmtree(self.STORE_DIR)

    def openStore(self):
        return PythonFileDirectory(self.STORE_DIR)

    def TODOtest_IncrementalLoop(self):
        # Named without the "test" prefix, so unittest does not collect it:
        # this test aborts after indexing a variable number of documents.
        # test_indexDocument is not defined in this class; presumably it is
        # inherited from Test_PyLuceneBase.
        print("Testing Indexing Incremental Looping")
        for round_no in range(100):
            print("indexing  %d" % round_no)
            sys.stdout.flush()
            self.test_indexDocument()
if __name__ == "__main__":
    import sys
    if '-loop' in sys.argv:
        # Run the whole suite over and over; strip the flag so that
        # unittest.main() does not try to interpret it.
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except KeyboardInterrupt:
                # Let Ctrl-C end the loop instead of being swallowed below.
                raise
            except (SystemExit, Exception):
                # unittest.main() finishes a run by raising SystemExit; that
                # and any other failure is ignored so the next round starts.
                pass
    else:
        unittest.main()
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev