Hello, Julien. I will look into the problem today. Thanks.
JA> Hi there, JA> I've been trying out your new Python directory implementation for JA> NXLucene which is using PyLucene 1.9.1 JA> We are then using gcc-3.4.6. Everything is stable for weeks now but I'm JA> reaching the 2 GB limitation of gcc-3.4.6. JA> I saw two problems with the PythonFileLock you implemented : JA> - It appears that it is not thread safe. Try out with something like JA> 100 threads. JA> I got timeout within obtainTimeout() from the PythonFileLock in some of JA> my NXLucene multi threading tests. It seems there is an issue as soon as JA> you got 2 kind of lock names (write / commit locks). JA> - It appears to be really inefficient with lots of threads compared to JA> the FSDirectory. I suspect lots of retries occurred within the JA> obtainTimeout(). JA> I don't have really a lot of time investigating this right now. If you JA> guys do have an idea ? JA> I'm gonna give a try to the gcc-4.1.0 + patch solution. JA> J. JA> Yura Smolsky wrote: >>>> I am going to implement fully functional python directory same to Java >>>> Lucene one. I will provide it here later, so you can include into PyLucene >>>> if somebody will need. >> AV> Great ! >> >> 1. Ok. Here is the deal. I have finished the class. >> This class implements FSDirectory functionality, >> but using python only to avoid 2 GB limit with gcc 3.4.6. (see >> PythonDirectory.py) >> >> This is the test for it (see test_PythonDirectory2.py) >> >> Please feel free to put this class into PyLucene distribution. >> >> 2. Everything seems to be perfect on Linux (Debian), but sometimes I do >> receive random exceptions when I run testcases on Windows - about 1 >> time per 5 runs. >> >> And even bigger problem. Optimize method completely does not work on >> windows platform for this index (see attached archive). Use optimizeIndex.py >> to reproduce problem on windows platform. I got this exception: >> >> Traceback (most recent call last): >> File "D:\workshop\index\optimizeIndex.py", line 16, in ? 
>> writer.optimize() >> PyLucene.JavaError: java.lang.NullPointerException >> >> Again, everything is okay on Linux. >> >> -- >> Yura Smolsky, >> http://altervisionmedia.com/ >> >> >> ------------------------------------------------------------------------ >> >> import os, sys >> import PyLucene >> import md5 >> import time >> >> DEBUG = False >> >> class DebugWrapper( object ): >> >> def __init__(self, obj ): >> self.obj = obj >> >> def __getattr__(self, name): >> print self.obj.__class__.__name__, self.obj.name, name >> sys.stdout.flush() >> return getattr(self.obj, name ) >> >> class DebugFactory( object ): >> >> def __init__(self, klass): >> self.klass = klass >> >> def __call__(self, *args, **kw): >> instance = self.klass(*args, **kw) >> return DebugWrapper( instance ) >> >> class PythonFileLock( object ): >> # safe for a multimple processes >> >> LOCK_POLL_INTERVAL = 1000 >> >> def __init__(self, lockDir, lockFile): >> self.name = lockFile >> self.lockDir = lockDir >> self.lockFile = os.path.join(lockDir, lockFile) >> #print self.lockFile >> >> def isLocked(self): >> return os.path.exists(self.lockFile) >> >> def obtainTimeout( self, timeout ): >> locked = self.obtain() >> maxSleepCount = round(timeout / self.LOCK_POLL_INTERVAL) >> sleepCount = 0 >> while (not locked): >> if sleepCount >= maxSleepCount: >> raise Exception("Lock obtain timed out: " + self.toString()) >> time.sleep(timeout/1000) >> locked = self.obtain() >> sleepCount += 1 >> return locked >> >> def obtain( self ): >> if not os.path.exists(self.lockDir): >> os.makedirs(self.lockDir) >> >> if self.isLocked(): >> return False >> >> try: >> open(self.lockFile, 'w') >> except: >> return False >> else: >> return True >> >> def release( self ): >> os.remove(self.lockFile) >> return True >> >> def toString(self): >> return 'Lock@' + self.lockFile >> >> >> class PythonFileStream(object): >> >> def __init__(self, name, fh, size=0L): >> self.name = name >> self.fh = fh >> self._length = size >> 
self.isOpen = True >> >> def close(self, isClone=False): >> if isClone or not self.isOpen: >> return >> self.isOpen = False >> self.fh.close() >> >> def seek(self, pos): >> self.fh.seek(pos) >> >> def read(self, length, pos): >> self.fh.seek(pos) >> return self.fh.read(length) >> >> def write(self, buffer): >> self.fh.write(buffer) >> self.fh.flush() >> self._length += len(buffer) >> >> def length(self): >> return self._length >> >> >> class PythonFileDirectory( object ): >> >> LOCK_DIR = PyLucene.System.getProperty("org.apache.lucene.lockDir", >> PyLucene.System.getProperty("java.io.tmpdir")); >> >> def __init__(self, path, create=False ): >> self.path = os.path.realpath(path) >> self.name = self.path >> self._locks = {} >> self._streams = [] >> if not self.LOCK_DIR: >> self.LOCK_DIR = self.path >> if create: >> self.create() >> >> assert os.path.isdir( path ) >> >> def create(self): >> if not os.path.exists(self.path): >> os.makedirs(self.path) >> >> oldFiles = os.listdir(self.path) >> for oldFile in oldFiles: >> os.remove(os.path.join(self.path, oldFile)) >> >> lockPrefix = self.getLockPrefix() >> tmpFiles = os.listdir(self.LOCK_DIR) >> for tmpFile in tmpFiles: >> if tmpFile.startswith(lockPrefix): >> os.remove(os.path.join(self.LOCK_DIR, tmpFile)) >> >> >> def close(self): >> for s in self._streams: >> s.close() >> >> def createOutput(self, name ): >> file_path = os.path.join( self.path, name ) >> fh = open( file_path, "w" ) >> stream = PythonFileStream( name, fh ) >> self._streams.append(stream) >> return stream >> >> def deleteFile( self, name ): >> if self.fileExists(name): >> os.unlink( os.path.join( self.path, name ) ) >> >> def fileExists( self, name ): >> return os.path.exists( os.path.join( self.path, name ) ) >> >> def fileLength( self, name ): >> file_path = os.path.join( self.path, name ) >> return os.path.getsize( file_path ) >> >> def fileModified( self, name ): >> file_path = os.path.join( self.path, name ) >> return int( os.path.getmtime( 
file_path )) >> >> def list(self): >> return os.listdir( self.path ) >> >> def openInput( self, name ): >> file_path = os.path.join( self.path, name ) >> fh = open( file_path, 'r') >> stream = PythonFileStream( name, fh, os.path.getsize(file_path) ) >> self._streams.append(stream) >> return stream >> >> def renameFile(self, fname, tname): >> fromName = os.path.join( self.path, fname ) >> toName = os.path.join( self.path, tname ) >> if os.path.exists( toName ): >> os.remove( toName ) >> os.rename( fromName, toName ) >> >> def touchFile( self, name): >> >> file_path = os.path.join( self.path, name ) >> fh = open( file_path, 'rw') >> c = fh.read(1) >> fh.seek(0) >> fh.write(c) >> fh.close() >> >> def makeLock( self, name ): >> lockDir = self.LOCK_DIR >> lockFile = self.getLockPrefix() + '-' + name >> lock = self._locks.setdefault( name, PythonFileLock(lockDir, >> lockFile) ) >> #print lock.toString() >> return lock >> >> def getHexDigest(self, string): >> m = md5.new(string) >> return m.hexdigest() >> >> def getLockPrefix(self): >> dirName = os.path.realpath(self.path) >> prefix = 'lucene-' + self.getHexDigest(dirName) >> return prefix >> >> if DEBUG: >> _globals = globals() >> _globals['PythonFileDirectory'] = DebugFactory( PythonFileDirectory ) >> _globals['PythonFileStream'] = DebugFactory( PythonFileStream ) >> _globals['PythonFileLock'] = DebugFactory( PythonFileLock ) >> del _globals >> >> >> ------------------------------------------------------------------------ >> >> #!/usr/local/bin/python >> >> import os, sys, unittest, shutil, weakref >> import test_PyLucene >> from PythonDirectory import * >> >> """ >> The Directory Implementation here is for testing purposes only, not meant >> as an example of writing one, the implementation here suffers from a lack >> of safety when dealing with concurrent modifications as it does away with >> the file locking in the default lucene fsdirectory implementation. 
>> """ >> >> >> >> >> class PythonDirectoryTests( unittest.TestCase, >> test_PyLucene.Test_PyLuceneBase ): >> >> STORE_DIR = "testpyrepo" >> >> def setUp( self ): >> if not os.path.exists( self.STORE_DIR ): >> os.mkdir( self.STORE_DIR ) >> >> def tearDown( self ): >> if os.path.exists(self.STORE_DIR): >> shutil.rmtree(self.STORE_DIR) >> >> def openStore( self ): >> return PythonFileDirectory( self.STORE_DIR ) >> >> def closeStore(self, store, *args): >> for arg in args: >> if arg: arg.close() >> store.close() >> >> def test_IncrementalLoop( self ): >> print "Testing Indexing Incremental Looping" >> for i in range(100): >> print "indexing ", i >> sys.stdout.flush() >> self.test_indexDocument() >> >> >> if __name__ == "__main__": >> import sys >> if '-loop' in sys.argv: >> sys.argv.remove('-loop') >> while True: >> try: >> unittest.main() >> except: >> pass >> else: >> unittest.main() >> >> >> >> >> ------------------------------------------------------------------------ >> >> _______________________________________________ >> pylucene-dev mailing list >> [email protected] >> http://lists.osafoundation.org/mailman/listinfo/pylucene-dev -- Yura Smolsky, http://altervisionmedia.com/ _______________________________________________ pylucene-dev mailing list [email protected] http://lists.osafoundation.org/mailman/listinfo/pylucene-dev
