Hello, Julien. Hmm. I cannot reproduce this with my applications. Can you please prepare a unit test to reproduce this error?
Thanks. JA> Yura Smolsky wrote: >> Hello, Julien. >> >> I will look into the problem today. Thanks. JA> To be more precise, here are the errors I got: JA> File "/usr/local/lib/python2.4/site-packages/PyLucene.py", line 2913, JA> in __init__ JA> newobj = _PyLucene.new_IndexSearcher(*args) JA> File "./src/nxlucene/directory/PythonDirectory.py", line 68, in release JA> os.remove(self.lockFile) JA> OSError: [Errno 2] No such file or directory: JA> '/tmp/lucene-a14687cc515226325f527aade90082ad-commit.lock' JA> It seems the lock file is removed by another thread. (max 5 errors out JA> of 100 threads) JA> I'm currently investigating. I see 2 options: JA> 1. my code does some wrong things regarding writer / indexer / searcher JA> and the PythonFileDirectory is more sensitive to this. JA> 2. we do have an actual bug in the PythonFileLock implementation. JA> I'll keep you posted if I find something. JA> J. >> JA> Hi there, >> >> JA> I've been trying out your new Python directory implementation for >> JA> NXLucene which is using PyLucene 1.9.1 >> >> JA> We are then using gcc-3.4.6. Everything has been stable for weeks now but I'm >> JA> reaching the 2 GB limitation of gcc-3.4.6. >> >> JA> I saw two problems with the PythonFileLock you implemented: >> >> JA> - It appears that it is not thread safe. Try it out with something like >> JA> 100 threads. >> >> JA> I got timeouts within obtainTimeout() from the PythonFileLock in some of >> JA> my NXLucene multi-threading tests. It seems there is an issue as soon as >> JA> you have 2 kinds of lock names (write / commit locks). >> >> JA> - It appears to be really inefficient with lots of threads compared to >> JA> the FSDirectory. I suspect lots of retries occurred within the >> JA> obtainTimeout(). >> >> JA> I don't really have a lot of time to investigate this right now. Do you >> JA> guys have any idea? >> >> JA> I'm gonna give a try to the gcc-4.1.0 + patch solution. >> >> JA> J. 
>> >> JA> Yura Smolsky wrote: >>>>>> I am going to implement a fully functional Python directory, the same as the Java >>>>>> Lucene one. I will provide it here later, so you can include it in >>>>>> PyLucene >>>>>> if somebody needs it. >>>> AV> Great! >>>> >>>> 1. Ok. Here is the deal. I have finished the class. >>>> This class implements FSDirectory functionality, >>>> but using Python only, to avoid the 2 GB limit with gcc 3.4.6. (see >>>> PythonDirectory.py) >>>> >>>> This is the test for it (see test_PythonDirectory2.py) >>>> >>>> Please feel free to put this class into the PyLucene distribution. >>>> >>>> 2. Everything seems to be perfect on Linux (Debian), but sometimes I do >>>> receive random exceptions when I run the test cases on Windows - about 1 >>>> time per 5 runs. >>>> >>>> And an even bigger problem: the optimize method does not work at all on the >>>> Windows platform for this index (see attached archive). Use >>>> optimizeIndex.py >>>> to reproduce the problem on the Windows platform. I got this exception: >>>> >>>> Traceback (most recent call last): >>>> File "D:\workshop\index\optimizeIndex.py", line 16, in ? >>>> writer.optimize() >>>> PyLucene.JavaError: java.lang.NullPointerException >>>> >>>> Again, everything is okay on Linux. 
>>>> >>>> -- >>>> Yura Smolsky, >>>> http://altervisionmedia.com/ >>>> >>>> >>>> ------------------------------------------------------------------------ >>>> >>>> import os, sys >>>> import PyLucene >>>> import md5 >>>> import time >>>> >>>> DEBUG = False >>>> >>>> class DebugWrapper( object ): >>>> >>>> def __init__(self, obj ): >>>> self.obj = obj >>>> >>>> def __getattr__(self, name): >>>> print self.obj.__class__.__name__, self.obj.name, name >>>> sys.stdout.flush() >>>> return getattr(self.obj, name ) >>>> >>>> class DebugFactory( object ): >>>> >>>> def __init__(self, klass): >>>> self.klass = klass >>>> >>>> def __call__(self, *args, **kw): >>>> instance = self.klass(*args, **kw) >>>> return DebugWrapper( instance ) >>>> >>>> class PythonFileLock( object ): >>>> # safe for a multimple processes >>>> >>>> LOCK_POLL_INTERVAL = 1000 >>>> >>>> def __init__(self, lockDir, lockFile): >>>> self.name = lockFile >>>> self.lockDir = lockDir >>>> self.lockFile = os.path.join(lockDir, lockFile) >>>> #print self.lockFile >>>> >>>> def isLocked(self): >>>> return os.path.exists(self.lockFile) >>>> >>>> def obtainTimeout( self, timeout ): >>>> locked = self.obtain() >>>> maxSleepCount = round(timeout / self.LOCK_POLL_INTERVAL) >>>> sleepCount = 0 >>>> while (not locked): >>>> if sleepCount >= maxSleepCount: >>>> raise Exception("Lock obtain timed out: " + >>>> self.toString()) >>>> time.sleep(timeout/1000) >>>> locked = self.obtain() >>>> sleepCount += 1 >>>> return locked >>>> >>>> def obtain( self ): >>>> if not os.path.exists(self.lockDir): >>>> os.makedirs(self.lockDir) >>>> >>>> if self.isLocked(): >>>> return False >>>> >>>> try: >>>> open(self.lockFile, 'w') >>>> except: >>>> return False >>>> else: >>>> return True >>>> >>>> def release( self ): >>>> os.remove(self.lockFile) >>>> return True >>>> >>>> def toString(self): >>>> return 'Lock@' + self.lockFile >>>> >>>> >>>> class PythonFileStream(object): >>>> >>>> def __init__(self, name, fh, size=0L): >>>> 
self.name = name >>>> self.fh = fh >>>> self._length = size >>>> self.isOpen = True >>>> >>>> def close(self, isClone=False): >>>> if isClone or not self.isOpen: >>>> return >>>> self.isOpen = False >>>> self.fh.close() >>>> >>>> def seek(self, pos): >>>> self.fh.seek(pos) >>>> >>>> def read(self, length, pos): >>>> self.fh.seek(pos) >>>> return self.fh.read(length) >>>> >>>> def write(self, buffer): >>>> self.fh.write(buffer) >>>> self.fh.flush() >>>> self._length += len(buffer) >>>> >>>> def length(self): >>>> return self._length >>>> >>>> >>>> class PythonFileDirectory( object ): >>>> >>>> LOCK_DIR = PyLucene.System.getProperty("org.apache.lucene.lockDir", >>>> PyLucene.System.getProperty("java.io.tmpdir")); >>>> >>>> def __init__(self, path, create=False ): >>>> self.path = os.path.realpath(path) >>>> self.name = self.path >>>> self._locks = {} >>>> self._streams = [] >>>> if not self.LOCK_DIR: >>>> self.LOCK_DIR = self.path >>>> if create: >>>> self.create() >>>> >>>> assert os.path.isdir( path ) >>>> >>>> def create(self): >>>> if not os.path.exists(self.path): >>>> os.makedirs(self.path) >>>> >>>> oldFiles = os.listdir(self.path) >>>> for oldFile in oldFiles: >>>> os.remove(os.path.join(self.path, oldFile)) >>>> >>>> lockPrefix = self.getLockPrefix() >>>> tmpFiles = os.listdir(self.LOCK_DIR) >>>> for tmpFile in tmpFiles: >>>> if tmpFile.startswith(lockPrefix): >>>> os.remove(os.path.join(self.LOCK_DIR, tmpFile)) >>>> >>>> >>>> def close(self): >>>> for s in self._streams: >>>> s.close() >>>> >>>> def createOutput(self, name ): >>>> file_path = os.path.join( self.path, name ) >>>> fh = open( file_path, "w" ) >>>> stream = PythonFileStream( name, fh ) >>>> self._streams.append(stream) >>>> return stream >>>> >>>> def deleteFile( self, name ): >>>> if self.fileExists(name): >>>> os.unlink( os.path.join( self.path, name ) ) >>>> >>>> def fileExists( self, name ): >>>> return os.path.exists( os.path.join( self.path, name ) ) >>>> >>>> def fileLength( self, name 
): >>>> file_path = os.path.join( self.path, name ) >>>> return os.path.getsize( file_path ) >>>> >>>> def fileModified( self, name ): >>>> file_path = os.path.join( self.path, name ) >>>> return int( os.path.getmtime( file_path )) >>>> >>>> def list(self): >>>> return os.listdir( self.path ) >>>> >>>> def openInput( self, name ): >>>> file_path = os.path.join( self.path, name ) >>>> fh = open( file_path, 'r') >>>> stream = PythonFileStream( name, fh, os.path.getsize(file_path) ) >>>> self._streams.append(stream) >>>> return stream >>>> >>>> def renameFile(self, fname, tname): >>>> fromName = os.path.join( self.path, fname ) >>>> toName = os.path.join( self.path, tname ) >>>> if os.path.exists( toName ): >>>> os.remove( toName ) >>>> os.rename( fromName, toName ) >>>> >>>> def touchFile( self, name): >>>> >>>> file_path = os.path.join( self.path, name ) >>>> fh = open( file_path, 'rw') >>>> c = fh.read(1) >>>> fh.seek(0) >>>> fh.write(c) >>>> fh.close() >>>> >>>> def makeLock( self, name ): >>>> lockDir = self.LOCK_DIR >>>> lockFile = self.getLockPrefix() + '-' + name >>>> lock = self._locks.setdefault( name, PythonFileLock(lockDir, >>>> lockFile) ) >>>> #print lock.toString() >>>> return lock >>>> >>>> def getHexDigest(self, string): >>>> m = md5.new(string) >>>> return m.hexdigest() >>>> >>>> def getLockPrefix(self): >>>> dirName = os.path.realpath(self.path) >>>> prefix = 'lucene-' + self.getHexDigest(dirName) >>>> return prefix >>>> >>>> if DEBUG: >>>> _globals = globals() >>>> _globals['PythonFileDirectory'] = DebugFactory( PythonFileDirectory ) >>>> _globals['PythonFileStream'] = DebugFactory( PythonFileStream ) >>>> _globals['PythonFileLock'] = DebugFactory( PythonFileLock ) >>>> del _globals >>>> >>>> >>>> ------------------------------------------------------------------------ >>>> >>>> #!/usr/local/bin/python >>>> >>>> import os, sys, unittest, shutil, weakref >>>> import test_PyLucene >>>> from PythonDirectory import * >>>> >>>> """ >>>> The Directory 
Implementation here is for testing purposes only, not meant >>>> as an example of writing one, the implementation here suffers from a lack >>>> of safety when dealing with concurrent modifications as it does away with >>>> the file locking in the default lucene fsdirectory implementation. >>>> """ >>>> >>>> >>>> >>>> >>>> class PythonDirectoryTests( unittest.TestCase, >>>> test_PyLucene.Test_PyLuceneBase ): >>>> >>>> STORE_DIR = "testpyrepo" >>>> >>>> def setUp( self ): >>>> if not os.path.exists( self.STORE_DIR ): >>>> os.mkdir( self.STORE_DIR ) >>>> >>>> def tearDown( self ): >>>> if os.path.exists(self.STORE_DIR): >>>> shutil.rmtree(self.STORE_DIR) >>>> >>>> def openStore( self ): >>>> return PythonFileDirectory( self.STORE_DIR ) >>>> >>>> def closeStore(self, store, *args): >>>> for arg in args: >>>> if arg: arg.close() >>>> store.close() >>>> >>>> def test_IncrementalLoop( self ): >>>> print "Testing Indexing Incremental Looping" >>>> for i in range(100): >>>> print "indexing ", i >>>> sys.stdout.flush() >>>> self.test_indexDocument() >>>> >>>> >>>> if __name__ == "__main__": >>>> import sys >>>> if '-loop' in sys.argv: >>>> sys.argv.remove('-loop') >>>> while True: >>>> try: >>>> unittest.main() >>>> except: >>>> pass >>>> else: >>>> unittest.main() >>>> >>>> >>>> >>>> >>>> ------------------------------------------------------------------------ >>>> >>>> _______________________________________________ >>>> pylucene-dev mailing list >>>> [email protected] >>>> http://lists.osafoundation.org/mailman/listinfo/pylucene-dev >> >> >> >> >> >> -- >> Yura Smolsky, >> http://altervisionmedia.com/ >> >> -- Yura Smolsky, http://altervisionmedia.com/ _______________________________________________ pylucene-dev mailing list [email protected] http://lists.osafoundation.org/mailman/listinfo/pylucene-dev
