On Thu, 2005-10-13 at 10:00 -0700, Andi Vajda wrote:
> > I was trying to get a new directory implementation going based on the
> > PythonDirectory support in PyLucene. I ran into some problems, and was
> > curious whether this part of the codebase is in use; I couldn't find any
> > unit tests for it, nor references to anyone using it.
>
> I don't know that this code is in use at the moment. Would it be possible to
> send in code to reproduce the problem ? It could become a unit test.
My implementation unfortunately has several context dependencies, which make
it unsuitable for a unit test.
Instead, I've written a really simple directory implementation without
dependencies that just stores to files. It is attached, along with a modified
manindex script that I've been using for testing; it provokes the same error.
Enabling the DEBUG flag at the top gives some basic method traces of
calls into Python.
>
> > Are there some rules for reference counting with objects being used by
> > Java? Is there a way to track down/pinpoint this sort of error better?
>
> You shouldn't have to do anything special, PyLucene should be taking care of
> it for you.
cool.
kapil
import os, sys
# Set to True to replace DirLock, FileStream and FileDirectory with
# DebugFactory wrappers, so that attribute lookups on their instances are
# printed to stdout by DebugWrapper.
DEBUG = False #True
class DebugWrapper(object):
    """Proxy that reports attribute lookups on a wrapped object.

    ``__getattr__`` only runs for names not found on the wrapper itself, so
    everything except ``obj`` is traced and then forwarded to the wrapped
    object. The wrapped object must expose a ``name`` attribute, which is
    included in each trace line.
    """

    def __init__(self, obj):
        self.obj = obj

    def __getattr__(self, name):
        target = self.obj
        # One line per lookup: "<class name> <target.name> <attribute name>".
        print("%s %s %s" % (target.__class__.__name__, target.name, name))
        sys.stdout.flush()
        return getattr(target, name)
class DebugFactory(object):
    """Callable stand-in for a class: calling it instantiates ``klass`` and
    returns the new instance wrapped in a DebugWrapper."""

    def __init__(self, klass):
        self.klass = klass

    def __call__(self, *args, **kw):
        # Build the real object first, then hand back the tracing proxy.
        return DebugWrapper(self.klass(*args, **kw))
class DirLock(object):
    """Optimistic ("feeling lucky") lock.

    Each instance only tracks its own flag and ignores every other lock
    instance, so it is safe for single-context usage only. ``path`` is
    stored as ``lock_file`` but no file is ever created or inspected.
    """

    def __init__(self, name, path):
        self.name = name
        self.lock_file = path
        self.locked = False

    def isLocked(self):
        """Return whether this instance currently considers itself locked."""
        return self.locked

    def obtain(self):
        """Mark the lock as held; always succeeds and returns True."""
        self.locked = True
        return True

    def obtainTimeout(self, timeout):
        """Same as obtain(); ``timeout`` is accepted but never consulted."""
        return self.obtain()

    def release(self):
        """Mark the lock as free."""
        self.locked = False
# With tracing enabled, rebind the name so that every DirLock(...) call
# returns a DebugWrapper around the real lock.
if DEBUG:
    DirLock = DebugFactory(DirLock)
class FileStream(object):
    """Adapter around an already-open file handle.

    ``name`` is kept alongside the handle; DebugWrapper prints it when
    tracing is enabled.
    """

    def __init__(self, name, fh):
        self.name = name
        self.fh = fh

    def close(self):
        """Close the underlying handle."""
        self.fh.close()

    def readInternal(self, length):
        """Read and return up to ``length`` units from the current position."""
        return self.fh.read(length)

    def seekInternal(self, pos):
        """Move the handle to absolute offset ``pos``."""
        self.fh.seek(pos)

    def flushBuffer(self, buffer):
        """Write ``buffer`` to the handle and flush it straight away."""
        handle = self.fh
        handle.write(buffer)
        handle.flush()
# With tracing enabled, rebind the name so that every FileStream(...) call
# returns a DebugWrapper around the real stream.
if DEBUG:
    FileStream = DebugFactory(FileStream)
class FileDirectory(object):
    """Directory implementation that keeps every entry as a plain file inside
    one existing filesystem directory.

    All ``name`` arguments are file names relative to that directory.
    """

    def __init__(self, path):
        """Bind to the existing directory ``path``.

        Raises AssertionError if ``path`` is not a directory.
        """
        # ``name`` is what DebugWrapper prints when tracing is enabled.
        self.name = path
        assert os.path.isdir(path), "not a directory: %r" % (path,)
        self.path = path

    def _fullPath(self, name):
        """Return the filesystem path of the entry called ``name``."""
        return os.path.join(self.path, name)

    def close(self):
        """Nothing to release; present to complete the directory interface."""
        pass

    def createFile(self, name):
        """Create (or truncate) ``name`` and return a FileStream for writing."""
        # Binary mode: the stream carries raw bytes, and text mode would
        # translate line endings on some platforms.
        fh = open(self._fullPath(name), 'wb')
        return FileStream(name, fh)

    def deleteFile(self, name):
        """Remove ``name`` from the directory."""
        os.unlink(self._fullPath(name))

    def fileExists(self, name):
        """Return True if ``name`` exists in the directory."""
        return os.path.exists(self._fullPath(name))

    def fileLength(self, name):
        """Return the size of ``name`` in bytes."""
        return os.path.getsize(self._fullPath(name))

    def fileModified(self, name):
        """Return the modification time of ``name`` as reported by
        os.path.getmtime."""
        return os.path.getmtime(self._fullPath(name))

    def list(self):
        """Return the names of all entries in the directory."""
        return os.listdir(self.path)

    def makeLock(self, name):
        """Return a DirLock called ``name`` whose lock path lies in this
        directory."""
        return DirLock(name, self._fullPath(name))

    def openFile(self, name):
        """Open the existing file ``name`` and return a FileStream for reading."""
        # 'rw' is not a valid mode string; reading an existing file is 'rb'.
        fh = open(self._fullPath(name), 'rb')
        return FileStream(name, fh)

    def renameFile(self, fname, tname):
        """Rename entry ``fname`` to ``tname`` within the directory."""
        return os.rename(self._fullPath(fname), self._fullPath(tname))

    def touchFile(self, name):
        """Set the access and modification times of ``name`` to now."""
        # Rewriting the first byte through an 'rw' handle could not work (the
        # handle is not writable) and could never update an empty file;
        # os.utime changes the timestamps without touching the contents.
        os.utime(self._fullPath(name), None)
# With tracing enabled, rebind the name so that every FileDirectory(...) call
# returns a DebugWrapper around the real directory.
if DEBUG:
    FileDirectory = DebugFactory(FileDirectory)
# ====================================================================
# Copyright (c) 2004-2005 Open Source Applications Foundation.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# ====================================================================
#
# Author: Erik Hatcher
#
# to index all man pages on $MANPATH or /usr/share/man:
# python manindex.py pages
# ====================================================================
import os, re, sys
from subprocess import *
from PyLucene import IndexWriter, StandardAnalyzer, Document, Field
def indexDirectory(dir):
    """Index every regular file directly inside ``dir``.

    Sub-directories and other non-file entries are ignored; each file is
    handed to indexFile() together with ``dir``.
    """
    for entry in os.listdir(dir):
        if not os.path.isfile(os.path.join(dir, entry)):
            continue
        indexFile(dir, entry)
def _runPipeline(stages, cwd):
    """Run ``stages`` (a list of argv lists) as a pipeline without a shell.

    Each stage reads the previous stage's stdout. Returns ``(output, code)``:
    everything the last stage wrote to stdout, and the exit status of the
    last stage that failed, or 0 when every stage succeeded.
    """
    procs = []
    upstream = None
    for argv in stages:
        proc = Popen(argv, stdin=upstream, stdout=PIPE, cwd=cwd)
        if upstream is not None:
            # Drop our copy of the pipe so the upstream process is signalled
            # if this stage exits before consuming all of its input.
            upstream.close()
        upstream = proc.stdout
        procs.append(proc)
    output = upstream.read()
    upstream.close()
    code = 0
    for proc in procs:
        status = proc.wait()
        if status:
            code = status
    return output, code


def _firstGroup(pattern, data):
    """Return group 1 of the first MULTILINE|DOTALL match of ``pattern`` in
    ``data``, or '' when there is no match."""
    found = re.search(pattern, data, re.MULTILINE | re.DOTALL)
    if found:
        return found.group(1)
    return ''


def indexFile(dir, filename):
    """Render one man page to plain text and add it to the index.

    The page ``filename`` inside ``dir`` is formatted with groff and col
    (after gunzip when the name ends in '.gz'), the NAME, SYNOPSIS and
    DESCRIPTION/OVERVIEW sections are extracted, and a Document is added to
    the module-level ``writer``.

    Raises:
        RuntimeError: a stage of the formatting pipeline exited non-zero.
        AttributeError: ``filename`` is not of the form ``command.section``
            (or ``command.section.gz``), so the name pattern does not match.
    """
    path = os.path.join(dir, filename)
    print(" File:  %s" % filename)

    # The formatter runs from the parent of ``dir``, so the page has to be
    # addressed by an absolute path or a relative ``dir`` would not resolve.
    abspath = os.path.abspath(path)
    cwd = os.path.dirname(os.path.abspath(dir))

    groff = ['groff', '-t', '-e', '-E', '-mandoc', '-Tascii']
    col = ['col', '-bx']
    if filename.endswith('.gz'):
        command, section = re.search(r'^(.*)\.(.*)\.gz$', filename).groups()
        stages = [['gunzip', '-c', abspath], groff, col]
    else:
        command, section = re.search(r'^(.*)\.(.*)$', filename).groups()
        stages = [groff + [abspath], col]

    # Argument lists instead of a shell string: file names containing spaces
    # or shell metacharacters are passed through untouched. The exit status
    # comes from the processes themselves; closing a Popen stdout pipe
    # returns None, so it can never report a failure.
    data, err = _runPipeline(stages, cwd)
    if err:
        raise RuntimeError('%s failed with exit code %d' % (command, err))

    # NAME and SYNOPSIS run from their heading up to the next line that
    # starts in column 0 (the next heading).
    name = _firstGroup(r'^NAME$(.*?)^\S', data)
    synopsis = _firstGroup(r'^(?:SYNOPSIS|SYNOPSYS)$(.*?)^\S', data)
    # Greedy on purpose: a lazy group with nothing after it always matches
    # the empty string, which left the description permanently blank.
    description = _firstGroup(r'^(?:DESCRIPTION|OVERVIEW)$(.*)', data)

    doc = Document()
    doc.add(Field.Keyword("command", command))
    doc.add(Field.Keyword("section", section))
    doc.add(Field.Text("name", name.strip()))
    doc.add(Field.Text("synopsis", synopsis.strip()))
    doc.add(Field.UnStored("keywords",
                           ' '.join((command, name, synopsis, description))))
    doc.add(Field.Keyword("filename", abspath))
    writer.addDocument(doc)
if __name__ == '__main__':
    # fdir is the module that provides FileDirectory; it is only needed when
    # this file is run as a script.
    import fdir

    if len(sys.argv) != 2:
        print("Usage: python manindex.py <index dir>")
    else:
        indexDir = sys.argv[1]
        fd = fdir.FileDirectory(indexDir)
        # ``writer`` must stay a module-level name: indexFile() adds its
        # documents to it. NOTE(review): the third argument is presumably
        # the "create a new index" flag -- confirm against the PyLucene API.
        writer = IndexWriter(fd, StandardAnalyzer(), True)
        try:
            manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep)
            for dir in manpath:
                # MANPATH may contain empty or stale components (for example
                # a leading or trailing separator); os.listdir would raise
                # on those, so skip them.
                if not os.path.isdir(dir):
                    continue
                print("Crawling %s" % dir)
                for name in os.listdir(dir):
                    path = os.path.join(dir, name)
                    if os.path.isdir(path):
                        indexDirectory(path)
            writer.optimize()
        finally:
            # Close the writer even when indexing fails part-way through.
            writer.close()
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev