On Thu, 2005-10-13 at 10:00 -0700, Andi Vajda wrote:
> > i was trying to get a new directory implementation going based on the
> > pythondirectory support in pylucene. i ran into some problems, and was
> > curious if this part of the codebase is in use, i couldnt find any unit
> > tests for it, nor references of folks using it.
> 
> I don't know that this code is in use at the moment. Would it be possible to 
> send in code to reproduce the problem ? It could become a unit test.

My implementation unfortunately has several context dependencies, which make
it unsuitable for a unit test.

Instead, I've written a really simple directory implementation without
dependencies that just stores to files. It is attached, along with a modified
manindex script that I've been using for testing; it provokes the same error.
Enabling the debug flag at the top gives some basic method traces of
calls into Python.

> 
> > are there some rules to reference counting with objects being used by
> > java? is there a way to trackdown/pinpoint this sort of error better?
> 
> You shouldn't have to do anything special, PyLucene should be taking care of 
> it for you.

cool.

kapil
import os, sys

# Debug switch: when true, DirLock, FileStream and FileDirectory are rebound
# below to DebugFactory wrappers, so every attribute looked up on their
# instances is printed by DebugWrapper.__getattr__.
DEBUG = False #True

class DebugWrapper( object ):
    """Proxy that traces attribute lookups made on a wrapped object.

    Each attribute that is not found on the wrapper itself is reported on
    stdout as "<wrapped class name> <wrapped obj.name> <attribute name>" and
    then fetched from the wrapped object.  The wrapped object must expose a
    ``name`` attribute.
    """

    def __init__(self, obj ):
        self.obj = obj

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails, so being asked for
        # 'obj' means __init__ never ran on this instance; fail cleanly
        # instead of recursing forever through self.obj below.
        if name == 'obj':
            raise AttributeError( name )

        target = self.obj
        # A single pre-formatted argument prints the same text whether
        # print is a statement or a function.
        print("%s %s %s" % ( target.__class__.__name__, target.name, name ))
        sys.stdout.flush()
        return getattr( target, name )

class DebugFactory( object ):
    """Callable stand-in for a class.

    Calling the factory instantiates the remembered class with the given
    arguments and hands the new instance back wrapped in a DebugWrapper.
    """

    def __init__(self, klass):
        # the class to instantiate on every call
        self.klass = klass

    def __call__(self, *args, **kw):
        return DebugWrapper( self.klass(*args, **kw) )

class DirLock( object ):
    """'Feeling lucky' lock: a per-instance flag and nothing more.

    Every instance ignores all other instances, so this is only safe when
    used from a single context.  The path is stored in ``lock_file`` but
    no method of this class reads it.
    """

    def __init__(self, name, path ):
        self.name, self.lock_file = name, path
        self.locked = False

    def obtain( self ):
        # always succeeds
        self.locked = True
        return True

    def obtainTimeout( self, timeout ):
        # the timeout is accepted but never consulted; always succeeds
        self.locked = True
        return True

    def release( self ):
        self.locked = False

    def isLocked(self):
        return self.locked


# In debug mode rebind the name, so that DirLock(...) goes through
# DebugFactory.__call__ and yields a DebugWrapper around the real lock.
if DEBUG:
    DirLock = DebugFactory( DirLock )

class FileStream( object ):
    """Thin adapter around an already-open file object.

    ``name`` is kept alongside the handle; every method simply forwards
    to the underlying file object ``fh``.
    """

    def __init__(self, name, fh ):
        self.fh = fh
        self.name = name

    def readInternal( self, length ):
        # hand back whatever the file object returns for this read
        chunk = self.fh.read( length )
        return chunk

    def seekInternal( self, pos ):
        self.fh.seek( pos )

    def flushBuffer( self, buffer ):
        # write the data, then flush the file object straight away
        out = self.fh
        out.write( buffer )
        out.flush()

    def close(self):
        self.fh.close()

# In debug mode rebind the name, so that FileStream(...) goes through
# DebugFactory.__call__ and yields a DebugWrapper around the real stream.
if DEBUG:
    FileStream = DebugFactory( FileStream )
        
class FileDirectory( object ):
    """Directory implementation that keeps each named file as a regular
    file inside one existing filesystem directory.

    ``path`` must already exist; it is stored both as ``self.path`` and as
    ``self.name``.  Files are opened in binary mode so that no newline
    translation is applied to the data on platforms that distinguish text
    files from binary ones.
    """

    def __init__(self, path ):
        self.name = path
        assert os.path.isdir( path ), 'not a directory: %r' % (path,)
        self.path = path

    def _fullPath( self, name ):
        # Location of the named file inside this directory.
        return os.path.join( self.path, name )

    def close(self):
        # nothing is held open at the directory level
        pass

    def createFile(self, name ):
        """Create (or truncate) the named file and return a FileStream on it."""
        fh = open( self._fullPath( name ), 'wb')
        return FileStream( name, fh )

    def deleteFile( self, name ):
        """Remove the named file."""
        os.unlink( self._fullPath( name ) )

    def fileExists( self, name ):
        """Return True when the named file exists."""
        return os.path.exists( self._fullPath( name ) )

    def fileLength( self, name ):
        """Return the size of the named file in bytes."""
        return os.path.getsize( self._fullPath( name ) )

    def fileModified( self, name ):
        """Return the named file's modification time as reported by
        os.path.getmtime (seconds since the epoch)."""
        return os.path.getmtime( self._fullPath( name ) )

    def list(self):
        """Return the names of all entries in the directory."""
        return os.listdir( self.path )

    def makeLock( self, name ):
        """Return a DirLock for the given lock name."""
        return DirLock( name, self._fullPath( name ) )

    def openFile( self, name ):
        """Open the named file for reading and return a FileStream on it."""
        # 'rw' is not a valid mode string: some platforms silently treat it
        # as read-only, others reject it.  Reading is all that is needed.
        fh = open( self._fullPath( name ), 'rb')
        return FileStream( name, fh )

    def renameFile(self, fname, tname):
        """Rename fname to tname inside the directory."""
        return os.rename( self._fullPath( fname ), self._fullPath( tname ) )

    def touchFile( self, name):
        """Set the named file's modification time to the current time."""
        # Rewriting the first byte needed a writable handle (which mode 'rw'
        # did not provide) and did nothing at all for an empty file;
        # os.utime updates the timestamps without touching the content.
        os.utime( self._fullPath( name ), None )


# In debug mode rebind the name, so that FileDirectory(...) goes through
# DebugFactory.__call__ and yields a DebugWrapper around the real directory.
if DEBUG:
    FileDirectory = DebugFactory(FileDirectory)
# ====================================================================
# Copyright (c) 2004-2005 Open Source Applications Foundation.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions: 
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# ====================================================================
#
# Author: Erik Hatcher
#
# to index all man pages on $MANPATH or /usr/share/man:
#   python manindex.py pages
# ====================================================================

import os, re, sys
from subprocess import *
from PyLucene import IndexWriter, StandardAnalyzer, Document, Field

def indexDirectory(dir):
    """Index every regular file found directly inside dir.

    Entries that are not regular files (sub-directories, for instance) are
    skipped; there is no recursion.
    """

    entries = os.listdir(dir)
    for entry in entries:
        if not os.path.isfile(os.path.join(dir, entry)):
            continue
        indexFile(dir, entry)


def _manSection(pattern, data):
    # Return group 1 of the first match of pattern in data, or '' if absent.
    found = re.search(pattern, data, re.MULTILINE | re.DOTALL)
    if found is None:
        return ''
    return found.group(1)


def indexFile(dir,filename):
    """Render one man page to plain text and add it to the index.

    The page is formatted by a groff | col shell pipeline (preceded by
    gunzip for ``.gz`` files) run from the parent of ``dir``.  The NAME,
    SYNOPSIS and DESCRIPTION/OVERVIEW sections are extracted from the
    output and stored in a Document, which is added through the
    module-level ``writer`` created by the ``__main__`` block.

    Raises RuntimeError when the pipeline exits with a non-zero status.
    """

    path = os.path.join(dir, filename)
    print("  File:  %s" % filename)

    if filename.endswith('.gz'):
        stem = filename[:-len('.gz')]
        shellCommand = ('gunzip -c ' + path +
                        ' | groff -t -e -E -mandoc -Tascii | col -bx')
    else:
        stem = filename
        shellCommand = 'groff -t -e -E -mandoc -Tascii ' + path + ' | col -bx'

    # "<command>.<section>", split at the last dot.
    nameMatch = re.search(r'^(.*)\.(.*)$', stem)
    if nameMatch:
        command, section = nameMatch.groups()
    else:
        # No dot to split on (e.g. "README"): index the file under its whole
        # name with an empty section instead of failing on a None match.
        command, section = stem, ''

    # Keep the Popen object itself: close() on its stdout pipe returns None,
    # so the exit status has to come from the process.
    child = Popen(shellCommand, shell=True, stdout=PIPE,
                  cwd=os.path.dirname(dir))
    data = child.communicate()[0]
    err = child.returncode
    if err:
        raise RuntimeError('%s failed with exit code %d' %(command, err))

    name = _manSection(r'^NAME$(.*?)^\S', data)
    synopsis = _manSection(r'^(?:SYNOPSIS|SYNOPSYS)$(.*?)^\S', data)
    # Greedy on purpose: a lazy group with nothing after it always matches
    # the empty string, which left the description permanently empty.  This
    # captures everything from the heading to the end of the page.
    description = _manSection(r'^(?:DESCRIPTION|OVERVIEW)$(.*)', data)

    doc = Document()
    doc.add(Field.Keyword("command", command))
    doc.add(Field.Keyword("section", section))
    doc.add(Field.Text("name", name.strip()))
    doc.add(Field.Text("synopsis", synopsis.strip()))
    doc.add(Field.UnStored("keywords",
                           ' '.join((command, name, synopsis, description))))
    doc.add(Field.Keyword("filename", os.path.abspath(path)))

    writer.addDocument(doc)


# Script entry point: build a fresh index in <index dir>, stored through
# fdir.FileDirectory, from every man page found under the MANPATH entries.
if __name__ == '__main__':
    import fdir

    if len(sys.argv) != 2:
        print("Usage: python manindex.py <index dir>")

    else:
        indexDir = sys.argv[1]

        fd = fdir.FileDirectory( indexDir )

        # Module-level on purpose: indexFile() adds documents through it.
        writer = IndexWriter(fd, StandardAnalyzer(), True)
        manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep)
        for dir in manpath:
            # MANPATH can hold empty or stale entries; os.listdir would
            # raise on them and abort the whole run.
            if not os.path.isdir(dir):
                print("Skipping %s (not a directory)" % dir)
                continue
            print("Crawling %s" % dir)
            for name in os.listdir(dir):
                path = os.path.join(dir, name)
                if os.path.isdir(path):
                    indexDirectory(path)
        writer.optimize()
        writer.close()
_______________________________________________
pylucene-dev mailing list
[email protected]
http://lists.osafoundation.org/mailman/listinfo/pylucene-dev

Reply via email to