# -*- coding: utf-8 -*-

# Copyright (c) 2003 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Parse a Python module file.

This module is based on pyclbr.py as of Python 2.2.2

<b>BUGS</b> (from pyclbr.py)
<ul>
<li>Code that doesn't pass tabnanny or python -t will confuse it, unless
  you set the module TABWIDTH variable (default 8) to the correct tab width
  for the file.</li>
</ul>
"""

import sys
import os
import imp
import re
import string

# Names exported by "from <this module> import *".
__all__ = ["Module", "Class", "Function", "readModule"]

# Tab width used by _indent() when expanding tabs to measure indentation.
TABWIDTH = 8

# File type code reported by find_module() for .ptl files; it is used
# alongside the type codes of the imp module.
PTL_SOURCE = 128

# File types readModule() will scan; modules of any other type are
# registered without being parsed.
SUPPORTED_TYPES = [imp.PY_SOURCE, PTL_SOURCE]
    
# Scanner regexp used by Module.scan(). It is a single alternation; the
# named top-level group that took part in a match tells the scanner which
# construct was found:
#   String            - triple quoted, single line quoted or ### delimited
#                       string
#   Docstring         - triple quoted or ### delimited string that follows
#                       a colon with only whitespace in between
#   Method            - "def" statement; an optional [plain] or [html]
#                       marker is accepted between name and parameter list
#   Class             - "class" statement with an optional base class list
#   Attribute         - "self.<name> =" at the start of a line (after
#                       optional indentation)
#   Import            - "import ..." statement starting in column 0
#   ImportFrom        - "from ... import ..." statement starting in column 0
#   ConditionalDefine - if/elif/else header that is directly followed by
#                       a "def"
# The order of the alternatives matters, because the first one matching at
# a given position wins. Only the bound search() method of the compiled
# pattern is kept.
_getnext = re.compile(r"""
    (?P<String>
       \""" (?P<StringContents1>
               [^"\\]* (?:
                            (?: \\. | "(?!"") )
                            [^"\\]*
                        )*
            )
       \"""

    |   ''' (?P<StringContents2>
                [^'\\]* (?:
                            (?: \\. | '(?!'') )
                            [^'\\]*
                        )*
            )
        '''

    |   " [^"\\\n]* (?: \\. [^"\\\n]*)* "

    |   ' [^'\\\n]* (?: \\. [^'\\\n]*)* '

    |   \#\#\# (?P<StringContents3>
                [^#\\]* (?:
                            (?: \\. | \#(?!\#\#) )
                            [^#\\]*
                        )*
            )
        \#\#\#
    )

|   (?P<Docstring>
        (?<= :) \s*
       \""" (?P<DocstringContents1>
                [^"\\]* (?:
                            (?: \\. | "(?!"") )
                            [^"\\]*
                        )*
            )
       \"""

    |   (?<= :) \s*
        ''' (?P<DocstringContents2>
                [^'\\]* (?:
                            (?: \\. | '(?!'') )
                            [^'\\]*
                        )*
            )
        '''

    |   (?<= :) \s*
        \#\#\# (?P<DocstringContents3>
                [^#\\]* (?:
                            (?: \\. | \#(?!\#\#) )
                            [^#\\]*
                        )*
            )
        \#\#\#
    )

|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?P<MethodName> [a-zA-Z_] \w* )
        (?: [ \t]* \[ (?: plain | html ) \] )?
        [ \t]* \(
        (?P<MethodSignature> (?: [^)] | \)[ \t]*, )* )
        \) [ \t]* :
    )

|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class [ \t]+
        (?P<ClassName> [a-zA-Z_] \w* )
        [ \t]*
        (?P<ClassSupers> \( [^)]* \) )?
        [ \t]* :
    )

|   (?P<Attribute>
        ^
        (?P<AttributeIndent> [ \t]* )
        self [ \t]* \. [ \t]*
        (?P<AttributeName> [a-zA-Z_] \w* )
        [ \t]* =
    )

|   (?P<Import>
        ^ import [ \t]+
        (?P<ImportList> [^#;\n]+ )
    )

|   (?P<ImportFrom>
        ^ from [ \t]+
        (?P<ImportFromPath>
            [a-zA-Z_] \w*
            (?:
                [ \t]* \. [ \t]* [a-zA-Z_] \w*
            )*
        )
        [ \t]+
        import [ \t]+
        (?P<ImportFromList> [^#;\n]+ )
    )

|   (?P<ConditionalDefine>
        ^ [ \t]* (?: (?: if | elif ) [ \t]+ [^:]* | else [ \t]* ) : (?= \s* def)
    )
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search

# Substitution helper for the contents of ### delimited strings. Called
# with r"\1" as replacement it removes the "#" (plus one following blank or
# tab, if present) that follows the indentation of a line, while keeping
# the indentation itself.
_hashsub = re.compile(r"""^([ \t]*)#[ \t]?""", re.MULTILINE).sub

# Cache of the Module objects created by readModule(), keyed by module name.
_modules = {}                           # cache of modules we've seen

class Module:
    '''
    Class to represent a Python module.
    '''
    def __init__(self, name, file=None):
        """
        Constructor
        
        @param name name of this module (string)
        @param file filename of file containing this module (string)
        """
        self.name = name
        self.file = file
        self.classes = {}
        self.functions = {}
        self.description = ""
        self.imports = []
        self.from_imports = {}
        # everything in front of the last dot of the module name
        self.package = '.'.join(name.split('.')[:-1])
        
    def _addclass(self, name, _class):
        """
        Method to add information about a class.
        
        @param name name of class to be added (string)
        @param _class Class object to be added
        """
        self.classes[name] = _class
        
    def _addfunction(self, name, function):
        """
        Method to add information about a function.
        
        @param name name of function to be added (string)
        @param function Function object to be added
        """
        self.functions[name] = function
            
    def _adddescription(self, description):
        """
        Method to store the modules docstring.
        
        @param description the docstring to be stored (string)
        """
        self.description = description
        
    def _doccontents(self, match, prefix):
        """
        Private method to extract the text of a matched string or docstring.
        
        @param match match object delivered by the scanner regexp
        @param prefix prefix of the content group names, i.e. "Docstring"
            or "String" (string)
        @return the text of the string (string) or None, if the match is
            neither a triple quoted nor a ### delimited string
        """
        contents = match.group(prefix + "Contents3")
        if contents is not None:
            # ### delimited string: strip the hash sign leading each line
            return _hashsub(r"\1", contents)
        
        # the filename may be None, if the module isn't backed by a file
        if (self.file or "").lower().endswith('.ptl'):
            return ""
        
        # Test against None explicitly; an empty docstring is a valid result
        # and must not fall through to the other (non participating) group.
        contents = match.group(prefix + "Contents1")
        if contents is None:
            contents = match.group(prefix + "Contents2")
        return contents
        
    def scan(self, src):
        """
        Method to scan the source text and retrieve the relevant information.
        
        The classes, functions, imports and docstrings found are stored in
        this object.
        
        @param src the source text to be scanned (string)
        """
        # To avoid having to stop the regexp at each newline, the line
        # number is only updated when it is needed, by counting the
        # newlines between the position of the previous update and the
        # current match. The source text is expected to use '\n' as line
        # separator.
        lineno, last_lineno_pos = 1, 0
        classstack = [] # stack of (class, indent) pairs
        i = 0
        modulelevel = 1
        conditional_def = 0
        cur_obj = self
        while 1:
            m = _getnext(src, i)
            if not m:
                break
            start, i = m.span()
            
            if m.start("Method") >= 0:
                # found a method definition or function
                thisindent = _indent(m.group("MethodIndent"))
                meth_name = m.group("MethodName")
                meth_sig = m.group("MethodSignature")
                meth_sig = meth_sig.replace('\\\n', '')
                lineno = lineno + src.count('\n', last_lineno_pos, start)
                last_lineno_pos = start
                # close all classes indented at least as much
                while classstack and \
                      classstack[-1][1] >= thisindent:
                    del classstack[-1]
                # f stays None for nested defs, so that their docstring
                # isn't attached to a previously found object
                f = None
                if classstack:
                    cur_class = classstack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a class method
                        f = Function(None, meth_name, None, lineno, meth_sig)
                        cur_class._addmethod(meth_name, f)
                        conditional_def = 0
                    elif cur_class is None and conditional_def and \
                         len(classstack) > 1:
                        # it's a conditional def; look at the entry below
                        # the function marker
                        cur_class = classstack[-2][0]
                        if isinstance(cur_class, Class):
                            # it's a class method
                            f = Function(None, meth_name, None, lineno, meth_sig)
                            cur_class._addmethod(meth_name, f)
                    # else it's a nested def
                else:
                    # it's a module function
                    f = Function(self.name, meth_name, self.file,
                                 lineno, meth_sig)
                    self._addfunction(meth_name, f)
                cur_obj = f
                classstack.append((None, thisindent)) # Marker for nested fns
            
            elif m.start("Docstring") >= 0:
                contents = self._doccontents(m, "Docstring")
                if cur_obj is not None and contents is not None:
                    cur_obj._adddescription(contents)
            
            elif m.start("String") >= 0:
                # Only a string that is the first thing found in the source
                # and that starts a line is taken as the module docstring.
                if modulelevel and \
                   (start == 0 or src[start-1] == '\n'):
                    contents = self._doccontents(m, "String")
                    if cur_obj is not None and contents is not None:
                        cur_obj._adddescription(contents)
            
            elif m.start("Class") >= 0:
                # we found a class definition
                thisindent = _indent(m.group("ClassIndent"))
                # close all classes indented at least as much
                while classstack and \
                      classstack[-1][1] >= thisindent:
                    del classstack[-1]
                lineno = lineno + src.count('\n', last_lineno_pos, start)
                last_lineno_pos = start
                class_name = m.group("ClassName")
                inherit = m.group("ClassSupers")
                if inherit:
                    # the class inherits from other classes
                    inherit = inherit[1:-1].strip()
                    names = []
                    for n in inherit.split(','):
                        n = n.strip()
                        if n:
                            if n in self.classes:
                                # we know this super class
                                n = self.classes[n].name
                            else:
                                parts = n.split('.')
                                if len(parts) > 1:
                                    # super class is of the
                                    # form module.class:
                                    # look in module for class
                                    modkey = parts[-2]
                                    if modkey in _modules:
                                        # NOTE(review): this replaces the
                                        # super class by the name of the
                                        # cached module; a lookup of the
                                        # class in that module may have
                                        # been intended - confirm.
                                        n = _modules[modkey].name
                            names.append(n)
                    inherit = names
                # remember this class
                cur_class = Class(self.name, class_name, inherit,
                                  self.file, lineno)
                cur_obj = cur_class
                # add nested classes to the module
                self._addclass(class_name, cur_class)
                classstack.append((cur_class, thisindent))
            
            elif m.start("Attribute") >= 0:
                # add the attribute to the innermost open class, skipping
                # the markers pushed for functions
                index = -1
                while index >= -len(classstack):
                    if classstack[index][0] is not None:
                        classstack[index][0]._addattribute(m.group("AttributeName"))
                        break
                    else:
                        index -= 1
            
            elif m.start("Import") >= 0:
                # import module
                for name in m.group("ImportList").split(','):
                    name = name.strip()
                    if name not in self.imports:
                        self.imports.append(name)
            
            elif m.start("ImportFrom") >= 0:
                # from module import stuff
                mod = m.group("ImportFromPath")
                names = m.group("ImportFromList").split(',')
                if mod not in self.from_imports:
                    self.from_imports[mod] = []
                for n in names:
                    n = n.strip()
                    self.from_imports[mod].append(n)
            
            elif m.start("ConditionalDefine") >= 0:
                # a conditional function/method definition
                conditional_def = 1
            
            else:
                assert 0, "regexp _getnext found something unexpected"
            
            modulelevel = 0
            
    def createHierarchy(self):
        """
        Method to build the inheritance hierarchy for all classes of this module.
        
        @return A dictionary with inheritance hierarchies.
        """
        hierarchy = {}
        for cls in self.classes.keys():
            self.assembleHierarchy(cls, self.classes, [cls], hierarchy)
        return hierarchy
        
    def assembleHierarchy(self, name, classes, path, result):
        """
        Method to assemble the inheritance hierarchy.
        
        This method will traverse the class hierarchy, from a given class
        and build up a nested dictionary of super-classes. The result is
        intended to be inverted, i.e. the highest level are the super classes.
        
        This code is borrowed from Boa Constructor.
        
        @param name name of class to assemble hierarchy (string)
        @param classes A dictionary of classes to look in.
        @param path list of class names visited so far, starting with the
            class the traversal began at (list of strings). It is not
            modified.
        @param result The resultant hierarchy
        @return The nested dictionary of super-classes.
        """
        rv = {}
        if name in classes:
            for cls in classes[name].super:
                # A super class already on the path (e.g. a class derived
                # from a foreign class of the same name) is treated like an
                # unknown class in order to avoid an endless recursion.
                if cls not in classes or cls in path:
                    rv[cls] = {}
                    exhausted = path + [cls]
                    exhausted.reverse()
                    self.addPathToHierarchy(exhausted, result, self.addPathToHierarchy)
                else:
                    rv[cls] = self.assembleHierarchy(cls,
                                   classes, path + [cls], result)
        
        if len(rv) == 0:
            # reverse a copy in order to leave the callers list untouched
            exhausted = path[:]
            exhausted.reverse()
            self.addPathToHierarchy(exhausted, result, self.addPathToHierarchy)
        
        return rv
            
    def addPathToHierarchy(self, path, result, fn):
        """
        Method to put the exhausted path into the result dictionary.
        
        @param path the exhausted path of classes
        @param result the result dictionary
        @param fn function to call for classes that are already part of the
            result dictionary
        """
        if not path:
            return
        
        if path[0] in result:
            if len(path) > 1:
                fn(path[1:], result[path[0]], fn)
        else:
            for part in path:
                result[part] = {}
                result = result[part]
                
    def getName(self):
        """
        Method to retrieve the modules name.
        
        @return module name (string)
        """
        return self.name
        
    def getFileName(self):
        """
        Method to retrieve the modules filename.
        
        @return module filename (string)
        """
        return self.file

class Class:
    '''
    Class holding the information collected about a Python class.
    '''
    def __init__(self, module, name, super, file, lineno):
        """
        Constructor
        
        @param module name of the module the class is defined in (string)
        @param name name of the class (string)
        @param super names of the classes this class is derived from
                (list of strings) or None, if there are none
        @param file name of the file the class is defined in (string)
        @param lineno line number of the class definition (integer)
        """
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        if super is not None:
            self.super = super
        else:
            self.super = []
        self.methods = {}
        self.attributes = []
        self.description = ""

    def _addmethod(self, name, function):
        """
        Method to register a method of the class.
        
        @param name name of the method (string)
        @param function Function object describing the method
        """
        self.methods[name] = function
            
    def _addattribute(self, name):
        """
        Method to register an attribute of the class.
        
        An attribute name is recorded only once.
        
        @param name name of the attribute (string)
        """
        if name in self.attributes:
            return
        self.attributes.append(name)
            
    def _adddescription(self, description):
        """
        Method to set the docstring of the class.
        
        @param description the docstring (string)
        """
        self.description = description

class Function:
    '''
    Class to represent a Python function or method.
    '''
    def __init__(self, module, name, file, lineno, signature = ''):
        """
        Constructor
        
        The signature is split at commas; each piece is stripped of
        surrounding whitespace and empty pieces are dropped. A function
        without parameters therefore has an empty parameter list.
        
        @param module name of module containing this function (string)
        @param name name of the function (string)
        @param file name of file containing this function (string)
        @param lineno linenumber of the function definition (integer)
        @param signature the functions call signature (string)
        """
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        # "signature or ''" tolerates None; the filter removes the empty
        # entries produced by an empty signature or a trailing comma
        stripped = [e.strip() for e in (signature or '').split(',')]
        self.parameters = [e for e in stripped if e]
        self.description = ""
        
    def _adddescription(self, description):
        """
        Method to store the functions docstring.
        
        @param description the docstring to be stored (string)
        """
        self.description = description

def readModule(module, path=None, inpackage=0, basename=""):
    '''
    Function to read a module file and parse it.

    The module is searched in path and sys.path, read and parsed.
    If the module was parsed before, the information is taken
    from a cache in order to speed up processing.
    
    Built-in modules and modules that are not of a supported source type
    are cached as Module objects without a filename and without being
    scanned.
    
    @param module Name of the module to be parsed (string)
    @param path Searchpath for the module (list of strings); None is
        treated like an empty list
    @param inpackage Flag indicating that module is inside a
        package (boolean)
    @param basename a path basename. This basename is deleted from
        the filename of the module file to be read. (string)
    @return reference to a Module object containing the parsed
        module information (Module)
    @exception ImportError The module wasn't found.
    '''
    if path is None:
        path = []

    modname = module
    
    if os.path.exists(module):
        # a filename was given; derive search path and module name from it
        path = [os.path.dirname(module)]
        if module.lower().endswith(".py"):
            module = module[:-3]
        if os.path.exists(os.path.join(path[0], "__init__.py")):
            if basename:
                module = module.replace(basename, "")
            modname = module.replace(os.sep, '.')
            inpackage = 1
        else:
            modname = os.path.basename(module)
        if modname.lower().endswith(".ptl"):
            modname = modname[:-4]
        module = os.path.basename(module)

    if modname in _modules:
        # we've seen this module before...
        return _modules[modname]

    if module in sys.builtin_module_names:
        # this is a built-in module
        _modules[modname] = Module(modname, None)
        return _modules[modname]

    # search the path for the module
    f = None
    if inpackage:
        try:
            f, filename, (suff, mode, filetype) = find_module(module, path)
        except ImportError:
            f = None
    if f is None:
        fullpath = list(path) + sys.path
        f, filename, (suff, mode, filetype) = find_module(module, fullpath)
    if filetype not in SUPPORTED_TYPES:
        # not Python source, can't do anything with this module
        if f is not None:
            # no file object is delivered for e.g. a package directory
            f.close()
        _modules[modname] = Module(modname, None)
        return _modules[modname]

    mod = Module(modname, filename)
    # close the file even if reading fails and before the scan starts
    try:
        src = f.read()
    finally:
        f.close()
    mod.scan(src)
    _modules[modname] = mod
    return mod

def _indent(ws, _expandtabs=string.expandtabs):
    """
    Function to calculate the width of an indentation string.
    
    Tab characters are expanded using the module level TABWIDTH before
    the length is taken.
    
    @param ws The whitespace string to be checked. (string)
    @param _expandtabs Function used to expand the tab characters.
    @return Length of the whitespace string after tab expansion.
    """
    expanded = _expandtabs(ws, TABWIDTH)
    return len(expanded)

def find_module(name, path):
    """
    Module function extending the module finding mechanism of Python.
    
    A name ending in .ptl is looked up as a file in the directories of
    path only. For every other name an extension of .py is removed, if
    present, and the search is delegated to the imp module.
    
    @param name filename or modulename to search for (string)
    @param path search path (list of strings)
    @return tuple of the open file, pathname and description. Description
        is a tuple of file suffix, file mode and file type.
    @exception ImportError The file or module wasn't found.
    """
    lowered = name.lower()
    
    if lowered.endswith('.ptl'):
        # these files are searched for in path only
        for directory in path:
            candidate = os.path.join(directory, name)
            if os.path.exists(candidate):
                description = ('.ptl', 'r', PTL_SOURCE)
                return (open(candidate), candidate, description)
        raise ImportError
        
    if lowered.endswith('.py'):
        name = name[:-3]
        
    return imp.find_module(name, path)

def resetParsedModules():
    """
    Module function to empty the cache of modules already parsed.
    """
    # iterate over a copy of the keys, because the cache shrinks meanwhile
    for key in list(_modules.keys()):
        del _modules[key]
    
def resetParsedModule(module, basename=""):
    """
    Module function to remove a single module from the cache of parsed
    modules.
    
    If module names an existing file, the cache key is derived from the
    filename; otherwise module itself is used as the key. A module that
    is not in the cache is ignored silently.
    
    @param module Name of the module to be removed (string)
    @param basename a path basename. This basename is deleted from
        the filename of the module file to be cleared. (string)
    """
    key = module
    
    if os.path.exists(module):
        directory = os.path.dirname(module)
        stem = module
        if stem.lower().endswith(".py"):
            stem = stem[:-3]
        if os.path.exists(os.path.join(directory, "__init__.py")):
            # the file is part of a package
            if basename:
                stem = stem.replace(basename, "")
            key = stem.replace(os.sep, '.')
        else:
            key = os.path.basename(stem)
        if key.lower().endswith(".ptl"):
            key = key[:-4]

    try:
        del _modules[key]
    except KeyError:
        pass
    
if __name__ == "__main__":
    # Main program for testing: parse the module named by the first
    # command line argument and print its class hierarchy.
    parsed = readModule(sys.argv[1])
    print(parsed.createHierarchy())
