scripts: All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing.

Ian Thompson Fri, 18 Jul 2008 04:00:44 -0700

Revision: 15619
          
http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=15619
Author:   quorn
Date:     2008-07-18 13:00:34 +0200 (Fri, 18 Jul 2008)


Log Message:
-----------
All parsing is now done in one sweep and cached to allow details to be obtained 
without re-parsing. A text can be manually parsed with parse_text(text) which 
also updates the cache.

Modified Paths:
--------------
    branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py
    branches/soc-2008-quorn/release/scripts/textplugin_imports.py
    branches/soc-2008-quorn/release/scripts/textplugin_membersuggest.py

Modified: branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py
===================================================================
--- branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py 
2008-07-18 04:59:07 UTC (rev 15618)
+++ branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py 
2008-07-18 11:00:34 UTC (rev 15619)
@@ -5,12 +5,37 @@
 
 # TODO: Remove the dependency for a full Python installation.
 
+class ClassDesc():
+       
+       def __init__(self, name, defs, vars):
+               self.name = name
+               self.defs = defs
+               self.vars = vars
+
+class ScriptDesc():
+       
+       def __init__(self, name, imports, classes, defs, vars, 
incomplete=False):
+               self.name = name
+               self.imports = imports
+               self.classes = classes
+               self.defs = defs
+               self.vars = vars
+               self.incomplete = incomplete
+               self.time = 0
+       
+       def set_time(self):
+               self.time = time()
+
 # Context types
+UNSET = -1
 NORMAL = 0
 SINGLE_QUOTE = 1
 DOUBLE_QUOTE = 2
 COMMENT = 3
 
+# Special period constants
+AUTO = -1
+
 # Python keywords
 KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
                        'or', 'with', 'assert', 'else', 'if', 'pass', 'yield',
@@ -18,14 +43,319 @@
                        'raise', 'continue', 'finally', 'is', 'return', 'def', 
'for',
                        'lambda', 'try' ]
 
-# Used to cache the return value of generate_tokens
-_token_cache = None
-_cache_update = 0
+ModuleType = type(__builtin__)
+NoneScriptDesc = ScriptDesc('', dict(), dict(), dict(), dict(), True)
 
-ModuleType = type(__builtin__)
 _modules = dict([(n, None) for n in sys.builtin_module_names])
 _modules_updated = 0
+_parse_cache = dict()
 
+def get_cached_descriptor(txt, period=AUTO):
+       """Returns the cached ScriptDesc for the specified Text object 'txt'. 
If the
+       script has not been parsed in the last 'period' seconds it will be 
reparsed
+       to obtain this descriptor.
+       
+       Specifying AUTO for the period (default) will choose a period based on 
the
+       size of the Text object. Larger texts are parsed less often.
+       """
+       
+       global _parse_cache, NoneScriptDesc, AUTO
+       
+       if period == AUTO:
+               m = txt.nlines
+               r = 1
+               while True:
+                       m = m >> 2
+                       if not m: break
+                       r = r << 1
+               period = r
+       
+       key = hash(txt)
+       parse = True
+       if _parse_cache.has_key(key):
+               desc = _parse_cache[key]
+               if desc.time >= time() - period:
+                       parse = desc.incomplete
+       
+       if parse:
+               try:
+                       desc = parse_text(txt)
+               except:
+                       if _parse_cache.has_key(key):
+                               del _parse_cache[key]
+                       desc = NoneScriptDesc
+       
+       return desc
+
+def parse_text(txt):
+       """Parses an entire script's text and returns a ScriptDesc instance
+       containing information about the script.
+       
+       If the text is not a valid Python script a TokenError will be thrown.
+       Currently this means leaving brackets open will result in the script 
failing
+       to complete.
+       """
+       
+       global NORMAL, SINGLE_QUOTE, DOUBLE_QUOTE, COMMENT
+       
+       txt.reset()
+       tokens = generate_tokens(txt.readline) # Throws TokenError
+       
+       curl, cursor = txt.getCursorPos()
+       linen = curl + 1 # Token line numbers are one-based
+       
+       imports = dict()
+       imp_step = 0
+       
+       classes = dict()
+       cls_step = 0
+       
+       defs = dict()
+       def_step = 0
+       
+       vars = dict()
+       var_step = 0
+       var_accum = dict()
+       var_forflag = False
+       
+       indent = 0
+       prev_type = -1
+       prev_string = ''
+       incomplete = False
+       
+       try:
+        for type, string, start, end, line in tokens:
+               
+               #################
+               ## Indentation ##
+               #################
+               
+               if type == tokenize.INDENT:
+                       indent += 1
+               elif type == tokenize.DEDENT:
+                       indent -= 1
+               
+               #########################
+               ## Module importing... ##
+               #########################
+               
+               imp_store = False
+               
+               # Default, look for 'from' or 'import' to start
+               if imp_step == 0:
+                       if string == 'from':
+                               imp_tmp = []
+                               imp_step = 1
+                       elif string == 'import':
+                               imp_from = None
+                               imp_tmp = []
+                               imp_step = 2
+               
+               # Found a 'from', create imp_from in form '???.???...'
+               elif imp_step == 1:
+                       if string == 'import':
+                               imp_from = '.'.join(imp_tmp)
+                               imp_tmp = []
+                               imp_step = 2
+                       elif type == tokenize.NAME:
+                               imp_tmp.append(string)
+                       elif string != '.':
+                               imp_step = 0 # Invalid syntax
+               
+               # Found 'import', imp_from is populated or None, create imp_name
+               elif imp_step == 2:
+                       if string == 'as':
+                               imp_name = '.'.join(imp_tmp)
+                               imp_step = 3
+                       elif type == tokenize.NAME or string == '*':
+                               imp_tmp.append(string)
+                       elif string != '.':
+                               imp_name = '.'.join(imp_tmp)
+                               imp_symb = imp_name
+                               imp_store = True
+               
+               # Found 'as', change imp_symb to this value and go back to step 
2
+               elif imp_step == 3:
+                       if type == tokenize.NAME:
+                               imp_symb = string
+                       else:
+                               imp_store = True
+               
+               # Both imp_name and imp_symb have now been populated so we can 
import
+               if imp_store:
+                       
+                       # Handle special case of 'import *'
+                       if imp_name == '*':
+                               parent = get_module(imp_from)
+                               imports.update(parent.__dict__)
+                               
+                       else:
+                               # Try importing the name as a module
+                               try:
+                                       if imp_from:
+                                               module = get_module(imp_from 
+'.'+ imp_name)
+                                       else:
+                                               module = get_module(imp_name)
+                                       imports[imp_symb] = module
+                               except (ImportError, ValueError, 
AttributeError, TypeError):
+                                       # Try importing name as an attribute of 
the parent
+                                       try:
+                                               module = __import__(imp_from, 
globals(), locals(), [imp_name])
+                                               imports[imp_symb] = 
getattr(module, imp_name)
+                                       except (ImportError, ValueError, 
AttributeError, TypeError):
+                                               pass
+                       
+                       # More to import from the same module?
+                       if string == ',':
+                               imp_tmp = []
+                               imp_step = 2
+                       else:
+                               imp_step = 0
+               
+               ###################
+               ## Class parsing ##
+               ###################
+               
+               # If we are inside a class then def and variable parsing should 
be done
+               # for the class. Otherwise the definitions are considered global
+               
+               # Look for 'class'
+               if cls_step == 0:
+                       if string == 'class':
+                               cls_name = None
+                               cls_indent = indent
+                               cls_step = 1
+               
+               # Found 'class', look for cls_name followed by '('
+               elif cls_step == 1:
+                       if not cls_name:
+                               if type == tokenize.NAME:
+                                       cls_name = string
+                                       cls_sline = False
+                                       cls_defs = dict()
+                                       cls_vars = dict()
+                       elif string == ':':
+                               cls_step = 2
+               
+               # Found 'class' name ... ':', now check if it's a single line 
statement
+               elif cls_step == 2:
+                       if type == tokenize.NEWLINE:
+                               cls_sline = False
+                               cls_step = 3
+                       elif type != tokenize.COMMENT and type != tokenize.NL:
+                               cls_sline = True
+                               cls_step = 3
+               
+               elif cls_step == 3:
+                       if cls_sline:
+                               if type == tokenize.NEWLINE:
+                                       classes[cls_name] = ClassDesc(cls_name, 
cls_defs, cls_vars)
+                                       cls_step = 0
+                       else:
+                               if type == tokenize.DEDENT and indent <= 
cls_indent:
+                                       classes[cls_name] = ClassDesc(cls_name, 
cls_defs, cls_vars)
+                                       cls_step = 0
+               
+               #################
+               ## Def parsing ##
+               #################
+               
+               # Look for 'def'
+               if def_step == 0:
+                       if string == 'def':
+                               def_name = None
+                               def_step = 1
+               
+               # Found 'def', look for def_name followed by '('
+               elif def_step == 1:
+                       if type == tokenize.NAME:
+                               def_name = string
+                               def_params = []
+                       elif def_name and string == '(':
+                               def_step = 2
+               
+               # Found 'def' name '(', now identify the parameters upto ')'
+               # TODO: Handle ellipsis '...'
+               elif def_step == 2:
+                       if type == tokenize.NAME:
+                               def_params.append(string)
+                       elif string == ')':
+                               if cls_step > 0: # Parsing a class
+                                       cls_defs[def_name] = def_params
+                               else:
+                                       defs[def_name] = def_params
+                               def_step = 0
+               
+               ##########################
+               ## Variable assignation ##
+               ##########################
+               
+               if cls_step > 0: # Parsing a class
+                       # Look for 'self.???'
+                       if var_step == 0:
+                               if string == 'self':
+                                       var_step = 1
+                       elif var_step == 1:
+                               if string == '.':
+                                       var_name = None
+                                       var_step = 2
+                               else:
+                                       var_step = 0
+                       elif var_step == 2:
+                               if type == tokenize.NAME:
+                                       var_name = string
+                                       var_step = 3
+                       elif var_step == 3:
+                               if string == '=':
+                                       cls_vars[var_name] = True
+                                       var_step = 0
+               
+               elif def_step > 0: # Parsing a def
+                       # Look for 'global ???[,???]'
+                       if var_step == 0:
+                               if string == 'global':
+                                       var_step = 1
+                       elif var_step == 1:
+                               if type == tokenize.NAME:
+                                       vars[string] = True
+                               elif string != ',' and type != tokenize.NL:
+                                       var_step == 0
+               
+               else: # In global scope
+                       # Look for names
+                       if string == 'for':
+                               var_accum = dict()
+                               var_forflag = True
+                       elif string == '=' or (var_forflag and string == 'in'):
+                               vars.update(var_accum)
+                               var_accum = dict()
+                               var_forflag = False
+                       elif type == tokenize.NAME:
+                               var_accum[string] = True
+                       elif not string in [',', '(', ')', '[', ']']:
+                               var_accum = dict()
+                               var_forflag = False
+               
+               #######################
+               ## General utilities ##
+               #######################
+               
+               prev_type = type
+               prev_string = string
+       
+        # end:for
+       
+       except TokenError:
+               incomplete = True
+               pass
+       
+       desc = ScriptDesc(txt.name, imports, classes, defs, vars, incomplete)
+       desc.set_time()
+       
+       global _parse_cache
+       _parse_cache[hash(txt.name)] = desc
+       return desc
+
 def get_modules(since=1):
        """Returns the set of built-in modules and any modules that have been
        imported into the system upto 'since' seconds ago.
@@ -45,20 +375,6 @@
        
        return cmp(x[0].upper(), y[0].upper())
 
-def cached_generate_tokens(txt, since=1):
-       """A caching version of generate tokens for multiple parsing of the same
-       document within a given timescale.
-       """
-       
-       global _token_cache, _cache_update
-       
-       t = time()
-       if _cache_update < t - since:
-               txt.reset()
-               _token_cache = [g for g in generate_tokens(txt.readline)]
-               _cache_update = t
-       return _token_cache
-
 def get_module(name):
        """Returns the module specified by its name. The module itself is 
imported
        by this method and, as such, any initialization code will be executed.
@@ -78,6 +394,7 @@
          'm' if the parameter is a module
          'f' if the parameter is callable
          'v' if the parameter is variable or otherwise indeterminable
+       
        """
        
        if isinstance(v, ModuleType):
@@ -140,7 +457,8 @@
 def current_line(txt):
        """Extracts the Python script line at the cursor in the Blender Text 
object
        provided and cursor position within this line as the tuple pair (line,
-       cursor)"""
+       cursor).
+       """
        
        (lineindex, cursor) = txt.getCursorPos()
        lines = txt.asLines()
@@ -166,7 +484,8 @@
 
 def get_targets(line, cursor):
        """Parses a period separated string of valid names preceding the cursor 
and
-       returns them as a list in the same order."""

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [15619] branches/soc-2008-quorn/release/scripts: All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing.

Reply via email to