Re: Subtle bug in leoGlobals.py?

Reinhard Engel Thu, 13 Mar 2014 15:49:42 -0700

>
> My guess is that sorting as you suggest is correct.  However, 
> returning pat rather than the present code:: 
>
>    return '|'.join(aList) 
>
> seems like a separate issue.  Care to comment? 
>


I stumbled upon this issue when I tried to understand the code of the 
function 'get_directives_dict' in leoGlobals.py. I've worked a lot with 
regular expressions in Python and saw some possibilities to simplify this 
complex function. The attached file includes just the functions 
'get_directives_dict' and 'compute_directives_re' with some comments by me 
(marked 'RE'). Basically, I moved the compilation of the regular expression 
from the first complex function into the second function, where the pattern 
is constructed. Then I constructed a different pattern that eliminates 
some tests in 'get_directives_dict' and simplifies it. I'm *not* suggesting 
to change Leo's code base. I just stumbled upon this missing 'comment' 
directive.

Reinhard

-- 
You received this message because you are subscribed to the Google Groups 
"leo-editor" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/leo-editor.
For more options, visit https://groups.google.com/d/optout.

def get_directives_dict(p,root=None):
    """
    Collect the @directives that appear in the headline and body of p.

    p.h and p.b are scanned, headline first, with the compiled pattern
    returned by g.compute_directives_re(), so a directive found in the
    headline takes precedence over the same directive in the body.

    Returns a dict mapping each directive name (without the '@') to the
    stripped remainder of the line on which it first occurs.  Extra keys:

    - 'root' is also set whenever 'root-doc' or 'root-code' is recorded,
      and is set to 0 when root is given, root[0] is true and
      g_noweb_root matches the body.
    - '@path_in_body' is set to p.h when @path occurs in the body of a
      node for which p.isAnyAtFileNode() is true.
    """
    trace = False and not g.unitTesting
    verbose = False
    if trace:
        g.trace('*'*20,p.h)
    if root:
        root_node = root[0]
    d = {}
    # Ask for the pattern on every call so plugins can add directives.
    directives_pat = g.compute_directives_re()
    terminators = (' ','\t','\n')

    # The headline is scanned first because it is more visible.
    for kind,s in (('head',p.h),('body',p.b)):
        for m in directives_pat.finditer(s):
            word = m.group(0)[1:] # Drop the leading '@'.
            name = word.strip()
            if name in d:
                continue # Only the first occurrence of a directive counts.
            # j indexes the character just after the directive name.
            j = m.start(0) + 1 + len(word)
            # Presumably g.skip_line returns the index where the next
            # line starts -- TODO confirm against its definition.
            k = g.skip_line(s,j)
            val = s[j:k].strip()
            if j < len(s) and s[j] not in terminators:
                # The name runs straight into other text, so this is not
                # a valid directive: silently ignore it.
                continue
            if name in ('root-doc','root-code'):
                d['root'] = val # In addition to the optioned version.
            d[name] = val
            if trace:
                g.trace(name,kind,repr(val))
            # A special case for @path in the body text of @<file> nodes.
            # No warning is issued here: only flags are set.
            if kind == 'body' and name == 'path' and p.isAnyAtFileNode():
                g.app.atPathInBodyWarning = p.h
                d['@path_in_body'] = p.h
                if trace:
                    g.trace('@path in body',p.h)

    if root:
        # Only the first match of g_noweb_root in the body matters.
        first = next(g_noweb_root.finditer(p.b),None)
        if first is not None:
            if root_node:
                d["root"] = 0 # The value is not important.
            else:
                g.es('%s= may only occur in a topmost node (i.e., without a parent)' % (
                    g.angleBrackets('*')))
    if trace and verbose:
        g.trace('%4d' % (len(p.h) + len(p.b)))
    return d

def compute_directives_re ():
    '''
    Return a compiled regex that matches any Leo directive at the start
    of a line.

    The pattern is rebuilt from the module-level globalDirectiveList on
    every call, so names added to that list later are picked up.  The
    list itself is not modified.

    Group 1 of a match is the directive name without the leading '@'.
    A lookahead requires the name to be followed by a space, a tab, a
    newline or the end of the string, so '@cx' does not match the
    directive 'c' while a bare '@c' on the last line does.

    Returns a pattern object compiled with re.MULTILINE.  When
    globalDirectiveList is empty the returned pattern never matches.
    '''
    # History: the earlier version (EKR) returned the uncompiled string
    # '|'.join(['^@%s' % z ...]) for every directive except 'others',
    # and left compilation to the caller.
    # NOTE(review): that version skipped 'others' on purpose (@others can
    # have leading whitespace); this version keeps every listed name --
    # confirm that is what callers want.

    # Longest names first, so that a short name such as 'c' is never
    # preferred over a longer one such as 'comment'.  sorted() copies,
    # leaving globalDirectiveList untouched.
    names = sorted(globalDirectiveList, key=len, reverse=True)
    if not names:
        # An empty alternation would match a bare '@'; return a pattern
        # that can never match instead.
        return re.compile(r'(?!)')
    # Escape each name so regex metacharacters in a directive name are
    # matched literally.
    alternatives = '|'.join(re.escape(z) for z in names)
    # The '@' and the terminator test live in the pattern itself:
    # group 1 is the bare directive name.
    pat = r'^@(%s)(?=[ \t\n]|\Z)' % alternatives
    return re.compile(pat, re.MULTILINE)

Re: Subtle bug in leoGlobals.py?

Reply via email to