On 19/03/2022 09:43, Robin Becker wrote:
I wonder if anybody has code to reconstruct the content definition of a DTD element. It's needed for some automatic documentation process.

I inherited some recursive code, but it's clear that it's doing the wrong thing; it's fairly obvious that the parentheses are being handled wrongly. I think that the parens ought to be added before the occur processing at
"if node.occur == 'plus':", but it's also obvious that they are not always
required.
.......
I made some changes to the algorithm and it seems to work better; it now
produces these results:

content reconstruction test
'<!ELEMENT a (b?)>' content=(b?)
'<!ELEMENT a (a|b)+>' content=(a | b)+
'<!ELEMENT a (a|b|c)+>' content=(a | b | c)+
'<!ELEMENT a (a|b?|c)+>' content=(a | b | c)*
'<!ELEMENT a (a|(b?,c)+)+>' content=(a | (b?, c)+)+
'<!ELEMENT a (a|(b+|c)+)+>' content=(a | b+ | c+)+
'<!ELEMENT a (z)>' content=(z)
'<!ELEMENT a (#PCDATA)>' content=(#PCDATA)
'<!ELEMENT a (#PCDATA|b)*>' content=(#PCDATA | b)*
'<!ELEMENT a (a,b,c)*>' content=(a, b, c)*
'<!ELEMENT a ANY>' content=ANY
'<!ELEMENT a EMPTY>' content=EMPTY


def elementContent(node):
    """Default element formatter: return the ``name`` attribute of *node*."""
    name = node.name
    return name

def _contentRecur(node, parentType, elFmt):
    """
    node.type: ("element" | "pcdata" | "seq" | "or" )
    node.occur: ("once" | "opt" | "plus" | "mult")
    none.name: (str | None)
    """
    s = ""
    if node is  None:
        return s
    t = node.type
    occur = node.occur
    if t == "element":
        s = f"{elFmt(node)}"
    elif t=='pcdata':
        s = '#PCDATA'
    else:
        right = node.right
        left = node.left
        if t == 'or':
            if left and right:
                s = f"{_contentRecur(left,t,elFmt)} | 
{_contentRecur(right,t,elFmt)}"
                if parentType!=t: s = f"({s})"
            elif left is not None:
                s = _contentRecur(left,t,elFmt)
            elif right is not None:
                s = _contentRecur(right,t,elFmt)
        elif t == 'seq':
            if left and right:
                s = f"{_contentRecur(left,t,elFmt)}, 
{_contentRecur(right,t,elFmt)}"
                if parentType!=t: s = f"({s})"
            elif left is not None:
                s = _contentRecur(left,t,elFmt)
            elif right is not None:
                s = _contentRecur(right,t,elFmt)
    if occur == 'plus':
        s += "+"
    elif occur == 'opt':
        s += '?'
    elif occur=='mult':
        s += '*'
    return s

def content(el):
    """Return the content model of an element declaration as DTD text.

    ``el.type`` values ``'pcdata'``, ``'empty'`` and ``'any'`` map to the
    fixed strings ``'(#PCDATA)'``, ``'EMPTY'`` and ``'ANY'``.  For any
    other type the tree at ``el.content`` is rendered with
    ``_contentRecur`` (using ``elementContent`` to format element names)
    and wrapped in parentheses unless the rendering already starts with
    one.

    Args:
        el: element declaration exposing ``type`` and ``content``.

    Returns:
        The content model string.  A declaration whose content tree
        renders as an empty string yields ``'()'``.
    """
    kind = el.type
    if kind == 'pcdata':
        return '(#PCDATA)'
    elif kind == 'empty':
        return 'EMPTY'
    elif kind == 'any':
        return 'ANY'
    s = _contentRecur(el.content, None, elementContent)
    # startswith() rather than s[0]: a missing content tree renders as ""
    # and indexing it would raise IndexError.
    if not s.startswith('('):
        s = f"({s})"
    return s

--
Robin Becker
_______________________________________________
lxml - The Python XML Toolkit mailing list -- lxml@python.org
To unsubscribe send an email to lxml-le...@python.org
https://mail.python.org/mailman3/lists/lxml.python.org/
Member address: arch...@mail-archive.com

Reply via email to