On 19/03/2022 09:43, Robin Becker wrote:
I wonder if anybody has code to reconstruct the content definition of a DTD element. It's needed for some automatic
documentation process.
I inherited some recursive code, but it's clear that it's doing the wrong thing; it's fairly obvious that the parentheses
are being handled wrongly. I think that the parens ought to be added before the occurrence processing at
"if node.occur == 'plus':", but it's also obvious that they are not always
required.
.......
I made some changes to the algorithm and it seems to work better; it now
produces these results:
content reconstruction test
'<!ELEMENT a (b?)>' content=(b?)
'<!ELEMENT a (a|b)+>' content=(a | b)+
'<!ELEMENT a (a|b|c)+>' content=(a | b | c)+
'<!ELEMENT a (a|b?|c)+>' content=(a | b | c)*
'<!ELEMENT a (a|(b?,c)+)+>' content=(a | (b?, c)+)+
'<!ELEMENT a (a|(b+|c)+)+>' content=(a | b+ | c+)+
'<!ELEMENT a (z)>' content=(z)
'<!ELEMENT a (#PCDATA)>' content=(#PCDATA)
'<!ELEMENT a (#PCDATA|b)*>' content=(#PCDATA | b)*
'<!ELEMENT a (a,b,c)*>' content=(a, b, c)*
'<!ELEMENT a ANY>' content=ANY
'<!ELEMENT a EMPTY>' content=EMPTY
def elementContent(node):
    """Return the text used for an element node in a content model.

    This is the formatter that ``content`` hands to ``_contentRecur`` as
    ``elFmt``; it simply yields the node's ``name`` attribute.

    Args:
        node: a content-model node exposing a ``name`` attribute.

    Returns:
        Whatever ``node.name`` holds (expected to be the element name).
    """
    return node.name
def _contentRecur(node, parentType, elFmt):
    """Render one subtree of a DTD content model as content-spec text.

    Attributes read from each node:

        node.type:  ("element" | "pcdata" | "seq" | "or")
        node.occur: ("once" | "opt" | "plus" | "mult")
        node.left, node.right: child nodes or None ("seq" / "or" only)

    ``node.name`` (str | None) is not read here; it is what a formatter
    such as ``elementContent`` is expected to use.

    Args:
        node: root of the subtree to render, or None.
        parentType: ``type`` of the enclosing "seq"/"or" group, or None
            when rendering the top of the tree.
        elFmt: callable taking an "element" node and returning its text.

    Returns:
        The rendered text, e.g. ``"(a | (b+ | c)+)+"``. An empty string
        is returned when ``node`` is None.
    """
    if node is None:
        return ""

    separators = {"or": " | ", "seq": ", "}
    suffixes = {"opt": "?", "plus": "+", "mult": "*"}

    kind = node.type
    # "once" (or anything unrecognised) contributes no occurrence indicator.
    suffix = suffixes.get(node.occur, "")

    if kind == "element":
        text = f"{elFmt(node)}"
    elif kind == "pcdata":
        text = "#PCDATA"
    elif kind in separators:
        left, right = node.left, node.right
        if left is not None and right is not None:
            text = separators[kind].join(
                (
                    _contentRecur(left, kind, elFmt),
                    _contentRecur(right, kind, elFmt),
                )
            )
            # A group nested in a group of the same type can be flattened
            # ("a | b | c"), but only when it carries no indicator of its
            # own: "(b+ | c)+" must keep its parentheses, otherwise the
            # indicator would bind to the last item only ("b+ | c+").
            if parentType != kind or suffix:
                text = f"({text})"
        else:
            only = left if left is not None else right
            text = _contentRecur(only, kind, elFmt)
            # A content particle takes at most one occurrence indicator,
            # so never emit something like "b?+"; write "(b?)+" instead.
            if suffix and text.endswith(("?", "+", "*")):
                text = f"({text})"
    else:
        text = ""

    return text + suffix
def content(el):
    """Reconstruct the content specification of a DTD element declaration.

    Args:
        el: an element declaration exposing ``type`` and, unless the type
            is "pcdata", "empty" or "any", a ``content`` tree that
            ``_contentRecur`` can walk.

    Returns:
        ``"(#PCDATA)"``, ``"EMPTY"`` or ``"ANY"`` for those three types;
        otherwise the rendered content model, wrapped in parentheses if
        the rendering does not already start with one. An empty string is
        returned when the content tree renders to nothing (for example
        when ``el.content`` is None).
    """
    kind = el.type
    if kind == "pcdata":
        return "(#PCDATA)"
    if kind == "empty":
        return "EMPTY"
    if kind == "any":
        return "ANY"

    spec = f"{_contentRecur(el.content, None, elementContent)}"
    if not spec:
        # Nothing to render; indexing spec[0] here would raise IndexError.
        return spec
    if not spec.startswith("("):
        spec = f"({spec})"
    return spec
--
Robin Becker
_______________________________________________
lxml - The Python XML Toolkit mailing list -- lxml@python.org
To unsubscribe send an email to lxml-le...@python.org
https://mail.python.org/mailman3/lists/lxml.python.org/
Member address: arch...@mail-archive.com