Module Name:    othersrc
Committed By:   joerg
Date:           Thu Apr 30 00:28:59 UTC 2009

Added Files:
        othersrc/usr.bin/pod2mdoc: pod2mdoc.py

Log Message:
Add a Python script to convert POD markup to mdoc markup.
It tries to do something sane, e.g. by detecting arguments etc., but
post-processing is still required.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 othersrc/usr.bin/pod2mdoc/pod2mdoc.py

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: othersrc/usr.bin/pod2mdoc/pod2mdoc.py
diff -u /dev/null othersrc/usr.bin/pod2mdoc/pod2mdoc.py:1.1
--- /dev/null	Thu Apr 30 00:28:59 2009
+++ othersrc/usr.bin/pod2mdoc/pod2mdoc.py	Thu Apr 30 00:28:58 2009
@@ -0,0 +1,606 @@
+#!/usr/pkg/bin/python2.5
+import datetime
+import re
+
+nroff_post_punctuation = (".", ",", ":", ";", ")", "]", "?", "!")
+nroff_pre_punctuation = ("(", "[")
+
+# Common base class for the tree nodes defined below.  Subclasses provide
+# output(), which returns the node's rendering as a list of lines.
+class Node(object):
+    def __str__(self):
+	# Join the lines returned by self.output() with newlines.
+	return "\n".join(self.output())
+
+def convert_bracket(line):
+    if line[0] == ".":
+	args = line.split()
+	output = []
+	if len(args) > 1:
+	    if args[1] == "[":
+		output.append(".Oo")
+		output.append(args[0][1:])
+	    else:
+		output.append(args[0])
+		if args[1] == "]":
+		    output.append("Oc")
+		else:
+		    output.append(args[1])
+	    for a in args[2:]:
+		if a == "[":
+		    output.append("Oo")
+		elif a == "]":
+		    output.append("Oc")
+		else:
+		    output.append(a)
+	else:
+	    output.append(args[0])
+	if output[0] == ".Oo" and output[-1] == "Oc" and "Oo" not in output[1:-1] and "Oc" not in output[1:-1]:
+	    output[0] = ".Op"
+	    output.pop()
+	return " ".join(output)
+    else:
+	return line
+
+# Apply f to every element of lines and concatenate the results.
+#   lines - list of lines
+#   f     - callable taking one line and returning a list of lines
+# Returns the flattened list.
+def apply_expand(lines, f):
+    output = []
+    for l in lines:
+	output += f(l)
+    return output
+
+# Turn every match of regex in line into a macro line of its own.
+#   line  - text to scan
+#   regex - pattern handed to re.search
+#   macro - first word of each generated line, e.g. ".Fl"
+#   sub   - callable taking the match object and returning a list of words
+# Returns a list of lines: for each match the text before it (if any) and
+# the macro line, followed by the remaining text if it is not empty.
+def expand_generic(line, regex, macro, sub):
+    output = []
+    while True:
+	m = re.search(regex, line)
+	if m is None:
+	    break
+	pre = line[:m.start()].rstrip()
+	# NOTE: matched is assigned but never used below.
+	matched = line[m.start():m.end()].strip()
+	post = line[m.end():].lstrip()
+	args = [ macro ]
+	# Opening punctuation directly before the match is moved onto the
+	# macro line, nearest character first, ahead of the words from sub.
+	while pre and pre[-1] in nroff_pre_punctuation:
+	    args.append(pre[-1])
+	    pre = pre[:-1].rstrip()
+	if pre:
+	    output.append(pre)
+	args += sub(m)
+	# Closing punctuation directly after the match is appended to the
+	# macro line as separate words.
+	while post and post[0] in nroff_post_punctuation:
+	    args.append(post[0])
+	    post = post[1:].lstrip()
+	output.append(" ".join(args))
+	# Continue scanning in the text after the match.
+	line = post
+    if line:
+	output.append(line)
+    return output
+
+# Replace each literal occurrence of marker (it is regex-escaped) by a macro
+# line "<macro> <replacement>"; see expand_generic for the return value.
+def expand_static_tag(line, marker, macro, replacement):
+    return expand_generic(line, re.escape(marker), macro, lambda m: [replacement])
+
+# Convert \*[Lt]B<http://...>\*[Gt] (or ftp://) into ".Lk url".
+# The slice [8:-7] removes the 8-character prefix "\*[Lt]B<" and the
+# 7-character suffix ">\*[Gt]" from the matched text.
+def expand_url(line):
+    return expand_generic(line, r"\\\*\[Lt\]B<(http|ftp)://.*?>\\\*\[Gt\]",
+	".Lk", lambda m: [ m.group(0)[8:-7].strip() ])
+
+# Convert L<name(N)|name(N)> into ".Xr name N".  The backreference \1
+# requires the text on both sides of "|" to be identical.
+def expand_crossref(line):
+    return expand_generic(line, r"L<(([-a-zA-Z0-9_\.]+)\(([0-9]+)\))\|\1>",
+	".Xr", lambda m: [ m.group(2).strip(), m.group(3)])
+
+# Convert C<text> into ".Va text".  The body may not contain "<", so C<>
+# spans with nested markup are left alone by this pass.
+def expand_code(line):
+    return expand_generic(line, "C<([^<]*)>",
+	".Va", lambda m: [ m.group(1).strip() ])
+
+# Handle the special case C<prefix I<file> suffix>: the literal nested
+# "I<file>" is replaced by the word "file" and the result is emitted as
+# ".Va .Sy <prefix>file<suffix>" (prefix and suffix stripped of whitespace).
+def expand_code2(line):
+    return expand_generic(line, "C<([^<]*)I<file>([^<]*)>",
+	".Va", lambda m: [ ".Sy %sfile%s" % (m.group(1).strip(), m.group(2).strip()) ])
+
+def expand_options_fixed(line):
+    def split_options(m):
+	output = []
+	if m.group(1) == "Os":
+	    output.append("\&Os")
+	else:
+	    output.append(m.group(1))
+	output.append("Ns")
+	output.append("Oo")
+	output.append("Ns")
+	output.append("Ar")
+	output.append(m.group(2))
+	if m.group(3):
+	    args = m.group(3).split("|")[1:]
+	    for a in args:
+		output.append("Ns")
+		output.append("|")
+	        output.append("Ns")
+		output.append("Ar")
+		output.append(a[2:-1])
+	output.append("Ns")
+	output.append("Oc")
+	return output
+
+    return expand_generic(line, r"B<-([-a-zA-Z0-9=#\+,]+)>\[B<([^<>]*)>(\|B<[^<>]*>)*\]", ".Fl", split_options)
+
+# Convert B<-opt [-opt ...]>[B<->I<arg>] into
+#   .Fl opt [Fl opt ...]  with "-" appended to the last option word,
+#   followed by "Ns Ar arg".
+# Returns the list of lines produced by expand_generic.
+def expand_options_optional(line):
+    def split_options(m):
+	# Drop the "B<" and ">" wrapper and split into the option words.
+	args = m.group(1)[2:-1].split()
+	output = []
+	first = True
+	for a in args:
+	    # The first option follows the ".Fl" macro supplied by
+	    # expand_generic; later ones need their own "Fl".
+	    if not first:
+		output.append("Fl")
+	    else:
+		first = False
+	    # a[1:] drops the leading "-".  "Os" is written as "\&Os",
+	    # presumably so that it is not taken for the Os macro.
+	    if a[1:] == "Os":
+		output.append("\&Os")
+	    else:
+		output.append(a[1:])
+	output[-1] = output[-1] + "-"
+	output.append("Ns")
+	output.append("Ar")
+	output.append(m.group(3))
+
+	return output
+
+    return expand_generic(line, r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*>)\[B<->I<([^<>]*)>\]", ".Fl", split_options)
+
+# Convert B<-opt [-opt ...][=>I<arg>B<]...> into
+#   .Fl opt [Fl opt ...] Ns Oo = Ns Ar arg Oc
+# The match ends inside a bold span (after "B<]"), so the loop below is a
+# copy of expand_generic that puts "B<" back in front of the remaining
+# text.  Returns a list of lines like expand_generic does.
+def expand_options_optional2(line):
+    def split_options(m):
+	# Drop the leading "B<" and the trailing "[=>" and split into words.
+	args = m.group(1)[2:-3].split()
+	output = []
+	first = True
+	for a in args:
+	    # Only options after the first need their own "Fl".
+	    if not first:
+		output.append("Fl")
+	    else:
+		first = False
+	    # a[1:] drops the leading "-".  "Os" is written as "\&Os",
+	    # presumably so that it is not taken for the Os macro.
+	    if a[1:] == "Os":
+		output.append("\&Os")
+	    else:
+		output.append(a[1:])
+	output.append("Ns")
+	output.append("Oo")
+	output.append("=")
+	output.append("Ns")
+	output.append("Ar")
+	output.append(m.group(3))
+	output.append("Oc")
+
+	return output
+
+    regex = r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*\[=>)I<([^<>]*)>B<\]"
+    macro = ".Fl"
+
+    output = []
+    while True:
+	m = re.search(regex, line)
+	if m is None:
+	    break
+	pre = line[:m.start()].rstrip()
+	# NOTE: matched is assigned but never used below.
+	matched = line[m.start():m.end()].strip()
+	post = line[m.end():].lstrip()
+	args = [ macro ]
+	# Opening punctuation before the match moves onto the macro line.
+	while pre and pre[-1] in nroff_pre_punctuation:
+	    args.append(pre[-1])
+	    pre = pre[:-1].rstrip()
+	if pre:
+	    output.append(pre)
+	args += split_options(m)
+	# Re-open the bold span whose "B<" was consumed by the match.
+	post = "B<" + post
+	# NOTE: post now always starts with "B", so this loop never runs.
+	while post and post[0] in nroff_post_punctuation:
+	    args.append(post[0])
+	    post = post[1:].lstrip()
+	output.append(" ".join(args))
+	line = post
+    if line:
+	output.append(line)
+    return output
+
+# General option conversion: a bold span starting with "-", optionally
+# followed by an italic argument and/or "=" argument forms, becomes one
+# ".Fl ..." line.  Regex groups used by split_options:
+#   1      the whole B<...> span
+#   4-8    optional I<args> after the span: 5 = optional space,
+#          7 = argument text, 8 = trailing "..."
+#   9-12   optional [=I<args>] or [B<=>I<args>]: 11 = argument text,
+#          12 = trailing "..."
+#   13-14  optional B<=>I<arg>: 14 = argument text
+# Returns the list of lines produced by expand_generic.
+def expand_options(line):
+    def split_options(m):
+	# Drop the "B<" and ">" wrapper and split into words.
+	args = m.group(1)[2:-1].split()
+	output = []
+	first = True
+	for a in args:
+	    # Words inside the bold span that do not start with "-" are
+	    # emitted as arguments.
+	    if a[0] != "-":
+		output.append("Ar")
+		output.append(a)
+		continue
+	    # The first option follows the ".Fl" macro supplied by
+	    # expand_generic; later ones need their own "Fl".
+	    if not first:
+		output.append("Fl")
+	    else:
+		first = False
+	    # "Os" is written as "\&Os", presumably so that it is not
+	    # taken for the Os macro.
+	    if a[1:] == "Os":
+		output.append("\&Os")
+	    else:
+		output.append(a[1:])
+	if m.group(4):
+	    # No space between option and argument: join them with Ns.
+	    if not m.group(5):
+		output.append("Ns")
+	    args2 = m.group(7).split()
+	    for a in args2:
+		output.append("Ar")
+		output.append(a)
+	    # A trailing "..." is glued onto the last word emitted.
+	    if m.group(8):
+		output[-1] = output[-1] + "..."
+	if m.group(9):
+	    # Bracketed "=arg" form; the brackets themselves are not emitted.
+	    output += [ "Ns", "=", "Ns" ]
+	    args3 = m.group(11).split()
+	    for a in args3:
+		output.append("Ar")
+		output.append(a)
+	    if m.group(12):
+		output[-1] = output[-1] + "..."
+	if m.group(13):
+	    output += [ "Ns", "=", "Ns", "Ar", m.group(14) ]
+	return output
+    return expand_generic(line, r"(B<-[-a-zA-Z0-9=#\+,_]+?(\.\.\.)?( [-a-zA-Z0-9=#\+,_]+)*>)(( ?)(I<([^<>]*)>(\.\.\.)?))?(\[(B<=>|=)I<([^<>]*)>\](\.\.\.)?)?(B<=>I<([^<>]*)>)?", ".Fl", split_options)
+
+# Convert F<...> into ".Pa name".  The body may contain plain text and
+# non-nested I<...>/B<...> spans; their wrappers are removed and the pieces
+# are concatenated into a single word.
+def expand_filename(line):
+    def expand_file(m):
+	line = m.group(1)
+	args = []
+	while True:
+	    m = re.search(r"[BI]<([^<>]*)>", line)
+	    if m is None:
+		break
+	    # Keep the text before the span and the span's content.
+	    args.append(line[:m.start()])
+	    args.append(m.group(1))
+	    line = line[m.end():]
+        args.append(line)
+	return [ "".join(args)]
+    return expand_generic(line, r"F<((I<[^<>]*>|B<[^<>]*>|[^<>]*)*)>", ".Pa", expand_file)
+
+# Convert any I<text> without nested markup into ".Em text".
+def expand_remaining_italic(line):
+    return expand_generic(line, r"I<([^<>]*)>", ".Em", lambda m: [ m.group(1)] )
+
+# Convert any B<text> without nested markup into ".Sy text".
+def expand_remaining_bold(line):
+    return expand_generic(line, r"B<([^<>]*)>", ".Sy", lambda m: [ m.group(1)] )
+
+# Convert option markup at the START of line into macro words without a
+# leading "." ("Fl name ..."), for use on an existing macro line.
+# re.match is applied repeatedly; whatever is left once it no longer
+# matches is appended unchanged.  Regex groups:
+#   1     option name (without the leading "-")
+#   2/3   further words inside the same bold span
+#   4     spaces between the bold span and an italic argument
+#   5/6   italic argument I<arg>
+#   7-9   optional B<=>I<arg2> or B<->I<arg2>: 8 = separator, 9 = arg2
+#   10    a bold span containing only spaces (consumed, not emitted)
+# Returns the words joined by single spaces.
+def macro_options(line):
+    output = []
+    while True:
+	m = re.match(r"B<-([-a-zA-Z0-9=#\+,_]+)( ([- a-zA-Z0-9+]*))?>( +)?(I<([^<>]*)>(B<(=|-)>I<([^<>]*)>)?)?(B< +>)?", line)
+	if m is None:
+	    break
+
+        output.append("Fl")
+	# "Os" is written as "\&Os", presumably so that it is not taken
+	# for the Os macro.
+	if m.group(1) == "Os":
+	    output.append("\&Os")
+	else:
+	    output.append(m.group(1))
+	if m.group(2):
+	    # Extra words: "-x" becomes another flag, anything else an argument.
+	    for a in m.group(3).split():
+		if a[0] == "-":
+        	    output.append("Fl")
+		    if a[1:] == "Os":
+			output.append("\&Os")
+		    else:
+			output.append(a[1:])
+		else:
+		    output.append("Ar")
+		    output.append(a)
+	if m.group(5):
+	    # No space between option and argument: join them with Ns.
+	    if not m.group(4):
+	        output.append("Ns")
+	    output.append("Ar")
+	    output.append(m.group(6).strip())
+	    if m.group(7):
+		output.append("Ns")
+		output.append(m.group(8))
+		output.append("Ns")
+		output.append("Ar")
+		output.append(m.group(9).strip())
+	line = line[m.end():].lstrip()
+    if line:
+	output.append(line)
+    return " ".join(output)
+
+# Convert non-nested I<>, B<> and C<> spans anywhere in line into macro
+# words without a leading ".": I<x> becomes "Em x Ns", B<x> and C<x> become
+# "Sy x Ns".  Text between spans is kept.  "Ns" is emitted after every
+# span, including the last one.  Returns the words joined by single spaces.
+def macro_bold_italic(line):
+    output = []
+    while True:
+	m = re.search(r"[IBC]<([^<>]*)>", line)
+	if m is None:
+	    break
+
+	if line[:m.start()]:
+	    output.append(line[:m.start()])
+	# The first character of the match is the tag letter.
+	if m.group(0)[0] == "I":
+	    output.append("Em")
+	else:
+	    output.append("Sy")
+	output.append(m.group(1))
+	output.append("Ns")
+	line = line[m.end():].lstrip()
+    if line:
+	output.append(line)
+
+    return " ".join(output)
+
+# Remove S<...> markup from string.
+# "S< >" and "B< >" are first replaced by "\~".  Every remaining "S<" and
+# its matching ">" are then deleted while the enclosed text, including any
+# nested <...> markup, is kept.
+# Raises SyntaxError if an "S<" has no matching ">".
+def expand_S(string):
+    string = string.replace("S< >", "\~")
+    string = string.replace("B< >", "\~")
+    while True:
+	if not "S<" in string:
+	    return string
+	pre, tag, post = string.partition("S<")
+	# Scan for the ">" that closes this "S<", counting nested "<".
+	open_tags = 1
+	i = 0
+	while i < len(post):
+	    if post[i] == '<':
+		open_tags += 1
+	    elif post[i] == '>':
+		open_tags -= 1
+		if not open_tags:
+		    break
+	    i += 1
+	if open_tags:
+	    raise SyntaxError, "Unbalanced <>"
+	# Drop "S<" and the closing ">" at index i; keep what was between.
+	string = pre + post[:i] + post[i+1:]
+
+# Root node of the tree: renders the mdoc prologue followed by all children.
+class Document(Node):
+    def __init__(self):
+	# Child nodes in document order.
+	self._children = []
+    def append(self, c):
+	self._children.append(c)
+    def output(self):
+	# Returns the whole document as a list of lines.  Reads the
+	# module-level settings mdoc_date, mdoc_command and mdoc_section.
+	output = []
+	output.append(".\\\" $NetBSD: pod2mdoc.py,v 1.1 2009/04/30 00:28:58 joerg Exp $")
+	output.append(".Dd %s" % mdoc_date)
+	output.append(".Dt %s %s" % (mdoc_command.upper(), mdoc_section))
+	output.append(".Os")
+	# Assumes that Section level=3 doesn't happen
+	# without Section level < 3 before it.
+	for c in self._children:
+	    output += c.output()
+	# Drop a paragraph break left at the very end.
+	if output[-1] == ".Pp":
+	    output.pop()
+	# The empty last element makes the joined text end with a newline.
+	output.append("")
+	# On macro lines, a double quote preceded by a space gets "\&"
+	# inserted in front of it.
+	output2 = []
+	for l in output:
+	    if l.startswith("."):
+		output2.append(l.replace(' "', ' \&"'))
+	    else:
+		output2.append(l)
+	return output2
+
+# A =head1/=head2/=head3 section together with its child nodes.
+class Section(Node):
+    def __init__(self, level, title):
+	# level is the heading level (1, 2 or 3); S<> markup is removed
+	# from the title.
+	self.level = level
+	self.title = expand_S(title)
+	self._children = []
+    def append(self, c):
+	self._children.append(c)
+    def output(self):
+	# Returns the heading line followed by the children's lines.
+	output = []
+	if self.level == 1:
+	    output.append(".Sh %s" % self.title)
+	if self.level == 2:
+	    output.append(".Ss %s" % self.title)
+	if self.level == 3:
+	    output.append(".It Sy %s" % self.title)
+	# Level-3 sections are rendered as list items, so each run of
+	# consecutive level-3 children is wrapped in ".Bl -tag" / ".El".
+	in_sect3 = False
+	for c in self._children:
+	    if in_sect3:
+		if not isinstance(c, Section) or c.level != 3:
+		    output.append(".El")
+		    in_sect3 = False
+	    else:
+		if isinstance(c, Section) and c.level == 3:
+		    output.append(".Bl -tag -width xx")
+		    in_sect3 = True
+	    output += c.output()
+	# Drop a paragraph break left at the end of the section.
+	if output[-1] == ".Pp":
+	    output.pop()
+	# Close a list that is still open after the last child.
+	if in_sect3:
+	    output.append(".El")
+	return output
+
+# A verbatim block, rendered as a literal display.
+class Display(Node):
+    def __init__(self, lines):
+	# Remove S<> markup and replace the remaining "<" and ">" by the
+	# strings \*[Lt] and \*[Gt].
+	lines = [ expand_S(l).replace("<", "\*[Lt]").replace(">", "\*[Gt]") for l in lines ]
+	self.lines = []
+	for l in lines:
+	    # Prefix lines starting with "." with "\&" (presumably so they
+	    # are not read as macro lines).
+	    if l.startswith("."):
+		self.lines.append("\&" + l)
+	    else:
+		self.lines.append(l)
+	# Drop empty lines at the end of the block.
+	while self.lines and not self.lines[-1]:
+	    self.lines.pop()
+    def output(self):
+	return [".Bd -literal -offset indent"] + self.lines + [ ".Ed" ]
+
+# A paragraph of running text, converted to mdoc lines on construction.
+class Paragraph(Node):
+    def __init__(self, lines):
+	# Join the input lines with two spaces, then split at ".  " so that
+	# each piece ending in a period followed by two spaces becomes a
+	# line of its own.
+	line = "  ".join(lines)
+	if ".  " in line:
+	    lines = [ l.strip() + "." for l in line.split(".  ") ]
+	    # The last piece was not followed by ".  "; remove the period
+	    # added to it above.
+	    lines[-1] = lines[-1][:-1]
+	else:
+	    lines = [ line ]
+	def kill_space(x):
+	    # Collapse runs of spaces into a single space.
+	    while "  " in x:
+		x = x.replace("  ", " ")
+	    return x
+	lines = [ expand_S(x) for x in lines ]
+	lines = [ kill_space(x) for x in lines ]
+	# B<name> for each name in the module-level command_names list
+	# becomes ".Nm name".
+	for name in command_names:
+	    lines = apply_expand(lines, lambda l: expand_static_tag(l, "B<%s>" % name, ".Nm", name))
+	# The passes below run in this fixed order, with the generic
+	# italic/bold passes last.
+	lines = apply_expand(lines, expand_url)
+	lines = apply_expand(lines, expand_crossref)
+	lines = apply_expand(lines, expand_code)
+	lines = apply_expand(lines, expand_code2)
+	lines = apply_expand(lines, expand_options_fixed)
+	lines = apply_expand(lines, expand_options_optional)
+	lines = apply_expand(lines, expand_options_optional2)
+	lines = apply_expand(lines, expand_options)
+	lines = apply_expand(lines, expand_filename)
+	lines = apply_expand(lines, expand_remaining_italic)
+	lines = apply_expand(lines, expand_remaining_bold)
+	lines = [ convert_bracket(x) for x in lines ]
+
+	self.lines = lines
+    def output(self):
+	# Every paragraph is followed by a paragraph break.
+	return self.lines + [ ".Pp" ]
+
+# An =over ... =back list.  _children holds (label, [child nodes]) pairs,
+# one per =item.  _type is "*" or "-" for bullet/dash lists and None for
+# tagged lists.
+class List(Node):
+    def __init__(self):
+	self._children = []
+	self._type = None
+    def appendItem(self, c):
+	# Start a new item whose label is c with S<> markup removed.
+	self._children.append((expand_S(c), []))
+    def append(self, c):
+	# Add node c to the most recent item; raises IndexError if no item
+	# has been started yet.
+	self._children[-1][1].append(c)
+    def finalize(self):
+	# Decide the list type and convert the item labels.  convert_to_tree
+	# calls this when it reaches =back.
+	# Pass 1: if every label starts with the same "*" or "-" character,
+	# keep that as the list type and leave the labels alone.
+	# NOTE(review): the breaks on an empty label and on a label with
+	# another first character leave a _type set by earlier items in
+	# place - confirm this is intended.
+	for i,l in self._children:
+	    if not i:
+		break
+	    if self._type and i[:1] != self._type:
+		self._type = None
+		break
+	    if i[:1] not in ("*", "-"):
+		break
+	    self._type = i[0]
+	else:
+	    return
+	# Pass 2: if every label is a single I<...> span without nested
+	# markup, emit the labels as "Sy text".
+	for i,l in self._children:
+	    if i[:2] != "I<" or i[-1] != ">":
+		break
+	    if "<" in i[2:-1] or ">" in i[2:-1]:
+		break
+	else:
+	    self._children = [ ("Sy %s" % i[2:-1], l) for (i,l) in self._children]
+	    return
+
+	# Otherwise convert option and bold/italic markup in each label.
+	self._children = [ (convert_bracket(macro_bold_italic(macro_options(i))), l) for (i,l) in self._children]
+
+    def output(self):
+	# Returns the list as lines between ".Bl ..." and ".El".
+	output = []
+	if self._type == "*":
+	    output.append(".Bl -bullet")
+	elif self._type == "-":
+	    output.append(".Bl -dash")
+	else:
+	    output.append(".Bl -tag -width xx")
+	for i, l in self._children:
+	    # Bullet and dash items are written without their label.
+	    if self._type:
+		output.append(".It")
+	    else:
+		output.append(".It %s" % i)
+	    for c in l:
+		output += c.output()
+	output.append(".El")
+	return output
+
+# Parse POD source into a node tree.
+#   data - iterable of input lines
+# Returns the Document at the root of the tree.
+# Raises SyntaxError for unknown "=" commands and for =back/=item outside
+# a list or =head inside an open list.
+# Reads the module-level mdoc_display_space setting.
+def convert_to_tree(data):
+    # At most one of these is true: lines are being collected in buffer
+    # for a literal display or for a paragraph.
+    in_display = False
+    in_paragraph = False
+    buffer = []
+
+    # Stack of open containers; document[0] is the Document and
+    # document[-1] receives new nodes.
+    document = [ Document() ]
+    def append(c):
+	document[-1].append(c)
+
+    for line in data:
+	line = line.rstrip()
+	if not line or line.isspace():
+	    # An empty line ends a paragraph; inside a display it is kept
+	    # as part of the display.
+	    if in_paragraph:
+		append(Paragraph(buffer))
+		in_paragraph = False
+		buffer = []
+	    if in_display:
+		buffer.append(line)
+	elif line[0] == "\t" or (mdoc_display_space and line[0] == " "):
+	    # Indented line: part of a literal display; the first character
+	    # is dropped.
+	    if in_paragraph:
+		append(Paragraph(buffer))
+		in_paragraph = False
+		buffer = []
+	    if not in_display:
+		in_display = True
+	    buffer.append(line[1:])
+        elif line[0] == '=':
+	    # A POD command: flush whatever was being collected first.
+	    if in_display:
+		append(Display(buffer))
+		buffer = []
+		in_display = False
+	    if in_paragraph:
+		append(Paragraph(buffer))
+		buffer = []
+		in_paragraph = False
+	    if line.startswith("=pod"):
+		continue
+	    # =cut stops processing; no further input lines are read.
+	    if line.startswith("=cut"):
+		break
+	    if line.startswith("=head1") or line.startswith("=head2") or line.startswith("=head3"):
+		# The character after "=head" is the level digit.
+		level = int(line[5])
+		# Close open sections of the same or a deeper level so the
+		# new section attaches to its parent.
+		while not isinstance(document[-1], Document):
+		    if not isinstance(document[-1], Section):
+			raise SyntaxError, "Bad nesting"
+		    if document[-1].level >= level:
+			document.pop()
+		    else:
+			break
+		c = Section(level, line[6:].strip())
+		append(c)
+		document.append(c)
+	    elif line.startswith("=over"):
+		c = List()
+		append(c)
+		document.append(c)
+	    elif line.startswith("=back"):
+		if not isinstance(document[-1], List):
+    		    raise SyntaxError, "Bad nesting"
+		document[-1].finalize()
+		document.pop()
+	    elif line.startswith("=item"):
+		if not isinstance(document[-1], List):
+    		    raise SyntaxError, "Bad nesting"
+		# The label is the text after "=item" and one more character.
+		document[-1].appendItem(line[6:].strip())
+	    else:
+		raise SyntaxError, "Bad meta command: %s" % line
+	else:
+	    # Ordinary text line: part of a paragraph.
+	    if in_display:
+		append(Display(buffer))
+		buffer = []
+		in_display = False
+	    if not in_paragraph:
+		in_paragraph = True
+	    buffer.append(line)
+    # Flush whatever is still being collected at the end of the input.
+    if in_display:
+	append(Display(buffer))
+    if in_paragraph:
+	append(Paragraph(buffer))
+
+    return document[0]
+
+command_names = []
+input_name = None
+mdoc_date = datetime.date.today().strftime("%B %e, %Y")
+mdoc_command = None
+mdoc_section = 1
+mdoc_display_space = False
+output_name = None
+
+import getopt
+import sys
+
+opts, args = getopt.getopt(sys.argv[1:], "C:c:d:i:o:Ss:")
+for o,a in opts:
+    if o == "-c":
+	mdoc_command = a
+	command_names.append(a)
+    elif o == "-i":
+	input_name = a
+    elif o == "-d":
+	mdoc_date = a
+    elif o == "-C":
+	command_names.append(a)
+    elif o == "-s":
+	mdoc_section = a
+    elif o == "-S":
+	mdoc_display_space = True
+    elif o == "-o":
+	output_name = a
+
+data = [ x.replace("\\", "\\e").replace("E<gt>", "\\*[Gt]").replace("E<lt>", "\\*[Lt]") for x in open(input_name).readlines() ]
+
+if output_name:
+    output = open(output_name, "w")
+else:
+    output = sys.stdout
+
+output.write(str(convert_to_tree(data)))

Reply via email to