Module Name: othersrc
Committed By: joerg
Date: Thu Apr 30 00:28:59 UTC 2009
Added Files:
othersrc/usr.bin/pod2mdoc: pod2mdoc.py
Log Message:
Add a Python script to convert POD markup to mdoc markup.
It tries to do something sane e.g. by detecting arguments etc, but
post-processing is still required.
To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 othersrc/usr.bin/pod2mdoc/pod2mdoc.py
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Added files:
Index: othersrc/usr.bin/pod2mdoc/pod2mdoc.py
diff -u /dev/null othersrc/usr.bin/pod2mdoc/pod2mdoc.py:1.1
--- /dev/null Thu Apr 30 00:28:59 2009
+++ othersrc/usr.bin/pod2mdoc/pod2mdoc.py Thu Apr 30 00:28:58 2009
@@ -0,0 +1,606 @@
+#!/usr/pkg/bin/python2.5
+import datetime
+import re
+
# Characters that expand_generic() peels off the text directly after a
# match and passes to the macro as separate trailing arguments.
nroff_post_punctuation = (
    ".", ",", ":", ";",
    ")", "]", "?", "!",
)
# Characters that expand_generic() peels off the text directly before a
# match and passes to the macro as separate leading arguments.
nroff_pre_punctuation = (
    "(", "[",
)
+
class Node(object):
    """Common base class of the document tree nodes.

    Subclasses implement output(), which returns a list of output lines.
    """

    def __str__(self):
        # The string form of a node is its output lines joined by newlines.
        rendered = self.output()
        return "\n".join(rendered)
+
def convert_bracket(line):
    """Replace stand-alone "[" and "]" words on a macro line by Oo / Oc.

    Lines that do not start with "." are returned unchanged; this includes
    the empty string, which used to raise IndexError.

    On a macro line the words are rewritten as follows:
      * a "[" directly after the macro name becomes a leading ".Oo" and the
        macro name is demoted to an argument (its leading "." is dropped);
      * any other "[" becomes "Oo" and any "]" becomes "Oc";
      * if the result is a single ".Oo ... Oc" pair with no nested Oo/Oc,
        it is shortened to ".Op ...".

    Returns the words joined with single spaces.
    """
    if not line.startswith("."):
        return line

    args = line.split()
    if len(args) == 1:
        # Only the macro name itself, nothing to convert.
        return args[0]

    macro, first = args[0], args[1]
    if first == "[":
        output = [".Oo", macro[1:]]
    elif first == "]":
        output = [macro, "Oc"]
    else:
        output = [macro, first]

    replacements = {"[": "Oo", "]": "Oc"}
    output += [replacements.get(a, a) for a in args[2:]]

    inner = output[1:-1]
    if (output[0] == ".Oo" and output[-1] == "Oc"
            and "Oo" not in inner and "Oc" not in inner):
        # One simple enclosure: use the single-line form.
        output[0] = ".Op"
        output.pop()
    return " ".join(output)
+
def apply_expand(lines, f):
    """Call f on every line and concatenate the sequences it returns.

    lines -- iterable of input lines
    f     -- callable mapping one line to a list of lines
    Returns a single flat list.
    """
    return [piece for l in lines for piece in f(l)]
+
def expand_generic(line, regex, macro, sub):
    """Split a text line around every match of regex into macro lines.

    line  -- the text to scan
    regex -- pattern marking the markup to convert
    macro -- macro name that starts each generated line (e.g. ".Fl")
    sub   -- callable taking the match object and returning the list of
             macro arguments for that match

    For every match, the text before it (if any) becomes its own output
    line, followed by a line "macro args...".  Characters from
    nroff_pre_punctuation directly before the match and characters from
    nroff_post_punctuation directly after it are moved onto the macro line
    as separate arguments.  Whatever text remains after the last match is
    appended as the final line.

    Returns the list of output lines (empty for an empty input line).
    """
    # The pattern does not change between iterations, compile it once.
    pattern = re.compile(regex)
    output = []
    while True:
        m = pattern.search(line)
        if m is None:
            break
        pre = line[:m.start()].rstrip()
        post = line[m.end():].lstrip()
        args = [macro]
        # Pull opening punctuation off the end of the preceding text.
        while pre and pre[-1] in nroff_pre_punctuation:
            args.append(pre[-1])
            pre = pre[:-1].rstrip()
        if pre:
            output.append(pre)
        args += sub(m)
        # Pull closing punctuation off the start of the following text.
        while post and post[0] in nroff_post_punctuation:
            args.append(post[0])
            post = post[1:].lstrip()
        output.append(" ".join(args))
        # Continue scanning in what is left of the line.
        line = post
    if line:
        output.append(line)
    return output
+
def expand_static_tag(line, marker, macro, replacement):
    """Convert every literal occurrence of marker into "macro replacement".

    marker is matched verbatim (it is regex-escaped); the splitting of the
    surrounding text is done by expand_generic().
    """
    def fixed_argument(m):
        # The macro argument does not depend on the matched text.
        return [replacement]

    literal_pattern = re.escape(marker)
    return expand_generic(line, literal_pattern, macro, fixed_argument)
+
def expand_url(line):
    """Convert "\\*[Lt]B<url>\\*[Gt]" into an ".Lk url" macro line.

    The input is expected to have had E<lt> / E<gt> already replaced by
    the \\*[Lt] / \\*[Gt] strings.  http, https and ftp URLs are
    recognised.  The URL is taken from a capture group instead of slicing
    the match with hard-coded prefix/suffix lengths.
    """
    def url_argument(m):
        return [m.group(1).strip()]

    return expand_generic(
        line,
        r"\\\*\[Lt\]B<((?:https?|ftp)://.*?)>\\\*\[Gt\]",
        ".Lk",
        url_argument)
+
def expand_crossref(line):
    """Convert POD manual page links into ".Xr name section" lines.

    Both "L<name(N)|name(N)>" (link text identical to the target) and the
    plain "L<name(N)>" form are recognised; N is a purely numeric section.
    """
    def name_and_section(m):
        return [m.group(2).strip(), m.group(3)]

    return expand_generic(
        line,
        r"L<(([-a-zA-Z0-9_\.]+)\(([0-9]+)\))(?:\|\1)?>",
        ".Xr",
        name_and_section)
+
def expand_code(line):
    """Convert "C<text>" without nested markup into ".Va text".

    The content may contain neither "<" nor ">", so the match ends at the
    first ">" and cannot swallow text that follows the code span.
    """
    def code_argument(m):
        return [m.group(1).strip()]

    return expand_generic(line, r"C<([^<>]*)>", ".Va", code_argument)
+
def expand_code2(line):
    """Convert "C<pre I<file> post>" into a ".Va" line.

    Only the literal nested markup "I<file>" is handled.  The text before
    and after it is stripped and glued around the word "file".  Neither
    part may contain "<" or ">", so the match ends at the first closing
    ">" after "I<file>".
    """
    def glued_argument(m):
        return [".Sy %sfile%s" % (m.group(1).strip(), m.group(2).strip())]

    return expand_generic(
        line, r"C<([^<>]*)I<file>([^<>]*)>", ".Va", glued_argument)
+
def expand_options_fixed(line):
    """Convert "B<-opt>[B<a>|B<b>|...]" into an ".Fl" macro line.

    The generated arguments are
        opt Ns Oo Ns Ar a [Ns | Ns Ar b ...] Ns Oc
    i.e. the option immediately followed by a bracketed list of fixed
    alternatives.  An option named "Os" is written as "\\&Os".
    """
    def split_options(m):
        flag = m.group(1)
        if flag == "Os":
            flag = "\\&Os"
        output = [flag, "Ns", "Oo", "Ns", "Ar", m.group(2)]
        # Group 3 spans the whole "|B<x>|B<y>..." tail.  A capture group
        # placed inside the repetition would only retain the last
        # alternative, silently dropping the others.
        for alternative in re.findall(r"\|B<([^<>]*)>", m.group(3)):
            output += ["Ns", "|", "Ns", "Ar", alternative]
        output += ["Ns", "Oc"]
        return output

    return expand_generic(
        line,
        r"B<-([-a-zA-Z0-9=#\+,]+)>\[B<([^<>]*)>((?:\|B<[^<>]*>)*)\]",
        ".Fl",
        split_options)
+
def expand_options_optional(line):
    """Convert "B<-opt ...>[B<->I<arg>]" into an ".Fl" macro line.

    Every "-word" inside the bold part becomes a flag (flags after the
    first one are introduced by "Fl"); the last flag gets a "-" appended
    and is followed by "Ns Ar arg".  A flag named "Os" is written as
    "\\&Os".
    """
    def split_options(m):
        output = []
        # m.group(1) is "B<...>"; cut off the markup and walk the words.
        for index, word in enumerate(m.group(1)[2:-1].split()):
            if index:
                output.append("Fl")
            name = word[1:]
            if name == "Os":
                name = "\\&Os"
            output.append(name)
        output[-1] += "-"
        output.extend(["Ns", "Ar", m.group(3)])
        return output

    pattern = r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*>)\[B<->I<([^<>]*)>\]"
    return expand_generic(line, pattern, ".Fl", split_options)
+
def expand_options_optional2(line):
    """Convert "B<-opt[=>I<arg>B<]" into an ".Fl" macro line.

    The generated arguments are
        opt [Fl opt2 ...] Ns Oo = Ns Ar arg Oc
    A flag named "Os" is written as "\\&Os".

    The match ends inside a bold span (after "B<]"), so "B<" is put back
    in front of the remaining text for the later bold expansion to pick
    up.  Because of that the remaining text always starts with "B", so no
    closing punctuation can ever be moved onto the macro line here; only
    opening punctuation before the match is collected.

    Returns the list of output lines.
    """
    def split_options(m):
        output = []
        # m.group(1) is "B<...[=>"; cut off "B<" and "[=>".
        for index, word in enumerate(m.group(1)[2:-3].split()):
            if index:
                output.append("Fl")
            name = word[1:]
            if name == "Os":
                name = "\\&Os"
            output.append(name)
        output += ["Ns", "Oo", "=", "Ns", "Ar", m.group(3), "Oc"]
        return output

    pattern = re.compile(
        r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*\[=>)I<([^<>]*)>B<\]")
    macro = ".Fl"

    output = []
    while True:
        m = pattern.search(line)
        if m is None:
            break
        pre = line[:m.start()].rstrip()
        args = [macro]
        # Pull opening punctuation off the end of the preceding text.
        while pre and pre[-1] in nroff_pre_punctuation:
            args.append(pre[-1])
            pre = pre[:-1].rstrip()
        if pre:
            output.append(pre)
        args += split_options(m)
        output.append(" ".join(args))
        # Re-open the bold span that the match cut into.
        line = "B<" + line[m.end():].lstrip()
    if line:
        output.append(line)
    return output
+
def expand_options(line):
    """Convert "B<-opt ...>" plus optional argument markup into ".Fl".

    Recognised after the bold option part, each of them optional:
      * "I<arg>" (optionally preceded by one space and followed by "...")
      * "[=I<arg>]" or "[B<=>I<arg>]" (optionally followed by "...")
      * "B<=>I<arg>"
    Words inside the bold part that start with "-" become flags, other
    words become "Ar" arguments.  A flag named "Os" is written "\\&Os".
    """
    def add_arguments(output, text, ellipsis):
        # One "Ar word" pair per word; "..." sticks to the last element.
        for word in text.split():
            output.extend(["Ar", word])
        if ellipsis:
            output[-1] = output[-1] + "..."

    def split_options(m):
        output = []
        seen_flag = False
        for word in m.group(1)[2:-1].split():
            if not word.startswith("-"):
                output.extend(["Ar", word])
                continue
            # Every flag but the first needs its own "Fl".
            if seen_flag:
                output.append("Fl")
            seen_flag = True
            name = word[1:]
            if name == "Os":
                name = "\\&Os"
            output.append(name)
        if m.group(4):
            # No space between option and argument: glue them with Ns.
            if not m.group(5):
                output.append("Ns")
            add_arguments(output, m.group(7), m.group(8))
        if m.group(9):
            output.extend(["Ns", "=", "Ns"])
            add_arguments(output, m.group(11), m.group(12))
        if m.group(13):
            output.extend(["Ns", "=", "Ns", "Ar", m.group(14)])
        return output

    pattern = r"(B<-[-a-zA-Z0-9=#\+,_]+?(\.\.\.)?( [-a-zA-Z0-9=#\+,_]+)*>)(( ?)(I<([^<>]*)>(\.\.\.)?))?(\[(B<=>|=)I<([^<>]*)>\](\.\.\.)?)?(B<=>I<([^<>]*)>)?"
    return expand_generic(line, pattern, ".Fl", split_options)
+
def expand_filename(line):
    """Convert "F<path>" into ".Pa path".

    The path may contain non-nested "B<...>" / "I<...>" spans; their
    markup is removed and only their content is kept.

    The pattern consumes the content one plain character or one complete
    B<>/I<> span at a time.  Repeating an alternative that can itself
    match an arbitrary run of characters would accept the same input but
    may backtrack exponentially when the closing ">" is missing.
    """
    def expand_file(m):
        # Keep only the content of each B<...> / I<...> span.
        stripped = re.sub(
            r"[BI]<([^<>]*)>", lambda inner: inner.group(1), m.group(1))
        return [stripped]

    return expand_generic(
        line, r"F<((?:[BI]<[^<>]*>|[^<>])*)>", ".Pa", expand_file)
+
def expand_remaining_italic(line):
    """Convert every non-nested "I<text>" into ".Em text"."""
    def content(m):
        return [m.group(1)]

    return expand_generic(line, r"I<([^<>]*)>", ".Em", content)
+
def expand_remaining_bold(line):
    """Convert every non-nested "B<text>" into ".Sy text"."""
    def content(m):
        return [m.group(1)]

    return expand_generic(line, r"B<([^<>]*)>", ".Sy", content)
+
def macro_options(line):
    """Rewrite leading "B<-opt>" markup of a list item as macro arguments.

    As long as the line starts with "B<-opt ...>" (optionally followed by
    "I<arg>" and "B<=>I<arg2>" / "B<->I<arg2>"), that prefix is replaced
    by "Fl opt", "Ar arg" etc.; "Ns" is inserted where option and argument
    were not separated by a space.  A flag named "Os" is written "\\&Os".
    Text that is left once the prefix no longer matches is appended
    unchanged.

    Returns the words joined with single spaces (no leading ".").
    """
    pattern = r"B<-([-a-zA-Z0-9=#\+,_]+)( ([- a-zA-Z0-9+]*))?>( +)?(I<([^<>]*)>(B<(=|-)>I<([^<>]*)>)?)?(B< +>)?"

    def escaped(flag):
        # "Os" would be taken for the macro of the same name.
        if flag == "Os":
            return "\\&Os"
        return flag

    words = []
    m = re.match(pattern, line)
    while m is not None:
        words += ["Fl", escaped(m.group(1))]
        if m.group(2):
            # Further words inside the same bold span.
            for extra in m.group(3).split():
                if extra.startswith("-"):
                    words += ["Fl", escaped(extra[1:])]
                else:
                    words += ["Ar", extra]
        if m.group(5):
            if not m.group(4):
                words.append("Ns")
            words += ["Ar", m.group(6).strip()]
            if m.group(7):
                words += ["Ns", m.group(8), "Ns", "Ar", m.group(9).strip()]
        line = line[m.end():].lstrip()
        m = re.match(pattern, line)
    if line:
        words.append(line)
    return " ".join(words)
+
def macro_bold_italic(line):
    """Rewrite non-nested I<>, B<> and C<> spans as macro arguments.

    "I<x>" becomes "Em x Ns"; "B<x>" and "C<x>" become "Sy x Ns".  Text
    in front of a span is kept as is, whitespace after a span is dropped.

    Returns the pieces joined with single spaces (no leading ".").
    """
    pattern = r"[IBC]<([^<>]*)>"
    pieces = []
    m = re.search(pattern, line)
    while m is not None:
        before = line[:m.start()]
        if before:
            pieces.append(before)
        if m.group(0).startswith("I"):
            macro = "Em"
        else:
            macro = "Sy"
        pieces += [macro, m.group(1), "Ns"]
        line = line[m.end():].lstrip()
        m = re.search(pattern, line)
    if line:
        pieces.append(line)
    return " ".join(pieces)
+
def expand_S(string):
    """Remove S<...> markup from string, keeping the enclosed text.

    "S< >" and "B< >" are first replaced by the "\\~" escape.  Every
    remaining "S<" is then dropped together with its matching ">";
    nested "<" ... ">" pairs inside are skipped while searching for it.

    Raises SyntaxError if an "S<" has no matching ">".
    """
    string = string.replace("S< >", "\\~")
    string = string.replace("B< >", "\\~")
    while "S<" in string:
        pre, tag, post = string.partition("S<")
        depth = 1
        for i, char in enumerate(post):
            if char == "<":
                depth += 1
            elif char == ">":
                depth -= 1
                if not depth:
                    # i is the ">" that closes this S<.
                    break
        else:
            # Ran off the end without closing the tag.
            raise SyntaxError("Unbalanced <>")
        string = pre + post[:i] + post[i + 1:]
    return string
+
class Document(Node):
    """Root node: emits the page prologue followed by all children."""

    def __init__(self):
        self._children = []

    def append(self, c):
        """Add c as the last child."""
        self._children.append(c)

    def output(self):
        """Return the complete list of output lines.

        Reads the module-level mdoc_date, mdoc_command and mdoc_section.
        """
        lines = [
            ".\\\" $NetBSD: pod2mdoc.py,v 1.1 2009/04/30 00:28:58 joerg Exp $",
            ".Dd %s" % mdoc_date,
            ".Dt %s %s" % (mdoc_command.upper(), mdoc_section),
            ".Os",
        ]
        # Assumes that Section level=3 doesn't happen
        # without Section level < 3 before it.
        for child in self._children:
            lines.extend(child.output())
        # No paragraph break at the very end of the page.
        if lines[-1] == ".Pp":
            lines.pop()
        # Empty last element, so that the joined text ends with a newline.
        lines.append("")

        def protect_quotes(text):
            # On macro lines, put \& in front of a quote that follows a space.
            if text.startswith("."):
                return text.replace(' "', ' \\&"')
            return text

        return [protect_quotes(text) for text in lines]
+
class Section(Node):
    """A =head1 / =head2 / =head3 section together with its content."""

    def __init__(self, level, title):
        self.level = level
        self.title = expand_S(title)
        self._children = []

    def append(self, c):
        """Add c as the last child."""
        self._children.append(c)

    def output(self):
        """Return the heading line followed by the children's lines.

        Level 1 is written as .Sh, level 2 as .Ss and level 3 as an
        ".It Sy" list item.  Consecutive level 3 children are wrapped in
        one ".Bl -tag -width xx" / ".El" pair.
        """
        headings = {1: ".Sh %s", 2: ".Ss %s", 3: ".It Sy %s"}
        output = []
        if self.level in headings:
            output.append(headings[self.level] % self.title)

        list_open = False
        for child in self._children:
            is_item = isinstance(child, Section) and child.level == 3
            if list_open and not is_item:
                output.append(".El")
                list_open = False
            elif is_item and not list_open:
                output.append(".Bl -tag -width xx")
                list_open = True
            output.extend(child.output())

        # Drop the paragraph break left by a trailing Paragraph.
        if output[-1] == ".Pp":
            output.pop()
        if list_open:
            output.append(".El")
        return output
+
class Display(Node):
    """A block of indented (verbatim) lines, emitted as a literal display."""

    def __init__(self, lines):
        """Prepare lines for output.

        S<> markup is removed, "<" and ">" are replaced by the \\*[Lt] and
        \\*[Gt] strings, and trailing empty lines are dropped.  Lines that
        start with "." or "'" get "\\&" prepended: both characters are roff
        control characters and would otherwise turn the text line into a
        request.
        """
        self.lines = []
        for raw in lines:
            text = expand_S(raw).replace("<", "\\*[Lt]").replace(">", "\\*[Gt]")
            if text.startswith((".", "'")):
                text = "\\&" + text
            self.lines.append(text)
        while self.lines and not self.lines[-1]:
            self.lines.pop()

    def output(self):
        """Return the lines wrapped in .Bd -literal ... .Ed."""
        return [".Bd -literal -offset indent"] + self.lines + [".Ed"]
+
class Paragraph(Node):
    """A block of ordinary text, converted to text and macro lines."""

    def __init__(self, lines):
        """Join lines, split them into sentences and expand all markup.

        The text is split at ". " so that each sentence starts on its own
        line, S<> markup is removed and runs of spaces are collapsed.  The
        expand_* converters are then applied in a fixed order (command
        names first, generic bold/italic last) and finally "[" / "]" on
        macro lines are converted by convert_bracket().
        """
        line = " ".join(lines)
        if ". " in line:
            lines = [l.strip() + "." for l in line.split(". ")]
            # The last piece did not lose a "." in the split.
            lines[-1] = lines[-1][:-1]
        else:
            lines = [line]

        def kill_space(x):
            # Collapse every run of spaces into a single space.  The test
            # must look for TWO spaces; looking for one would never end.
            while "  " in x:
                x = x.replace("  ", " ")
            return x

        lines = [kill_space(expand_S(x)) for x in lines]

        for name in command_names:
            lines = apply_expand(
                lines,
                lambda l, name=name: expand_static_tag(
                    l, "B<%s>" % name, ".Nm", name))

        # Order matters: specific patterns have to run before the generic
        # italic/bold fallbacks consume their markup.
        for expand in (expand_url,
                       expand_crossref,
                       expand_code,
                       expand_code2,
                       expand_options_fixed,
                       expand_options_optional,
                       expand_options_optional2,
                       expand_options,
                       expand_filename,
                       expand_remaining_italic,
                       expand_remaining_bold):
            lines = apply_expand(lines, expand)

        self.lines = [convert_bracket(x) for x in lines]

    def output(self):
        """Return the converted lines followed by a .Pp paragraph break."""
        return self.lines + [".Pp"]
+
class List(Node):
    """An =over ... =back list.

    _children holds (item text, [content nodes]) pairs.  _type is "*" or
    "-" for bullet / dash lists and None for tagged lists.
    """

    def __init__(self):
        self._children = []
        self._type = None

    def appendItem(self, c):
        """Start a new item whose =item text is c."""
        self._children.append((expand_S(c), []))

    def append(self, c):
        """Add the node c to the content of the most recent item."""
        self._children[-1][1].append(c)

    def finalize(self):
        """Determine the list type and convert the item texts.

        * If every item text starts with the same marker ("*" or "-"),
          the list becomes a bullet / dash list and the texts are left
          alone (they are not printed).
        * Otherwise, if every item is a plain "I<text>", the texts become
          "Sy text".
        * Otherwise option and bold/italic markup in each text is
          converted to macro arguments.

        Empty item texts are passed through unconverted, and a marker
        seen on earlier items is forgotten as soon as one item does not
        fit, so that such a list is consistently treated as tagged.
        """
        marker = None
        for text, body in self._children:
            first = text[:1]
            if first not in ("*", "-") or (marker and first != marker):
                marker = None
                break
            marker = first
        else:
            self._type = marker
            return
        self._type = None

        def is_plain_italic(text):
            inner = text[2:-1]
            return (text[:2] == "I<" and text[-1] == ">"
                    and "<" not in inner and ">" not in inner)

        if all(is_plain_italic(text) for text, body in self._children):
            self._children = [("Sy %s" % text[2:-1], body)
                              for text, body in self._children]
            return

        def convert(text):
            converted = macro_bold_italic(macro_options(text))
            # Nothing to convert in an empty item text.
            if converted:
                converted = convert_bracket(converted)
            return converted

        self._children = [(convert(text), body)
                          for text, body in self._children]

    def output(self):
        """Return the .Bl ... .El lines for the list and its content."""
        output = []
        if self._type == "*":
            output.append(".Bl -bullet")
        elif self._type == "-":
            output.append(".Bl -dash")
        else:
            output.append(".Bl -tag -width xx")
        for text, body in self._children:
            if self._type or not text:
                output.append(".It")
            else:
                output.append(".It %s" % text)
            for c in body:
                output += c.output()
        output.append(".El")
        return output
+
def convert_to_tree(data):
    """Parse POD source lines into a tree and return its Document root.

    data -- iterable of input lines

    Lines are classified as follows:
      * empty lines end a paragraph (inside a display they are kept);
      * lines starting with a tab (or with a space, if the module-level
        mdoc_display_space is set) are collected into a Display, with the
        first character removed;
      * lines starting with "=" are commands: =pod is ignored, =cut stops
        processing, =head1..3 open a Section, =over / =item / =back build
        a List;
      * everything else is collected into a Paragraph.

    Raises SyntaxError for an unknown command or for commands that do not
    nest properly.
    """
    in_display = False
    in_paragraph = False
    pending = []

    # Stack of open containers: document[0] is the root, document[-1] is
    # the container new nodes are attached to.
    document = [Document()]

    def append(c):
        document[-1].append(c)

    for line in data:
        line = line.rstrip()
        if not line or line.isspace():
            if in_paragraph:
                append(Paragraph(pending))
                in_paragraph = False
                pending = []
            if in_display:
                # Empty lines are part of a display.
                pending.append(line)
        elif line[0] == "\t" or (mdoc_display_space and line[0] == " "):
            if in_paragraph:
                append(Paragraph(pending))
                in_paragraph = False
                pending = []
            in_display = True
            pending.append(line[1:])
        elif line[0] == "=":
            # A command ends whatever text block is being collected.
            if in_display:
                append(Display(pending))
                pending = []
                in_display = False
            if in_paragraph:
                append(Paragraph(pending))
                pending = []
                in_paragraph = False
            if line.startswith("=pod"):
                continue
            if line.startswith("=cut"):
                break
            if line.startswith(("=head1", "=head2", "=head3")):
                level = int(line[5])
                # Close all sections of the same or a deeper level.
                while not isinstance(document[-1], Document):
                    if not isinstance(document[-1], Section):
                        raise SyntaxError("Bad nesting")
                    if document[-1].level < level:
                        break
                    document.pop()
                c = Section(level, line[6:].strip())
                append(c)
                document.append(c)
            elif line.startswith("=over"):
                c = List()
                append(c)
                document.append(c)
            elif line.startswith("=back"):
                if not isinstance(document[-1], List):
                    raise SyntaxError("Bad nesting")
                document[-1].finalize()
                document.pop()
            elif line.startswith("=item"):
                if not isinstance(document[-1], List):
                    raise SyntaxError("Bad nesting")
                document[-1].appendItem(line[6:].strip())
            else:
                raise SyntaxError("Bad meta command: %s" % line)
        else:
            if in_display:
                append(Display(pending))
                pending = []
                in_display = False
            in_paragraph = True
            pending.append(line)

    # Flush the block that was still open at the end of the input.
    if in_display:
        append(Display(pending))
    if in_paragraph:
        append(Paragraph(pending))

    return document[0]
+
# Defaults for the settings that the command-line options below override.
command_names = []          # names whose B<name> markup becomes .Nm
input_name = None           # -i
# Built by hand instead of with "%e": that directive is not available on
# every platform and pads single-digit days with an extra space.
_today = datetime.date.today()
mdoc_date = "%s %d, %d" % (_today.strftime("%B"), _today.day, _today.year)
mdoc_command = None         # -c
mdoc_section = 1            # -s
mdoc_display_space = False  # -S
output_name = None          # -o
+
import getopt
import sys

# Options:
#   -c name   command name for .Dt; also marked up as .Nm in the text
#   -C name   additional name to mark up as .Nm
#   -d date   date for .Dd instead of today
#   -i file   input file (standard input if not given)
#   -o file   output file (standard output if not given)
#   -s sect   manual section for .Dt
#   -S        lines starting with a space are display lines, too
opts, args = getopt.getopt(sys.argv[1:], "C:c:d:i:o:Ss:")
for o, a in opts:
    if o == "-c":
        mdoc_command = a
        command_names.append(a)
    elif o == "-i":
        input_name = a
    elif o == "-d":
        mdoc_date = a
    elif o == "-C":
        command_names.append(a)
    elif o == "-s":
        mdoc_section = a
    elif o == "-S":
        mdoc_display_space = True
    elif o == "-o":
        output_name = a

# The command name is needed for the .Dt line; fail before any file is
# touched instead of crashing after the output file has been truncated.
if mdoc_command is None:
    sys.exit("pod2mdoc: missing command name (-c name)")

if input_name is not None:
    source = open(input_name)
else:
    source = sys.stdin
try:
    # Protect backslashes first, then map the POD escapes for "<" and ">"
    # to the corresponding roff strings.
    data = [x.replace("\\", "\\e").replace("E<gt>", "\\*[Gt]").replace("E<lt>", "\\*[Lt]") for x in source.readlines()]
finally:
    if source is not sys.stdin:
        source.close()

# Convert before opening the output file, so that a failed conversion
# does not leave an empty file behind.
text = str(convert_to_tree(data))

if output_name:
    output = open(output_name, "w")
    try:
        output.write(text)
    finally:
        output.close()
else:
    output = sys.stdout
    output.write(text)