OK for the following commit to /usr/src/usr.bin/docbook2mdoc
as a first step?  We can then continue development in tree.

Ingo,

I don't think you're addressing the root problem here: complexity. Both mdoc(7) and DocBook are semantic languages. And while semantics were good enough historically, shouldn't we be moving on?

I don't want to put down mdoc(7)--after all, it has given us a good sense of what manpages should look like. I mean, all mdoc(7) pages have the same sort of output:

PROG(1)       SOME TEXT WE ALL IGNORE        PROG(1)

NAME
   prog - foo

SYNOPSIS
   prog [-123]

DESCRIPTION
    ...

This format works just as well today as it did in the sixties, when Donald Knuth wrote it. Even other manpage languages try to look the same. So while DocBook is a good idea (it's documented on the web and in a book, while mdoc(7) appears to be completely undocumented), it seems we're throwing away a good idea--the presentation--while keeping to an overly-complex semantic model.

I think a better idea is to go with POD. We can dispense with the complexities of mdoc(7) and go right to what we really want--content, not markup. Unlike mdoc(7), POD gets down to brass tacks: instead of worrying about what's an argument or flag, you can just make it bold or underlined. It doesn't support colour yet, but if we consider it a growing language, that's not too much of a setback.

In the short run, can't you achieve your goal the same way with this:

 pod2mdoc -> doclifter -> docbook2mdoc -> man

Eventually, we can re-write man to accept more sophisticated formats directly instead of being tied to mdoc(7), but this would be transparent to end-users.

Thoughts?

Best,

Kristaps

Yours,
   Ingo


Index: Makefile
===================================================================
RCS file: Makefile
diff -N Makefile
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ Makefile    1 Apr 2014 11:03:51 -0000
@@ -0,0 +1,14 @@
+#      $OpenBSD$
+
+.include <bsd.own.mk>
+
+CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter
+LDADD  += -lexpat
+
+SRCS = docbook2mdoc.c rules.c
+
+PROG = docbook2mdoc
+
+MAN =  docbook2mdoc.1
+
+.include <bsd.prog.mk>
Index: docbook2mdoc.1
===================================================================
RCS file: docbook2mdoc.1
diff -N docbook2mdoc.1
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ docbook2mdoc.1      1 Apr 2014 11:03:51 -0000
@@ -0,0 +1,66 @@
+.\"       $Id$
+.\"
+.\" Copyright (c) 2014 Kristaps Dzonsons <krist...@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: March 28 2014 $
+.Dt DOCBOOK2MDOC 1
+.Os
+.Sh NAME
+.Nm docbook2mdoc
+.Nd Convert DocBook refentry to mdoc
+.Sh SYNOPSIS
+.Nm docbook2mdoc
+.Op Ar file
+.Sh DESCRIPTION
+The
+.Nm
+utility reads
+.Ar file
+and
+generates
+.Xr mdoc 7
+from the
+.Aq refentry
+section of DocBook source.
+If unspecified or
+.Ar \&- ,
+.Ar file
+is taken to be standard input.
+It accepts no options.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+To pipe a DocBook document
+.Pa foo.xml
+through
+.Xr mandoc 1
+and a pager:
+.Pp
+.Dl % docbook2mdoc foo.xml | mandoc | more
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr mdoc 7
+.Sh AUTHORS
+.Nm
+was written by
+.An Kristaps Dzonsons ,
+.Mt krist...@bsd.lv .
+.Sh CAVEATS
+The
+.Nm
+utility is experimental.
+As such, only a small subset of the
+.Aq refentry
+domain is accepted.
Index: docbook2mdoc.c
===================================================================
RCS file: docbook2mdoc.c
diff -N docbook2mdoc.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ docbook2mdoc.c      1 Apr 2014 11:03:52 -0000
@@ -0,0 +1,1310 @@
+/*     $Id$ */
+/*
+ * Copyright (c) 2014 Kristaps Dzonsons <krist...@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <expat.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Global parse state, shared by the expat callbacks (which build the
+ * tree) and by the printing functions (which walk it).
+ * Keep this as simple and small as possible.
+ */
+struct parse {
+       XML_Parser       xml; /* expat handle (used for line/column in diagnostics) */
+       enum nodeid      node; /* current (NODE_ROOT if pre-tree) */
+       const char      *fname; /* filename (printed in diagnostics) */
+       int              stop; /* set on a parse error; callbacks then return early */
+       struct pnode    *root; /* root of parse tree */
+       struct pnode    *cur; /* current node in tree */
+       char            *b; /* nil-terminated buffer for pre-print */
+       size_t           bsz; /* current length of b (terminator not counted) */
+       size_t           mbsz; /* number of bytes allocated for b */
+       int              newln; /* output: are we on a fresh line */
+};
+
+/*
+ * Static description of one recognised DocBook element.
+ */
+struct node {
+       const char      *name; /* docbook element name */
+       unsigned int     flags; /* bitmask of the NODE_* flags below */
+#define        NODE_IGNTEXT     1 /* ignore all contained text */
+};
+
+/* Queue head types: a list of child nodes and a list of attributes. */
+TAILQ_HEAD(pnodeq, pnode);
+TAILQ_HEAD(pattrq, pattr);
+
+/*
+ * One attribute attached to a parse-tree node.
+ */
+struct pattr {
+       enum attrkey     key; /* which attribute */
+       enum attrval     val; /* known value, or ATTRVAL__MAX if unrecognised */
+       char            *rawval; /* copy of the value text when val is ATTRVAL__MAX */
+       TAILQ_ENTRY(pattr) child; /* link in the owning node's attrq */
+};
+
+/*
+ * One node of the parse tree: either an element or a run of text
+ * (NODE_TEXT).
+ */
+struct pnode {
+       enum nodeid      node; /* node type */
+       char            *b; /* binary data buffer (text bytes; no nil terminator) */
+       size_t           bsz; /* data buffer size */
+       struct pnode    *parent; /* parent (or NULL if top) */
+       struct pnodeq    childq; /* queue of children */
+       struct pattrq    attrq; /* attributes of node */
+       TAILQ_ENTRY(pnode) child; /* link in the parent's childq */
+};
+
+/*
+ * Attribute names, indexed by enum attrkey.
+ * NOTE(review): the order presumably has to match the enum declared in
+ * extern.h -- confirm when adding entries.
+ */
+static const char *attrkeys[ATTRKEY__MAX] = {
+       "choice",
+       "id",
+       "rep"
+};
+
+/*
+ * Recognised attribute values, indexed by enum attrval.
+ * NOTE(review): the order presumably has to match the enum declared in
+ * extern.h -- confirm when adding entries.
+ */
+static const char *attrvals[ATTRVAL__MAX] = {
+       "norepeat",
+       "opt",
+       "plain",
+       "repeat",
+       "req"
+};
+
+/*
+ * Table of recognised elements, indexed by enum nodeid.
+ * Entries with a NULL name are never matched against element names
+ * by the lookup in xml_elem_start().
+ * NOTE(review): the order presumably has to match enum nodeid in
+ * extern.h -- confirm when adding entries.
+ */
+static const struct node nodes[NODE__MAX] = {
+       { NULL, 0 }, /* no element name; presumably NODE_ROOT -- confirm */
+       { "acronym", 0 },
+       { "arg", 0 },
+       { "citerefentry", NODE_IGNTEXT },
+       { "cmdsynopsis", NODE_IGNTEXT },
+       { "code", 0 },
+       { "command", 0 },
+       { "date", 0 },
+       { "emphasis", 0 },
+       { "envar", 0 },
+       { "filename", 0 },
+       { "funcdef", 0 },
+       { "funcprototype", NODE_IGNTEXT },
+       { "funcsynopsis", NODE_IGNTEXT },
+       { "funcsynopsisinfo", 0 },
+       { "function", 0 },
+       { "itemizedlist", NODE_IGNTEXT },
+       { "group", NODE_IGNTEXT },
+       { "link", 0 },
+       { "listitem", NODE_IGNTEXT },
+       { "literal", 0 },
+       { "manvolnum", 0 },
+       { "option", 0 },
+       { "orderedlist", NODE_IGNTEXT },
+       { "para", 0 },
+       { "paramdef", 0 },
+       { "parameter", 0 },
+       { "programlisting", 0 },
+       { "prompt", 0 },
+       { "refclass", NODE_IGNTEXT },
+       { "refdescriptor", NODE_IGNTEXT },
+       { "refentry", NODE_IGNTEXT },
+       { "refentryinfo", NODE_IGNTEXT },
+       { "refentrytitle", 0 },
+       { "refmeta", NODE_IGNTEXT },
+       { "refmiscinfo", NODE_IGNTEXT },
+       { "refname", 0 },
+       { "refnamediv", NODE_IGNTEXT },
+       { "refpurpose", 0 },
+       { "refsect1", NODE_IGNTEXT },
+       { "refsect2", NODE_IGNTEXT },
+       { "refsynopsisdiv", NODE_IGNTEXT },
+       { "replaceable", 0 },
+       { "sbr", NODE_IGNTEXT },
+       { "screen", NODE_IGNTEXT },
+       { "structname", 0 },
+       { "synopsis", 0 },
+       { "term", 0 },
+       { NULL, 0 }, /* no element name; presumably NODE_TEXT -- confirm */
+       { "title", 0 },
+       { "ulink", 0 },
+       { "userinput", 0 },
+       { "variablelist", NODE_IGNTEXT },
+       { "varlistentry", NODE_IGNTEXT },
+};
+
+/*
+ * Forward declaration: pnode_print() and the per-element printers
+ * below call each other recursively.
+ */
+static void
+pnode_print(struct parse *p, struct pnode *pn);
+
+/*
+ * Expat character-data callback: process a stream of characters.
+ * We store text as nodes in and of themselves (NODE_TEXT).
+ * If a text node is already open, append to it.
+ * If it's not open, open one under the current node, but only once
+ * some non-whitespace shows up: leading whitespace is discarded.
+ * Nothing is collected after a parse error, before the tree exists,
+ * or inside an element flagged NODE_IGNTEXT.
+ * The node's buffer is raw bytes and is NOT nil-terminated.
+ * Allocation failure is fatal.
+ */
+static void
+xml_char(void *arg, const XML_Char *p, int sz)
+{
+       struct parse    *ps = arg;
+       struct pnode    *dat;
+       int              i;
+
+       /* Stopped or no tree yet. */
+       if (ps->stop || NODE_ROOT == ps->node)
+               return;
+
+       /* Not supposed to be collecting text. */
+       assert(NULL != ps->cur);
+       if (NODE_IGNTEXT & nodes[ps->node].flags)
+               return;
+
+       /*
+        * Are we in the midst of processing text?
+        * If we're not processing text right now, then create a text
+        * node for doing so.
+        * However, don't do so unless we have some non-whitespace to
+        * process: strip out all leading whitespace to be sure.
+        */
+       if (NODE_TEXT != ps->node) {
+               /*
+                * The <ctype.h> classifiers require a value that fits
+                * in unsigned char (or EOF).
+                * A plain char holding a byte >= 0x80 may be negative,
+                * so convert before classifying.
+                */
+               for (i = 0; i < sz; i++)
+                       if ( ! isspace((unsigned char)p[i]))
+                               break;
+               if (i == sz)
+                       return;
+               p += i;
+               sz -= i;
+               dat = calloc(1, sizeof(struct pnode));
+               if (NULL == dat) {
+                       perror(NULL);
+                       exit(EXIT_FAILURE);
+               }
+
+               dat->node = ps->node = NODE_TEXT;
+               dat->parent = ps->cur;
+               TAILQ_INIT(&dat->childq);
+               TAILQ_INIT(&dat->attrq);
+               TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
+               ps->cur = dat;
+               assert(NULL != ps->root);
+       }
+
+       /* Append to current buffer. */
+       assert(sz >= 0);
+       ps->cur->b = realloc(ps->cur->b,
+               ps->cur->bsz + (size_t)sz);
+       if (NULL == ps->cur->b) {
+               perror(NULL);
+               exit(EXIT_FAILURE);
+       }
+       memcpy(ps->cur->b + ps->cur->bsz, p, sz);
+       ps->cur->bsz += (size_t)sz;
+}
+
+/*
+ * Drop trailing whitespace from a text node by shrinking its
+ * recorded length; the buffer itself is not reallocated.
+ */
+static void
+pnode_trim(struct pnode *pn)
+{
+
+       assert(NODE_TEXT == pn->node);
+       /* Cast: isspace() needs an unsigned char value, not a plain char. */
+       for ( ; pn->bsz > 0; pn->bsz--)
+               if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
+                       break;
+}
+
+/*
+ * Expat start-element callback: begin an element.
+ * If a text node is open, close it out first and return to its parent.
+ * Then look the element up by name.
+ * If we don't find it and we're not in a tree yet, ignore it.
+ * If we don't find it and we're in a tree, complain and stop parsing.
+ * If we find it at the root while a tree already exists, complain and
+ * stop parsing (FIXME: I don't think this is right?).
+ * If we find it at the root but it's not a refentry (and thus out of
+ * context), ignore it.
+ * Make sure that the element is allowed under the current node.
+ * Lastly, put the node onto our parse tree, make it current, and
+ * record its attributes.
+ * Unknown or misplaced attributes and bad values are reported and
+ * skipped; they do not stop the parse.
+ * Allocation failure is fatal.
+ */
+static void
+xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
+{
+       struct parse     *ps = arg;
+       enum nodeid       node;
+       enum attrkey      key;
+       enum attrval      val;
+       struct pnode     *dat;
+       struct pattr     *pattr;
+       const XML_Char  **att;
+
+       if (ps->stop)
+               return;
+
+       /* Close out text node, if applicable... */
+       if (NODE_TEXT == ps->node) {
+               assert(NULL != ps->cur);
+               pnode_trim(ps->cur);
+               ps->cur = ps->cur->parent;
+               assert(NULL != ps->cur);
+               ps->node = ps->cur->node;
+       }
+
+       /* Entries without a name never match an element. */
+       for (node = 0; node < NODE__MAX; node++)
+               if (NULL == nodes[node].name)
+                       continue;
+               else if (0 == strcmp(nodes[node].name, name))
+                       break;
+
+       if (NODE__MAX == node && NODE_ROOT == ps->node) {
+               return;
+       } else if (NODE__MAX == node) {
+               fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
+                       ps->fname, XML_GetCurrentLineNumber(ps->xml),
+                       XML_GetCurrentColumnNumber(ps->xml), name);
+               ps->stop = 1;
+               return;
+       } else if (NODE_ROOT == ps->node && NULL != ps->root) {
+               fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
+                       ps->fname, XML_GetCurrentLineNumber(ps->xml),
+                       XML_GetCurrentColumnNumber(ps->xml));
+               ps->stop = 1;
+               return;
+       } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
+               return;
+       } else if ( ! isparent(node, ps->node)) {
+               fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
+                       "of node \"%s\"\n",
+                       ps->fname, XML_GetCurrentLineNumber(ps->xml),
+                       XML_GetCurrentColumnNumber(ps->xml),
+                       NULL == nodes[ps->node].name ?
+                       "(none)" : nodes[ps->node].name,
+                       NULL == nodes[node].name ?
+                       "(none)" : nodes[node].name);
+               ps->stop = 1;
+               return;
+       }
+
+       if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
+               perror(NULL);
+               exit(EXIT_FAILURE);
+       }
+
+       dat->node = ps->node = node;
+       dat->parent = ps->cur;
+       TAILQ_INIT(&dat->childq);
+       TAILQ_INIT(&dat->attrq);
+
+       if (NULL != ps->cur)
+               TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
+
+       ps->cur = dat;
+       if (NULL == ps->root)
+               ps->root = dat;
+
+       /*
+        * Process attributes.
+        * Expat hands them over as name/value pairs terminated by a
+        * NULL name.
+        */
+       for (att = atts; NULL != *att; att += 2) {
+               for (key = 0; key < ATTRKEY__MAX; key++)
+                       if (0 == strcmp(*att, attrkeys[key]))
+                               break;
+               if (ATTRKEY__MAX == key) {
+                       fprintf(stderr, "%s:%zu:%zu: unknown "
+                               "attribute \"%s\"\n", ps->fname,
+                               XML_GetCurrentLineNumber(ps->xml),
+                               XML_GetCurrentColumnNumber(ps->xml),
+                               *att);
+                       continue;
+               } else if ( ! isattrkey(node, key)) {
+                       fprintf(stderr, "%s:%zu:%zu: bad "
+                               "attribute \"%s\"\n", ps->fname,
+                               XML_GetCurrentLineNumber(ps->xml),
+                               XML_GetCurrentColumnNumber(ps->xml),
+                               *att);
+                       continue;
+               }
+               for (val = 0; val < ATTRVAL__MAX; val++)
+                       if (0 == strcmp(*(att + 1), attrvals[val]))
+                               break;
+               if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
+                       fprintf(stderr, "%s:%zu:%zu: bad "
+                               "value \"%s\"\n", ps->fname,
+                               XML_GetCurrentLineNumber(ps->xml),
+                               XML_GetCurrentColumnNumber(ps->xml),
+                               *(att + 1));
+                       continue;
+               }
+               pattr = calloc(1, sizeof(struct pattr));
+               if (NULL == pattr) {
+                       perror(NULL);
+                       exit(EXIT_FAILURE);
+               }
+               pattr->key = key;
+               pattr->val = val;
+               /* A value we don't recognise is kept verbatim. */
+               if (ATTRVAL__MAX == val) {
+                       pattr->rawval = strdup(*(att + 1));
+                       if (NULL == pattr->rawval) {
+                               perror(NULL);
+                               exit(EXIT_FAILURE);
+                       }
+               }
+               TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
+       }
+
+}
+
+/*
+ * Expat end-element callback: step back up the parse tree.
+ * An open text node is trimmed and closed first.
+ * Stepping above the top of the tree puts us back into the
+ * NODE_ROOT state.
+ */
+static void
+xml_elem_end(void *arg, const XML_Char *name)
+{
+       struct parse    *st = arg;
+       struct pnode    *up;
+
+       if (st->stop)
+               return;
+       if (NODE_ROOT == st->node)
+               return;
+
+       /* Leave the text node before leaving the element itself. */
+       if (NODE_TEXT == st->node) {
+               assert(NULL != st->cur);
+               pnode_trim(st->cur);
+               st->cur = st->cur->parent;
+               assert(NULL != st->cur);
+               st->node = st->cur->node;
+       }
+
+       up = st->cur->parent;
+       st->cur = up;
+       st->node = NULL == up ? NODE_ROOT : up->node;
+}
+
+/*
+ * Release a node together with its attributes, its text buffer and,
+ * recursively, all of its children.
+ * The node is not detached from its own parent.
+ * Passing NULL is a no-op.
+ */
+static void
+pnode_free(struct pnode *pn)
+{
+       struct pnode    *kid;
+       struct pattr    *attr;
+
+       if (NULL == pn)
+               return;
+
+       while ( ! TAILQ_EMPTY(&pn->childq)) {
+               kid = TAILQ_FIRST(&pn->childq);
+               TAILQ_REMOVE(&pn->childq, kid, child);
+               pnode_free(kid);
+       }
+
+       while ( ! TAILQ_EMPTY(&pn->attrq)) {
+               attr = TAILQ_FIRST(&pn->attrq);
+               TAILQ_REMOVE(&pn->attrq, attr, child);
+               free(attr->rawval);
+               free(attr);
+       }
+
+       free(pn->b);
+       free(pn);
+}
+
+/*
+ * Detach a node from its parent's child queue (if it has a parent),
+ * then pnode_free() it.
+ */
+static void
+pnode_unlink(struct pnode *pn)
+{
+       struct pnode    *par;
+
+       par = pn->parent;
+       if (NULL != par)
+               TAILQ_REMOVE(&par->childq, pn, child);
+       pnode_free(pn);
+}
+
+/*
+ * Detach and pnode_free() every child of a node, leaving the node
+ * itself in place.
+ */
+static void
+pnode_unlinksub(struct pnode *pn)
+{
+       struct pnode    *kid;
+
+       while (NULL != (kid = TAILQ_FIRST(&pn->childq)))
+               pnode_unlink(kid);
+}
+
+/*
+ * Empty the lookaside buffer: zero length, nil terminator up front.
+ * The allocation itself is kept.
+ */
+static void
+bufclear(struct parse *p)
+{
+
+       p->bsz = 0;
+       p->b[0] = '\0';
+}
+
+/*
+ * Copy the contents of a NODE_TEXT node onto the end of the lookaside
+ * buffer, growing the buffer first if it is too small.
+ * The buffer is ALWAYS nil-terminated afterwards.
+ * Allocation failure is fatal.
+ */
+static void
+bufappend(struct parse *p, struct pnode *pn)
+{
+       size_t           need;
+
+       assert(NODE_TEXT == pn->node);
+
+       /* Old contents, new contents, terminator. */
+       need = p->bsz + pn->bsz + 1;
+       if (need > p->mbsz) {
+               p->b = realloc(p->b, need);
+               if (NULL == p->b) {
+                       perror(NULL);
+                       exit(EXIT_FAILURE);
+               }
+               p->mbsz = need;
+       }
+
+       memcpy(p->b + p->bsz, pn->b, pn->bsz);
+       p->bsz += pn->bsz;
+       p->b[p->bsz] = '\0';
+}
+
+/*
+ * Gather the text of a whole subtree into the lookaside buffer.
+ * Text nodes are appended in document order; element nodes contribute
+ * nothing themselves but are descended into.
+ */
+static void
+bufappend_r(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid;
+
+       if (NODE_TEXT == pn->node)
+               bufappend(p, pn);
+
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child))
+               bufappend_r(p, kid);
+}
+
+#define MACROLINE_NORM 0
+#define MACROLINE_UPPER        1
+/*
+ * Recursively print text presumably on a macro line.
+ * All text below "pn" is collected into the lookaside buffer, then
+ * written to standard output with these adjustments:
+ *  - a separating space comes first unless we're on a fresh line;
+ *  - all whitespace is converted to regular spaces, and leading
+ *    whitespace is skipped;
+ *  - a word of the form "Xx" or "Xxx" (upper-case letter followed by
+ *    one or two lower-case letters) is prefixed with "\&" so that it
+ *    is not taken for a macro name;
+ *  - a backslash is written as "\e";
+ *  - with MACROLINE_UPPER in "fl", the text is upper-cased.
+ * No newline is written and p->newln is not changed.
+ */
+static void
+pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
+{
+       char            *cp;
+
+       if (0 == p->newln)
+               putchar(' ');
+
+       bufclear(p);
+       bufappend_r(p, pn);
+
+       /*
+        * The <ctype.h> functions need a value that fits in unsigned
+        * char (or EOF); a plain char holding a byte >= 0x80 may be
+        * negative, hence the casts throughout.
+        */
+
+       /* Convert all space to spaces. */
+       for (cp = p->b; '\0' != *cp; cp++)
+               if (isspace((unsigned char)*cp))
+                       *cp = ' ';
+
+       for (cp = p->b; isspace((unsigned char)*cp); cp++)
+               /* Spin past whitespace (XXX: necessary?) */ ;
+       for ( ; '\0' != *cp; cp++) {
+               /* Escape us if we look like a macro. */
+               if ((cp == p->b || ' ' == *(cp - 1)) &&
+                       isupper((unsigned char)*cp) &&
+                       '\0' != *(cp + 1) &&
+                       islower((unsigned char)*(cp + 1)) &&
+                       ('\0' == *(cp + 2) ||
+                        ' ' == *(cp + 2) ||
+                        (islower((unsigned char)*(cp + 2)) &&
+                         ('\0' == *(cp + 3) ||
+                          ' ' == *(cp + 3)))))
+                       fputs("\\&", stdout);
+               if (MACROLINE_UPPER & fl)
+                       putchar(toupper((unsigned char)*cp));
+               else
+                       putchar((unsigned char)*cp);
+               /* If we're a character escape, escape us. */
+               if ('\\' == *cp)
+                       putchar('e');
+       }
+}
+
+/*
+ * Print the text below a node as part of a macro line: no case
+ * conversion, no trailing newline.
+ */
+static void
+pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
+{
+
+       pnode_printmacrolinetext(p, pn, MACROLINE_NORM);
+}
+
+/*
+ * Like pnode_printmacrolinepart(), but finish the line afterwards.
+ * Must be called in the middle of a line; with no text below the
+ * node, only the separating space and the newline come out.
+ */
+static void
+pnode_printmacroline(struct parse *p, struct pnode *pn)
+{
+
+       assert(0 == p->newln);
+       pnode_printmacrolinetext(p, pn, MACROLINE_NORM);
+       fputc('\n', stdout);
+       p->newln = 1;
+}
+
+/*
+ * Get ready to emit a macro name.
+ * On a fresh line that means the leading dot (and we're no longer on
+ * a fresh line); in the middle of a line, a separating space.
+ */
+static void
+pnode_printmopen(struct parse *p)
+{
+
+       if ( ! p->newln) {
+               putchar(' ');
+               return;
+       }
+
+       putchar('.');
+       p->newln = 0;
+}
+
+/*
+ * Finish a macro line, but only if "sv" is set and the line is
+ * actually still open.
+ * Callers pass the value p->newln had before the macro was opened,
+ * or 1 to force the line closed.
+ */
+static void
+pnode_printmclose(struct parse *p, int sv)
+{
+
+       if ( ! sv || p->newln)
+               return;
+
+       putchar('\n');
+       p->newln = 1;
+}
+
+/*
+ * Remove the first title directly below a refsynopsisdiv, if any.
+ * Nothing is printed here.
+ */
+static void
+pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid;
+
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_TITLE != kid->node)
+                       continue;
+               /* kid is gone after this; stop right away. */
+               pnode_unlink(kid);
+               break;
+       }
+}
+
+/*
+ * Start a hopefully-named section: `Sh' for a refsect1, `Ss' for
+ * anything else.
+ * The first title child supplies the section name (upper-cased for
+ * `Sh') and is removed from the tree once printed.
+ * Without a title, the section is called UNKNOWN.
+ * We're on a fresh line when done.
+ */
+static void
+pnode_printrefsect(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *pp;
+
+       TAILQ_FOREACH(pp, &pn->childq, child)
+               if (NODE_TITLE == pp->node)
+                       break;
+
+       if (NODE_REFSECT1 == pn->node)
+               fputs(".Sh", stdout);
+       else
+               fputs(".Ss", stdout);
+
+       p->newln = 0;
+
+       if (NULL != pp) {
+               pnode_printmacrolinetext(p, pp,
+                       NODE_REFSECT1 == pn->node ?
+                       MACROLINE_UPPER : 0);
+               pnode_printmclose(p, 1);
+               pnode_unlink(pp);
+       } else {
+               /*
+                * The macro name above was written without a trailing
+                * space, so the separator has to come from here.
+                */
+               puts(" UNKNOWN");
+               p->newln = 1;
+       }
+}
+
+/*
+ * Print a manual cross reference (`Xr') from a citerefentry, using
+ * its refentrytitle and manvolnum children.
+ * A missing title is printed as "unknown", a missing volume as "1".
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printciterefentry(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid, *name, *sect;
+
+       name = sect = NULL;
+       assert(p->newln);
+
+       /* If there are several of either kind, the last one wins. */
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_MANVOLNUM == kid->node)
+                       sect = kid;
+               else if (NODE_REFENTRYTITLE == kid->node)
+                       name = kid;
+       }
+
+       fputs(".Xr", stdout);
+       p->newln = 0;
+
+       if (NULL == name)
+               fputs(" unknown ", stdout);
+       else
+               pnode_printmacrolinepart(p, name);
+
+       if (NULL != sect) {
+               pnode_printmacroline(p, sect);
+               return;
+       }
+
+       puts(" 1");
+       p->newln = 1;
+}
+
+/*
+ * Print the manual prologue (`Dd', `Dt', `Os') from a refmeta node.
+ * The title comes from refentrytitle (upper-cased, UNKNOWN if
+ * missing), the section from manvolnum (1 if missing).
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printrefmeta(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid, *name, *sect;
+
+       name = sect = NULL;
+       assert(p->newln);
+
+       /* If there are several of either kind, the last one wins. */
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_MANVOLNUM == kid->node)
+                       sect = kid;
+               else if (NODE_REFENTRYTITLE == kid->node)
+                       name = kid;
+       }
+
+       puts(".Dd $Mdocdate" "$");
+       fputs(".Dt", stdout);
+       p->newln = 0;
+
+       if (NULL == name)
+               fputs(" UNKNOWN ", stdout);
+       else
+               pnode_printmacrolinetext(p, name, MACROLINE_UPPER);
+
+       if (NULL != sect)
+               pnode_printmacroline(p, sect);
+       else {
+               puts(" 1");
+               p->newln = 1;
+       }
+
+       puts(".Os");
+}
+
+/*
+ * Print a function definition: the return type as `Ft' (taken from
+ * the text directly inside the funcdef, if any) and the opening `Fo'
+ * with the function name (UNKNOWN if there is no function child).
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printfuncdef(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid, *rettype, *fname;
+
+       assert(p->newln);
+       rettype = fname = NULL;
+
+       /* If there are several of either kind, the last one wins. */
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_TEXT == kid->node)
+                       rettype = kid;
+               else if (NODE_FUNCTION == kid->node)
+                       fname = kid;
+       }
+
+       if (NULL != rettype) {
+               fputs(".Ft", stdout);
+               p->newln = 0;
+               pnode_printmacroline(p, rettype);
+       }
+
+       if (NULL == fname) {
+               puts(".Fo UNKNOWN");
+               p->newln = 1;
+               return;
+       }
+
+       fputs(".Fo", stdout);
+       p->newln = 0;
+       pnode_printmacroline(p, fname);
+}
+
+/*
+ * Print one function parameter as a single quoted `Fa' argument:
+ * the type (text directly inside the paramdef), then the name (the
+ * parameter child).
+ * Either part may be missing.
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printparamdef(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid, *type, *name;
+
+       assert(p->newln);
+       type = name = NULL;
+
+       /* If there are several of either kind, the last one wins. */
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_TEXT == kid->node)
+                       type = kid;
+               else if (NODE_PARAMETER == kid->node)
+                       name = kid;
+       }
+
+       fputs(".Fa \"", stdout);
+       p->newln = 0;
+
+       if (NULL != type) {
+               pnode_printmacrolinepart(p, type);
+               putchar(' ');
+       }
+       if (NULL != name)
+               pnode_printmacrolinepart(p, name);
+
+       puts("\"");
+       p->newln = 1;
+}
+
+/*
+ * Print a complete function prototype: the first funcdef child (or
+ * an `Fo' for UNKNOWN if there is none), every paramdef child in
+ * order, and the closing `Fc'.
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printfuncprototype(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid, *def;
+
+       assert(p->newln);
+
+       for (def = TAILQ_FIRST(&pn->childq); NULL != def;
+            def = TAILQ_NEXT(def, child))
+               if (NODE_FUNCDEF == def->node)
+                       break;
+
+       if (NULL == def)
+               puts(".Fo UNKNOWN");
+       else
+               pnode_printfuncdef(p, def);
+
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (NODE_PARAMDEF != kid->node)
+                       continue;
+               pnode_printparamdef(p, kid);
+       }
+
+       puts(".Fc");
+       p->newln = 1;
+}
+
+/*
+ * Print an <arg> element.
+ * This is more involved than one would like: text children are
+ * printed as ".Ar foo", while element children go back through the
+ * generic printer without the preceding "Ar".
+ * The argument is wrapped in "Op" unless its choice attribute is
+ * "plain" or "req"; with rep="repeat", each text child is followed
+ * by an ellipsis.
+ */
+static void
+pnode_printarg(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *kid;
+       struct pattr    *attr;
+       int              optional, repeated, istext;
+
+       optional = 1;
+       repeated = 0;
+
+       for (attr = TAILQ_FIRST(&pn->attrq); NULL != attr;
+            attr = TAILQ_NEXT(attr, child)) {
+               if (ATTRKEY_CHOICE == attr->key &&
+                   (ATTRVAL_PLAIN == attr->val ||
+                    ATTRVAL_REQ == attr->val)) {
+                       optional = 0;
+                       continue;
+               }
+               if (ATTRKEY_REP == attr->key &&
+                   ATTRVAL_REPEAT == attr->val)
+                       repeated = 1;
+       }
+
+       if (optional) {
+               pnode_printmopen(p);
+               fputs("Op", stdout);
+       }
+
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               istext = NODE_TEXT == kid->node;
+               if (istext) {
+                       pnode_printmopen(p);
+                       fputs("Ar", stdout);
+               }
+               pnode_print(p, kid);
+               if (istext && repeated)
+                       fputs("...", stdout);
+       }
+}
+
+/*
+ * Print a <group> element: a set of alternatives.
+ * The group is wrapped in "Op" unless its choice attribute is "plain"
+ * or "req"; a non-optional group that starts on a fresh line is
+ * opened with "No" instead.
+ * Within a run of adjacent children of the same node type, the first
+ * goes through pnode_print() and the others are printed as plain text
+ * behind " |".
+ * If we started on a fresh line, the line is closed again at the end.
+ */
+static void
+pnode_printgroup(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *pp, *np;
+       struct pattr    *ap;
+       int              isop, sv;
+
+       /* Optional unless the choice attribute says otherwise. */
+       isop = 1;
+       TAILQ_FOREACH(ap, &pn->attrq, child)
+               if (ATTRKEY_CHOICE == ap->key &&
+                       (ATTRVAL_PLAIN == ap->val ||
+                        ATTRVAL_REQ == ap->val)) {
+                       isop = 0;
+                       break;
+               }
+
+       /*
+        * Make sure we're on a macro line.
+        * This will prevent pnode_print() for putting us on a
+        * subsequent line.
+        */
+       sv = p->newln;
+       pnode_printmopen(p);
+       if (isop)
+               fputs("Op", stdout);
+       else if (sv)
+               fputs("No", stdout);
+
+       /*
+        * Keep on printing text separated by the vertical bar as long
+        * as we're within the same origin node as the group.
+        * This is kind of a nightmare.
+        * Eh, DocBook...
+        * FIXME: if there's a "Fl", we don't cut off the leading "-"
+        * like we do in pnode_print().
+        */
+       TAILQ_FOREACH(pp, &pn->childq, child) {
+               pnode_print(p, pp);
+               np = TAILQ_NEXT(pp, child);
+               while (NULL != np) {
+                       if (pp->node != np->node)
+                               break;
+                       fputs(" |", stdout);
+                       pnode_printmacrolinepart(p, np);
+                       /*
+                        * Move the outer cursor along too, so that
+                        * TAILQ_FOREACH resumes after this run.
+                        */
+                       pp = np;
+                       np = TAILQ_NEXT(np, child);
+               }
+       }
+
+       pnode_printmclose(p, sv);
+}
+
+/*
+ * Depth-first search below "pn" for the first node of type "node".
+ * "pn" itself is not a candidate.
+ * Returns the node found or NULL.
+ */
+static struct pnode *
+pnode_findfirst(struct pnode *pn, enum nodeid node)
+{
+       struct pnode    *kid, *hit;
+
+       for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
+            kid = TAILQ_NEXT(kid, child)) {
+               if (kid->node == node)
+                       return(kid);
+               hit = pnode_findfirst(kid, node);
+               if (NULL != hit)
+                       return(hit);
+       }
+
+       return(NULL);
+}
+
+/*
+ * Print the manual prologue.
+ * If the tree has a refmeta anywhere, it is printed and then removed
+ * from the tree; otherwise a placeholder prologue is supplied,
+ * preceded by a comment saying so.
+ */
+static void
+pnode_printprologue(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *meta;
+
+       meta = NULL;
+       if (NULL != p->root)
+               meta = pnode_findfirst(p->root, NODE_REFMETA);
+
+       if (NULL == meta) {
+               puts(".\\\" Supplying bogus prologue...");
+               puts(".Dd $Mdocdate" "$");
+               puts(".Dt UNKNOWN 1");
+               puts(".Os");
+               return;
+       }
+
+       pnode_printrefmeta(p, meta);
+       pnode_unlink(meta);
+}
+
+/*
+ * Start a list item (`It') for a varlistentry.
+ * The first term child, if any, is printed on the `It' line and then
+ * removed from the tree; without one, a bare `It' is printed.
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printvarlistentry(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *term;
+
+       assert(p->newln);
+
+       for (term = TAILQ_FIRST(&pn->childq); NULL != term;
+            term = TAILQ_NEXT(term, child))
+               if (NODE_TERM == term->node)
+                       break;
+
+       if (NULL == term) {
+               puts(".It");
+               p->newln = 1;
+               return;
+       }
+
+       fputs(".It", stdout);
+       p->newln = 0;
+       pnode_print(p, term);
+       pnode_unlink(term);
+       pnode_printmclose(p, 1);
+}
+
+/*
+ * Print an itemized or ordered list as a `Bl' block (-enum for an
+ * orderedlist, -item otherwise).
+ * Title children are printed first, each behind a `Pp', and removed
+ * from the tree; every remaining child becomes one `It' item.
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printitemizedlist(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *pp, *np;
+
+       assert(p->newln);
+
+       /*
+        * pnode_unlink() frees the node, so the successor has to be
+        * fetched before the node goes away; TAILQ_FOREACH would read
+        * the link out of freed memory.
+        */
+       for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = np) {
+               np = TAILQ_NEXT(pp, child);
+               if (NODE_TITLE != pp->node)
+                       continue;
+               puts(".Pp");
+               pnode_print(p, pp);
+               pnode_unlink(pp);
+       }
+
+       assert(p->newln);
+
+       if (NODE_ORDEREDLIST == pn->node)
+               puts(".Bl -enum");
+       else
+               puts(".Bl -item");
+
+       TAILQ_FOREACH(pp, &pn->childq, child) {
+               assert(p->newln);
+               puts(".It");
+               pnode_print(p, pp);
+               pnode_printmclose(p, 1);
+       }
+       assert(p->newln);
+       puts(".El");
+}
+
+/*
+ * Print a variablelist as a tagged list (`Bl -tag').
+ * Title children are printed first, each behind a `Pp', and removed
+ * from the tree.
+ * A varlistentry child goes through the generic printer; the text of
+ * any other child is used as the head of an `It' line.
+ * Must start on a fresh line and ends on one.
+ */
+static void
+pnode_printvariablelist(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *pp, *np;
+
+       assert(p->newln);
+
+       /*
+        * pnode_unlink() frees the node, so the successor has to be
+        * fetched before the node goes away; TAILQ_FOREACH would read
+        * the link out of freed memory.
+        */
+       for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = np) {
+               np = TAILQ_NEXT(pp, child);
+               if (NODE_TITLE != pp->node)
+                       continue;
+               puts(".Pp");
+               pnode_print(p, pp);
+               pnode_unlink(pp);
+       }
+
+       assert(p->newln);
+       puts(".Bl -tag -width Ds");
+       TAILQ_FOREACH(pp, &pn->childq, child)
+               if (NODE_VARLISTENTRY != pp->node) {
+                       assert(p->newln);
+                       fputs(".It", stdout);
+                       /*
+                        * We're in the middle of a line now:
+                        * pnode_printmacroline() insists on that and
+                        * needs it to emit the separating space.
+                        */
+                       p->newln = 0;
+                       pnode_printmacroline(p, pp);
+               } else {
+                       assert(p->newln);
+                       pnode_print(p, pp);
+               }
+       assert(p->newln);
+       puts(".El");
+}
+
+/*
+ * Print a parsed node and, recursively, whatever children its handler
+ * left linked: open the macro, walk the children, close the macro.
+ * FIXME: if we're in a literal context (<screen> or <programlisting> or
+ * whatever), don't print inline macros.
+ */
+static void
+pnode_print(struct parse *p, struct pnode *pn)
+{
+       struct pnode    *pp;
+       char            *cp;
+       int              last, sv;
+
+       if (NULL == pn)
+               return;
+
+       sv = p->newln;          /* passed to pnode_printmclose() below */
+
+       switch (pn->node) {
+       case (NODE_ARG):
+               pnode_printarg(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_CITEREFENTRY):
+               assert(p->newln);
+               pnode_printciterefentry(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_CODE):
+               pnode_printmopen(p);
+               fputs("Li", stdout);
+               break;
+       case (NODE_COMMAND):
+               pnode_printmopen(p);
+               fputs("Nm", stdout);
+               break;
+       case (NODE_EMPHASIS):
+               pnode_printmopen(p);
+               fputs("Em", stdout);
+               break;
+       case (NODE_ENVAR):
+               pnode_printmopen(p);
+               fputs("Ev", stdout);
+               break;
+       case (NODE_FILENAME):
+               pnode_printmopen(p);
+               fputs("Pa", stdout);
+               break;
+       case (NODE_FUNCTION):
+               pnode_printmopen(p);
+               fputs("Fn", stdout);
+               break;
+       case (NODE_FUNCPROTOTYPE):
+               assert(p->newln);
+               pnode_printfuncprototype(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_FUNCSYNOPSISINFO):
+               pnode_printmopen(p);
+               fputs("Fd", stdout);
+               break;
+       case (NODE_ITEMIZEDLIST):
+               /* FALLTHROUGH */
+       case (NODE_ORDEREDLIST):
+               assert(p->newln);
+               pnode_printitemizedlist(p, pn);
+               /* Already printed: keep the walk below off them. */
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_GROUP):
+               pnode_printgroup(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_LITERAL):
+               pnode_printmopen(p);
+               fputs("Li", stdout);
+               break;
+       case (NODE_OPTION):
+               pnode_printmopen(p);
+               fputs("Fl", stdout);
+               break;
+       case (NODE_PARA):
+               assert(p->newln);
+               if (NULL != pn->parent &&
+                       NODE_LISTITEM == pn->parent->node)
+                       break;
+               puts(".Pp");
+               break;
+       case (NODE_PARAMETER):
+               /* Suppress non-text children... */
+               pnode_printmopen(p);
+               fputs("Fa \"", stdout);
+               pnode_printmacrolinepart(p, pn);
+               puts("\"");
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_PROGRAMLISTING):
+               /* FALLTHROUGH */
+       case (NODE_SCREEN):
+               assert(p->newln);
+               puts(".Bd -literal");
+               break;
+       case (NODE_REFENTRYINFO):
+               /* Suppress. */
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_REFMETA):
+               abort();
+               break;
+       case (NODE_REFNAME):
+               /* Suppress non-text children... */
+               pnode_printmopen(p);
+               fputs("Nm", stdout);
+               p->newln = 0;
+               pnode_printmacrolinepart(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_REFNAMEDIV):
+               assert(p->newln);
+               puts(".Sh NAME");
+               break;
+       case (NODE_REFPURPOSE):
+               assert(p->newln);
+               pnode_printmopen(p);
+               fputs("Nd", stdout);
+               break;
+       case (NODE_REFSYNOPSISDIV):
+               assert(p->newln);
+               pnode_printrefsynopsisdiv(p, pn);
+               puts(".Sh SYNOPSIS");
+               break;
+       case (NODE_REFSECT1):
+               /* FALLTHROUGH */
+       case (NODE_REFSECT2):
+               assert(p->newln);
+               pnode_printrefsect(p, pn);
+               break;
+       case (NODE_REPLACEABLE):
+               pnode_printmopen(p);
+               fputs("Ar", stdout);
+               break;
+       case (NODE_SBR):
+               assert(p->newln);
+               puts(".br");
+               break;
+       case (NODE_STRUCTNAME):
+               pnode_printmopen(p);
+               fputs("Vt", stdout);
+               break;
+       case (NODE_TEXT):
+               if (0 == p->newln)
+                       putchar(' ');
+               bufclear(p);
+               bufappend(p, pn);
+               /*
+                * Output all characters, dropping the whitespace that
+                * starts each line (XXX: tabs too?) and guarding a
+                * leading "'" or "." with "\&"; a backslash becomes "\e".
+                */
+               assert(p->bsz);
+               cp = p->b;
+               /*
+                * <option> text often carries its own leading "-";
+                * "Fl" supplies one, so drop the duplicate.
+                */
+               if (NULL != pn->parent &&
+                       NODE_OPTION == pn->parent->node &&
+                       '-' == *cp)
+                       cp++;
+               for (last = '\n'; '\0' != *cp; ) {
+                       if ('\n' == last) {
+                               /* Consume all whitespace. */
+                               if (isspace((unsigned char)*cp)) {
+                                       while (isspace((unsigned char)*cp))
+                                               cp++;
+                                       continue;
+                               } else if ('\'' == *cp || '.' == *cp)
+                                       fputs("\\&", stdout);
+                       }
+                       putchar(last = *cp++);
+                       /* If we're a character escape, escape us. */
+                       if ('\\' == last)
+                               putchar('e');
+               }
+               p->newln = 0;
+               break;
+       case (NODE_VARIABLELIST):
+               assert(p->newln);
+               pnode_printvariablelist(p, pn);
+               pnode_unlinksub(pn);
+               break;
+       case (NODE_VARLISTENTRY):
+               assert(p->newln);
+               pnode_printvarlistentry(p, pn);
+               break;
+       case (NODE_USERINPUT):
+               pnode_printmopen(p);
+               fputs("Li", stdout);
+               break;
+       default:
+               break;
+       }
+
+       TAILQ_FOREACH(pp, &pn->childq, child)
+               pnode_print(p, pp);
+
+       switch (pn->node) {
+       case (NODE_ARG):
+       case (NODE_CODE):
+       case (NODE_COMMAND):
+       case (NODE_EMPHASIS):
+       case (NODE_ENVAR):
+       case (NODE_FILENAME):
+       case (NODE_FUNCTION):
+       case (NODE_FUNCSYNOPSISINFO):
+       case (NODE_LITERAL):
+       case (NODE_OPTION):
+       case (NODE_PARAMETER):
+       case (NODE_REPLACEABLE):
+       case (NODE_REFPURPOSE):
+       case (NODE_STRUCTNAME):
+       case (NODE_TEXT):
+       case (NODE_USERINPUT):
+               pnode_printmclose(p, sv);
+               break;
+       case (NODE_REFNAME):
+               /*
+                * If we're in the NAME macro and we have multiple
+                * <refname> macros in sequence, then print out a
+                * trailing comma before the newline.
+                */
+               if (NULL != pn->parent &&
+                       NODE_REFNAMEDIV == pn->parent->node &&
+                       NULL != TAILQ_NEXT(pn, child) &&
+                       NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
+                       fputs(" ,", stdout);
+               pnode_printmclose(p, sv);
+               break;
+       case (NODE_PROGRAMLISTING):
+               /* FALLTHROUGH */
+       case (NODE_SCREEN):
+               assert(p->newln);
+               puts(".Ed");
+               p->newln = 1;
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Feed fd to the parser one buffer at a time until end of file, a
+ * parse error or p.stop being set, then print what has been parsed.
+ * Returns non-zero only if XML_Parse() never failed and p.stop is clear.
+ */
+static int
+readfile(XML_Parser xp, int fd,
+       char *b, size_t bsz, const char *fn)
+{
+       struct parse     p;
+       int              rc;
+       ssize_t          ssz;
+
+       memset(&p, 0, sizeof(struct parse));
+       /* pnode_print() reads p.b: fail now rather than run with NULL. */
+       if (NULL == (p.b = malloc(p.bsz = p.mbsz = 1024))) {
+               perror(NULL);
+               return(0);
+       }
+       p.fname = fn;
+       p.xml = xp;
+
+       XML_SetCharacterDataHandler(xp, xml_char);
+       XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
+       XML_SetUserData(xp, &p);
+
+       while ((ssz = read(fd, b, bsz)) >= 0) {
+               if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
+                       fprintf(stderr, "%s: %s\n", fn,
+                           XML_ErrorString(XML_GetErrorCode(xp)));
+               else if ( ! p.stop && ssz > 0)
+                       continue;
+               /* End of input, parse error or stop: print and leave. */
+               p.newln = 1;
+               pnode_printprologue(&p, p.root);
+               pnode_print(&p, p.root);
+               pnode_free(p.root);
+               free(p.b);
+               return(0 != rc && ! p.stop);
+       }
+
+       /* Read error has occurred. */
+       perror(fn);
+       pnode_free(p.root);
+       free(p.b);
+       return(0);
+}
+
+/*
+ * Process the file operand, or stdin if it is "-" or absent.
+ */
+int
+main(int argc, char *argv[])
+{
+       XML_Parser       xp;
+       const char      *fname;
+       char            *buf;
+       int              fd, rc;
+
+       fname = "-";
+       xp = NULL;
+       buf = NULL;
+       rc = 0;
+
+       if (-1 != getopt(argc, argv, ""))
+               return(EXIT_FAILURE);
+
+       argc -= optind;
+       argv += optind;
+
+       if (argc > 1)
+               return(EXIT_FAILURE);
+       else if (argc > 0)
+               fname = argv[0];
+
+       /* Read from stdin or a file. */
+       fd = 0 == strcmp(fname, "-") ?
+               STDIN_FILENO : open(fname, O_RDONLY, 0);
+
+       /*
+        * Get a read buffer and a parser, then parse; only success sets rc.
+        */
+       if (-1 == fd)
+               perror(fname);
+       else if (NULL == (buf = malloc(4096)))
+               perror(NULL);
+       else if (NULL == (xp = XML_ParserCreate(NULL)))
+               perror(NULL);
+       else if (readfile(xp, fd, buf, 4096, fname))
+               rc = 1;
+
+       XML_ParserFree(xp);
+       free(buf);
+       if (-1 != fd && STDIN_FILENO != fd)
+               close(fd);
+       return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
+}
Index: extern.h
===================================================================
RCS file: extern.h
diff -N extern.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ extern.h    1 Apr 2014 11:03:52 -0000
@@ -0,0 +1,99 @@
+#ifndef EXTERN_H
+#define EXTERN_H
+
+/*
+ * All recognised node types; NODE__MAX is their count, not a type.
+ */
+enum   nodeid {
+       NODE_ROOT = 0, /* Must come first. */
+       /* Alpha-ordered hereafter (NODE_ITEMIZEDLIST is out of place). */
+       NODE_ACRONYM,
+       NODE_ARG,
+       NODE_CITEREFENTRY,
+       NODE_CMDSYNOPSIS,
+       NODE_CODE,
+       NODE_COMMAND,
+       NODE_DATE,
+       NODE_EMPHASIS,
+       NODE_ENVAR,
+       NODE_FILENAME,
+       NODE_FUNCDEF,
+       NODE_FUNCPROTOTYPE,
+       NODE_FUNCSYNOPSIS,
+       NODE_FUNCSYNOPSISINFO,
+       NODE_FUNCTION,
+       NODE_ITEMIZEDLIST,
+       NODE_GROUP,
+       NODE_LINK,
+       NODE_LISTITEM,
+       NODE_LITERAL,
+       NODE_MANVOLNUM,
+       NODE_OPTION,
+       NODE_ORDEREDLIST,
+       NODE_PARA,
+       NODE_PARAMDEF,
+       NODE_PARAMETER,
+       NODE_PROGRAMLISTING,
+       NODE_PROMPT,
+       NODE_REFCLASS,
+       NODE_REFDESCRIPTOR,
+       NODE_REFENTRY,
+       NODE_REFENTRYINFO,
+       NODE_REFENTRYTITLE,
+       NODE_REFMETA,
+       NODE_REFMISCINFO,
+       NODE_REFNAME,
+       NODE_REFNAMEDIV,
+       NODE_REFPURPOSE,
+       NODE_REFSECT1,
+       NODE_REFSECT2,
+       NODE_REFSYNOPSISDIV,
+       NODE_REPLACEABLE,
+       NODE_SBR,
+       NODE_SCREEN,
+       NODE_STRUCTNAME,
+       NODE_SYNOPSIS,
+       NODE_TERM,
+       NODE_TEXT,
+       NODE_TITLE,
+       NODE_ULINK,
+       NODE_USERINPUT,
+       NODE_VARIABLELIST,
+       NODE_VARLISTENTRY,
+       NODE__MAX
+};
+
+/*
+ * All recognised attribute keys; ATTRKEY__MAX is their count, not a key.
+ */
+enum   attrkey {
+       /* Alphabetical order. */
+       ATTRKEY_CHOICE = 0,
+       ATTRKEY_ID,
+       ATTRKEY_REP,
+       ATTRKEY__MAX
+};
+
+/*
+ * All [explicitly] recognised attribute values.
+ * ATTRVAL__MAX stands for any other value, which could be free-form.
+ */
+enum   attrval {
+       /* Alphabetical order. */
+       ATTRVAL_NOREPEAT,
+       ATTRVAL_OPT,
+       ATTRVAL_PLAIN,
+       ATTRVAL_REPEAT,
+       ATTRVAL_REQ,
+       ATTRVAL__MAX
+};
+
+__BEGIN_DECLS
+/* Validity predicates, defined in rules.c. */
+int isattrkey(enum nodeid node, enum attrkey key);
+int isattrval(enum attrkey key, enum attrval val);
+int isparent(enum nodeid node, enum nodeid parent);
+
+__END_DECLS
+
+#endif
Index: rules.c
===================================================================
RCS file: rules.c
diff -N rules.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ rules.c     1 Apr 2014 11:03:52 -0000
@@ -0,0 +1,723 @@
+#include <stdlib.h>
+
+#include "extern.h"
+
+/*
+ * Decide whether attribute "key" may appear on element "node".
+ * ATTRKEY_ID is accepted everywhere, ATTRKEY_CHOICE and ATTRKEY_REP
+ * only on NODE_ARG and NODE_GROUP.
+ * Returns 1 if the pairing is allowed and 0 if it is not.
+ * A key that is not a real enum attrkey member (ATTRKEY__MAX
+ * included) is treated as a programming error and aborts.
+ */
+int
+isattrkey(enum nodeid node, enum attrkey key)
+{
+       int      synop;
+
+       /* The two elements that take ATTRKEY_CHOICE and ATTRKEY_REP. */
+       synop = NODE_ARG == node || NODE_GROUP == node;
+
+       switch (key) {
+       case (ATTRKEY_ID):
+               /* Common to all. */
+               return(1);
+       case (ATTRKEY_CHOICE):
+               /* FALLTHROUGH */
+       case (ATTRKEY_REP):
+               return(synop);
+       default:
+               break;
+       }
+
+       /* Not reached for any valid key. */
+       abort();
+       return(0);
+}
+
+/*
+ * Return 1 if "val" is a legal value of attribute "key", else 0.
+ */
+int
+isattrval(enum attrkey key, enum attrval val)
+{
+
+       /* Each known value belongs to exactly one key. */
+       if (ATTRVAL_OPT == val || ATTRVAL_PLAIN == val ||
+           ATTRVAL_REQ == val)
+               return(ATTRKEY_CHOICE == key);
+       if (ATTRVAL_NOREPEAT == val || ATTRVAL_REPEAT == val)
+               return(ATTRKEY_REP == key);
+
+       /* ATTRVAL__MAX (or junk) is a caller bug: abort. */
+       abort();
+       return(0);
+}
+
+/*
+ * Look up whether "parent" is a valid parent for "node".
+ * This is sucked directly from the DocBook specification: look at the
+ * "children" and "parent" sections of each node.
+ */
+int
+isparent(enum nodeid node, enum nodeid parent)
+{
+
+       switch (node) {
+       case (NODE_ROOT):
+               return(0);
+       case (NODE_ACRONYM):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_ARG):
+               switch (parent) {
+               case (NODE_ARG):
+               case (NODE_CMDSYNOPSIS):
+               case (NODE_GROUP):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_CITEREFENTRY):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_CMDSYNOPSIS):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_CODE):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_COMMAND):
+               switch (parent) {
+               case (NODE_CMDSYNOPSIS):
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_DATE):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYINFO):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_EMPHASIS):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_ENVAR):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_FILENAME):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_FUNCDEF):
+               return(NODE_FUNCPROTOTYPE == parent);
+       case (NODE_FUNCPROTOTYPE):
+               return(NODE_FUNCSYNOPSIS == parent);
+       case (NODE_FUNCSYNOPSIS):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_FUNCSYNOPSISINFO):
+               return(NODE_FUNCSYNOPSIS == parent);
+       case (NODE_FUNCTION):
+               switch (parent) {
+               case (NODE_CODE):
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCDEF):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_ITEMIZEDLIST):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_GROUP):
+               switch (parent) {
+               case (NODE_ARG):
+               case (NODE_CMDSYNOPSIS):
+               case (NODE_GROUP):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_LINK):
+       case (NODE_ULINK): /* Synonyms. */
+               switch (parent) {
+               case (NODE_ACRONYM):
+               case (NODE_ARG):
+               case (NODE_CODE):
+               case (NODE_COMMAND):
+               case (NODE_EMPHASIS):
+               case (NODE_ENVAR):      
+               case (NODE_FILENAME):
+               case (NODE_FUNCDEF):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_FUNCTION):
+               case (NODE_LINK):
+               case (NODE_LITERAL):
+               case (NODE_MANVOLNUM):
+               case (NODE_OPTION):
+               case (NODE_PARA):
+               case (NODE_PARAMDEF):
+               case (NODE_PARAMETER):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_PROMPT):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFMISCINFO):
+               case (NODE_REFNAME):
+               case (NODE_REPLACEABLE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_LISTITEM):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_ORDEREDLIST):
+               case (NODE_VARLISTENTRY):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_LITERAL):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_MANVOLNUM):
+               switch (parent) {
+               case (NODE_CITEREFENTRY):
+               case (NODE_REFMETA):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_OPTION):
+               switch (parent) {
+               case (NODE_ARG):
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_GROUP):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_ORDEREDLIST):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_PARA):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_PARAMDEF):
+               return(NODE_FUNCPROTOTYPE == parent);
+       case (NODE_PARAMETER):
+               switch (parent) {
+               case (NODE_CODE):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PARAMDEF):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_PROGRAMLISTING):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_PROMPT):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_REFCLASS):
+               return(parent == NODE_REFNAMEDIV);
+       case (NODE_REFDESCRIPTOR):
+               return(parent == NODE_REFNAMEDIV);
+       case (NODE_REFENTRY):
+               return(parent == NODE_ROOT);
+       case (NODE_REFENTRYINFO):
+               return(parent == NODE_REFENTRY);
+       case (NODE_REFENTRYTITLE):
+               switch (parent) {
+               case (NODE_CITEREFENTRY):
+               case (NODE_REFMETA):
+                       return(1);
+               default:
+                       break;
+               }
+       case (NODE_REFMETA):
+               return(parent == NODE_REFENTRY);
+       case (NODE_REFMISCINFO):
+               return(parent == NODE_REFMETA);
+       case (NODE_REFNAME):
+               return(parent == NODE_REFNAMEDIV);
+       case (NODE_REFNAMEDIV):
+               return(parent == NODE_REFENTRY);
+       case (NODE_REFPURPOSE):
+               return(parent == NODE_REFNAMEDIV);
+       case (NODE_REFSECT1):
+               return(parent == NODE_REFENTRY);
+       case (NODE_REFSECT2):
+               switch (parent) {
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_REFSECT1):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_REFSYNOPSISDIV):
+               return(parent == NODE_REFENTRY);
+       case (NODE_REPLACEABLE):
+               switch (parent) {
+               case (NODE_ACRONYM):
+               case (NODE_ARG):
+               case (NODE_CODE):
+               case (NODE_COMMAND):
+               case (NODE_EMPHASIS):
+               case (NODE_ENVAR):      
+               case (NODE_FILENAME):
+               case (NODE_FUNCDEF):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_FUNCTION):
+               case (NODE_GROUP):
+               case (NODE_LINK):
+               case (NODE_LITERAL):
+               case (NODE_MANVOLNUM):
+               case (NODE_OPTION):
+               case (NODE_PARA):
+               case (NODE_PARAMDEF):
+               case (NODE_PARAMETER):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_PROMPT):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFMISCINFO):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_REPLACEABLE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+               default:
+                       return(1);
+               }
+               return(0);
+       case (NODE_SBR):
+               switch (parent) {
+               case (NODE_ARG):
+               case (NODE_CMDSYNOPSIS):
+               case (NODE_GROUP):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_SCREEN):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_STRUCTNAME):
+               switch (parent) {
+               case (NODE_CODE):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_FUNCTION):
+               case (NODE_OPTION):
+               case (NODE_PARA):
+               case (NODE_PARAMETER):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SYNOPSIS):
+               case (NODE_TITLE):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_SYNOPSIS):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_LISTITEM):
+               case (NODE_ORDEREDLIST):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_TITLE):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_ORDEREDLIST):
+               case (NODE_REFENTRYINFO):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_TERM):
+               return(NODE_VARLISTENTRY == parent);
+       case (NODE_TEXT):
+               return(1);
+       case (NODE_USERINPUT):
+               switch (parent) {
+               case (NODE_EMPHASIS):
+               case (NODE_FUNCSYNOPSISINFO):
+               case (NODE_LINK):
+               case (NODE_PARA):
+               case (NODE_PROGRAMLISTING):
+               case (NODE_REFDESCRIPTOR):
+               case (NODE_REFENTRYTITLE):
+               case (NODE_REFNAME):
+               case (NODE_REFPURPOSE):
+               case (NODE_SCREEN):
+               case (NODE_SYNOPSIS):
+               case (NODE_TERM):
+               case (NODE_TITLE):
+               case (NODE_ULINK):
+               case (NODE_USERINPUT):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_VARIABLELIST):
+               switch (parent) {
+               case (NODE_ITEMIZEDLIST):
+               case (NODE_ORDEREDLIST):
+               case (NODE_PARA):
+               case (NODE_REFSECT1):
+               case (NODE_REFSECT2):
+               case (NODE_REFSYNOPSISDIV):
+               case (NODE_VARIABLELIST):
+                       return(1);
+               default:
+                       break;
+               }
+               return(0);
+       case (NODE_VARLISTENTRY):
+               return (NODE_VARIABLELIST == parent);
+       case (NODE__MAX):
+               break;
+       }
+
+       abort();
+       return(0);
+}
+




Reply via email to