Bug#533593: manpage-writer: Remove extraneous newline characters in roff output

Ben Finney Thu, 18 Jun 2009 23:19:50 -0700

Howdy Englebert,

A newline character in roff markup is significant (unlike in, e.g.,
HTML), so it's important to have newlines only where they are
needed to produce the desired effect in the rendered output.


The current manpage writer produces roff output with many extraneous
newline characters. This results in large stretches of unwanted
whitespace in the rendered man page.

The attached patch against the current VCS revision fixes this for the
existing test input, but it may need further changes in response to
other tests.

-- 
 \     “Are you pondering what I'm pondering?” “I think so, Brain, but |
  `\        why would anyone want a depressed tongue?” —_Pinky and The |
_o__)                                                           Brain_ |
Ben Finney <[email protected]>

=== modified file 'docutils/writers/manpage.py'
--- docutils/writers/manpage.py	2009-04-29 08:58:51 +0000
+++ docutils/writers/manpage.py	2009-06-19 05:57:29 +0000
@@ -89,7 +89,7 @@
 .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .in \\n[rst2man-indent\\n[rst2man-indent-level]]u
 ..
-""")
+""").lstrip()
 
 class Writer(writers.Writer):
 
@@ -190,16 +190,16 @@
                 'indent' : ('.INDENT %.1f\n', '.UNINDENT\n'),
                 'definition' : ('', ''),
                 'definition_list' : ('', '.TP 0\n'),
-                'definition_list_item' : ('\n.TP', ''),
+                'definition_list_item' : ('.TP', ''),
                 #field_list
                 #field
-                'field_name' : ('\n.TP\n.B ', '\n'),
-                'field_body' : ('', '.RE\n', ),
+                'field_name' : ('.TP\n.B ', '\n'),
+                'field_body' : ('', '.RE\n'),
                 'literal' : ('\\fB', '\\fP'),
                 'literal_block' : ('\n.nf\n', '\n.fi\n'),
 
                 #option_list
-                'option_list_item' : ('\n.TP', ''),
+                'option_list_item' : ('.TP', ''),
                 #option_group, option
                 'description' : ('\n', ''),
                 
@@ -220,12 +220,15 @@
 
     def comment_begin(self, text):
         """Return commented version of the passed text WITHOUT end of line/comment."""
-        prefix = '\n.\\" '
-        return prefix+prefix.join(text.split('\n'))
+        prefix = '.\\" '
+        out_text = ''.join(
+            (prefix + in_line + '\n')
+            for in_line in text.split('\n'))
+        return out_text
 
     def comment(self, text):
         """Return commented version of the passed text."""
-        return self.comment_begin(text)+'\n'
+        return self.comment_begin(text)
 
     def astext(self):
         """Return the final formatted document as a string."""
@@ -298,10 +301,21 @@
         self._list_char.pop()
 
     def header(self):
-        tmpl = (".TH %(title)s %(manual_section)s"
-                " \"%(date)s\" \"%(version)s\" \"%(manual_group)s\"\n"
-                ".SH NAME\n"
-                "%(title)s \- %(subtitle)s\n")
+        th_line_tmpl = (
+            ".TH"
+            " %(title)s"
+            " %(manual_section)s"
+            " \"%(date)s\""
+            " \"%(version)s\""
+            " \"%(manual_group)s\"")
+        tmpl = "".join([
+            "%(line)s\n" % vars()
+            for line in [
+                th_line_tmpl,
+                ".SH NAME",
+                "%(title)s \- %(subtitle)s",
+                ]
+            ])
         return tmpl % self._docinfo
 
     def append_header(self):
@@ -519,7 +533,7 @@
 
     def depart_document(self, node):
         if self._docinfo['author']:
-            self.body.append('\n.SH AUTHOR\n%s\n' 
+            self.body.append('.SH AUTHOR\n%s\n' 
                     % self._docinfo['author'])
         if 'organization' in self._docinfo:
             self.body.append(self.defs['organization'][0])
@@ -530,7 +544,7 @@
             self.body.append(self._docinfo['address'])
             self.body.append(self.defs['address'][1])
         if self._docinfo['copyright']:
-            self.body.append('\n.SH COPYRIGHT\n%s\n' 
+            self.body.append('.SH COPYRIGHT\n%s\n' 
                     % self._docinfo['copyright'])
         self.body.append(
                 self.comment(
@@ -744,10 +758,10 @@
         self.body.append('</div>\n')
 
     def visit_line_block(self, node):
-        self.body.append('\n')
+        pass
 
     def depart_line_block(self, node):
-        self.body.append('\n')
+        pass
 
     def visit_line(self, node):
         pass
@@ -757,7 +771,7 @@
 
     def visit_list_item(self, node):
         # man 7 man argues to use ".IP" instead of ".TP"
-        self.body.append('\n.IP %s %d\n' % (
+        self.body.append('.IP %s %d\n' % (
                 self._list_char[-1].next(),
                 self._list_char[-1].get_width(),) )
 
@@ -879,7 +893,7 @@
     def depart_paragraph(self, node):
         # TODO .PP or an empty line
         if not self._in_entry:
-            self.body.append('\n\n')
+            self.body.append('\n')
 
     def visit_problematic(self, node):
         self.body.append(self.defs['problematic'][0])
@@ -889,7 +903,7 @@
 
     def visit_raw(self, node):
         if node.get('format') == 'manpage':
-            self.body.append(node.astext())
+            self.body.append(node.astext() + "\n")
         # Keep non-manpage raw text out of output:
         raise nodes.SkipNode
 
@@ -1026,9 +1040,9 @@
             self._docinfo['title'] = node.astext()
             raise nodes.SkipNode
         elif self.section_level == 1:
-            self.body.append('\n.SH ')
+            self.body.append('.SH ')
         else:
-            self.body.append('\n.SS ')
+            self.body.append('.SS ')
 
     def depart_title(self, node):
         self.body.append('\n')

=== modified file 'expected/test.man'
--- expected/test.man	2008-05-15 07:12:34 +0000
+++ expected/test.man	2009-06-19 05:37:14 +0000
@@ -1,8 +1,7 @@
 .\" Man page generated from reStructeredText.
-.TH rst2man 1 "2006-10-22" "0.0.1" "text processing"
+.TH rst2man 1 "2009-06-19" "0.0.1" "text processing"
 .SH NAME
 rst2man \- generate unix manpages from reStructured text
-
 .nr rst2man-indent-level 0
 .
 .de1 rstReportMargin
@@ -29,176 +28,107 @@
 .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .in \\n[rst2man-indent\\n[rst2man-indent-level]]u
 ..
-
 .\" TODO: authors and author with name <email>
-
 .SH SYNOPSIS
 \fBrst2man\fP \fB\-\-help\fP
-
 \fBrst2man\fP [ OPTIONS ] [ SOURCE [ \fIdestination\fP ] ]
-
-
 .SH DESCRIPTION
 Run it and examine output.
-
-
 .SH OPTIONS
 .INDENT 0.0
-
 .TP
 .BI \-o\  x
 an option
-
-
 .TP
 .B \-b
 another
-
 .UNINDENT
 For all other options see \fB\-\-help\fP.
-
-
 .SH EXAMPLES
 .INDENT 0.0
-
 .TP
 .B rst2man.py xml\-schema\-catalog.rst xml\-schema\-catalog.man
 create a manpage from xml\-schema\-catalog.rst
-
 .UNINDENT
-
 .SS Character formatting
 \fIBUG\fP The section title should be separated from the title above.
-
-
 .SS Lists
 .INDENT 0.0
-
 .IP \(bu 2
 bullet list
-
-
 .IP \(bu 2
 and a longer entry, and a longer entry, and a longer entry, and a longer entry,
 and a longer entry, and a longer entry, and a longer entry, and a longer entry,
-
 .INDENT 2.0
-
 .IP 1. 4
 and a nested
-
-
 .IP 2. 4
 enumeration
-
 .INDENT 2.0
-
 .IP \(bu 2
 some line
-
-
 .IP \(bu 2
 bullets
-
 .UNINDENT
-
 .IP 3. 4
 a third
-
-
 .IP 4. 4
 a third
-
-
 .IP 5. 4
 a third
-
 .INDENT 2.0
-
 .IP a. 3
 alphabetically list
-
-
 .IP b. 3
 betacarotin
-
-
 .IP c. 3
 grammar
-
 .UNINDENT
-
 .IP 6. 4
 a third
-
-
 .IP 7. 4
 a third
-
-
 .IP 8. 4
 a third
-
-
 .IP 9. 4
 a third
-
-
 .IP 10. 4
 bigger indentation if there are more than nine
 enumerated items (or there are larger numbers)
-
 .UNINDENT
 .UNINDENT
 .INDENT 0.0
-
 .IP 7. 3
 dont start with 1
-
 .UNINDENT
 .INDENT 0.0
-
 .IP 9. 3
 dont increment by 1
-
 .UNINDENT
 .INDENT 0.0
-
 .IP 13. 3
 does not work.
-
 .UNINDENT
 This paragraph must not be indented with the text the last list\-item,
 but start at the left margin of the numbers.
-
-
 .SS Field lists
 .INDENT 0.0
-
 .TP
 .B first
 a short label and a short text
-
-
 .TP
 .B secondly a longer label
 and some more text and some more text
 and some more text   and some more text
-
 .UNINDENT
 .INDENT 0.0
-
 .TP
 .B definition lists
 are also a valuable option and simple nonintrusive formatting
-
-
 .TP
 .B definition too :
 with the colon.
-
 .UNINDENT
-
 .SS Tables
 .TS
 center;
@@ -210,73 +140,46 @@
 header	 
 _
 .TE
-
 .SS lineblock
-
 paul
 .br
 tel. 12345
 .br
 LA
 .br
-
-
 .SS raw
 raw input to man
 .SS other
 inline references \fIref something\fP .
-
-
 .SH FILES
 This is a file.
-
-
 .SH SEE ALSO
 \fI\%docutils\fP
-
 \fBrst2xml\fP(dummy)
-
 More information can be found about
-
 .INDENT 0.0
-
 .IP \(bu 2
 xml\-schema\-catalog at
 \fI\%http://xml\-schema\-catalog.origo.ethz.ch/\fP
-
 .UNINDENT
 And see the stars at the sky!
-
-
 .SH BUGS
 Numerous mapping problems.
-
 .INDENT 0.0
-
 .IP 1. 3
 Where do we get the manual section number from ? Commandline ?
-
-
 .IP 2. 3
 docutils authors should be filled into section "AUTHORS".
-
-
 .IP 3. 3
 be carefull with linebreaks in generated code.
-
-
 .IP 4. 3
 list items.
 bullets and enumerators.
-
 .UNINDENT
-
 .SH AUTHOR
 [email protected]
-
 .SH COPYRIGHT
 public domain
 Behave responsible.
-
 .\" Generated by docutils manpage writer on 2008-05-15 09:11.
 .\" 

=== modified file 'input/test.txt'
--- input/test.txt	2009-04-07 13:26:48 +0000
+++ input/test.txt	2009-06-19 04:05:30 +0000
@@ -11,7 +11,7 @@
 :Address: 123 Example Street
           Example, EX  Canada
           A1B 2C3
-:Date:   2006-10-22
+:Date:   2009-06-19
 :Copyright: public domain
             Behave responsible.
 :Version: 0.0.1

Bug#533593: manpage-writer: Remove extraneous newline characters in roff output

Reply via email to