Hi Rogério and Ben,

Good news!

I have written a Scheme script which converts README.md to Perl POD, it needs 
guile-2.0 in order to run.

Running:

$ git clone https://github.com/rg3/youtube-dl.git
$ ./prepare_manpage.scm < youtube-dl/README.md | uniq | pod2man --center='User 
Commands' --date='2015-09-06' --errors='die' --name='YOUTUBE-DL' 
--release="$(printf "$(printf '\nprint(__version__)\n')\n" | cat 
youtube-dl/youtube_dl/version.py - | python)" --section=1 | man -l -

should fire up the man pager and render the man page from stdin.

I have tried to set format to markdown_github but pandoc still doesn't work 
very well, so I write this.

I will try to convert the Scheme script to Python script and see if upstream 
accepts it or not.

After that, I will try to see what can I do with 
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=723990

Cheers,
Alex
#!/usr/bin/guile -s
!#

;; Written in 2015 by Alex Vong <alexvong1...@gmail.com>
;;
;; To the extent possible under law, the author(s) have dedicated all copyright
;; and related and neighboring rights to this software to the public domain
;; worldwide. This software is distributed without any warranty.
;;
;; You should have received a copy of the CC0 Public Domain Dedication along
;; with this software.
;; If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.

(use-modules (srfi srfi-1))
(use-modules (ice-9 regex))

(define (from-stdin)
  (define output-string-port (open-output-string))
  (let loop ((char (read-char (current-input-port))))
    (if (not (eof-object? char))
	(begin
	  (write-char char output-string-port)
	  (loop (read-char (current-input-port))))))
  output-string-port)

(define readme (string-append "=pod\n\n=encoding utf8\n\n# NAME\n\n"
			      (get-output-string (from-stdin))))

(define (match? match) (not (eq? match #f)))

(define (regex-sub match . items)
  (apply regexp-substitute (cons #f (cons match items))))

(define (pat-lst->regex-lst pat-lst . items)
  (map (λ(pat) (apply make-regexp (cons pat items))) pat-lst))

(define (regex-sub/global regex-lst str . items)
  (define (next str)
    (apply regexp-substitute/global
	   (cons #f (cons (car regex-lst) (cons str items)))))
  (if (null? regex-lst)
      str
      (apply regex-sub/global
	     (cons (cdr regex-lst) (cons (next str) items)))))

(define (regex-split pat str regex-param)
  (define match (regexp-exec
		 (apply make-regexp (cons pat regex-param))
		 str))
  (if (match?  match)
      (concatenate
       (list (regex-split pat (regex-sub match 'pre) regex-param)
	     (list (regex-sub match 0))
	     (regex-split pat (regex-sub match 'post) regex-param)))
      (list str)))

(define (remove-table-of-content str)
  (define pat-lst '("^-[[:blank:]]+\\[[-[:blank:][:upper:]]+\\]\\(#[-[:lower:]]+\\)[[:blank:]]*$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre 'post))

(define (bold str)
  (define pat-lst '("\\*\\*([^\\*]+)\\*\\*"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "B<<< " 1 " >>>" 'post))

(define (italic str)
  (define pat-lst '("\\*([^\\*]+)\\*" "`{1,2}([^`]+)`{1,2}"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "I<<< " 1 " >>>" 'post))

(define (remove-internal-links str)
  (define pat-lst '("\\[([^]|]+)\\]\\(#[^\\)|]+\\)"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre 1 'post))

(define (external-links str)
  (define pat-lst '("\\[([^]|]+)\\]\\(([^\\)|]+)\\)"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "L<<< " 1 "|" 2 " >>>" 'post))

(define (lexer-generator pat type regex-param)
  (define (match str)
    (regexp-exec (apply make-regexp (cons pat regex-param)) str))
  (λ(str)
    (map (λ(str) (if (match? (match str))
		      (list type (regex-sub (match str) 1))
		      str))
	 (regex-split pat str regex-param))))

(define (parser-generator type post)
  (define (parse-loop lst accum)
    (if (null? lst)
	accum
	(if (list? (car lst))
	    (parse-loop (cddr lst)
			(cons (list (car lst) (post (cadr lst)))
			      accum))
	    (parse-loop (cdr lst)
			(cons (post (car lst))
			      accum)))))
  (λ(lst)
    (reverse (parse-loop lst '()))))

(define (front-end-generator post pat type regex-param)
  (compose (parser-generator type post)
	   (lexer-generator pat type regex-param)))

(define (connect-front-end . lst-of-lst)
  (define (connect-loop lst-of-lst accum)
    (if (null? lst-of-lst)
	accum
	(connect-loop (cdr lst-of-lst)
		      (apply (caar lst-of-lst)
			     (cons accum (cdar lst-of-lst))))))
  (connect-loop lst-of-lst identity))

(define title-pattern "^#[[:blank:]]+([-[:blank:][:upper:]]+)[[:blank:]]*$")

(define subtitle-pattern "^#{2,3}[[:blank:]]+([^#]+)#*[[:blank:]]*$")

(define hyphen-pattern "^[[:blank:]]*-[[:blank:]]+(.+)$")

(define asterisk-pattern "^\\*[[:blank:]]+([^\\*]+)$")

(define number-pattern "^([[:digit:]]+\\.[[:blank:]]+.+)$")

(define verbatim-pattern "```([^`]+)```")

(define ast
  ((compose (connect-front-end
	     (list front-end-generator
		   verbatim-pattern
		   'VERBATIM
		   (list regexp/extended))
	     (list front-end-generator
		   number-pattern
		   'NUMBER
		   (list regexp/newline
			 regexp/extended))
	     (list front-end-generator
		   asterisk-pattern
		   'ASTERISK
		   (list regexp/newline
			 regexp/extended))
	     (list front-end-generator
		   hyphen-pattern
		   'HYPHEN
		   (list regexp/newline
			 regexp/extended))
	     (list front-end-generator
		   subtitle-pattern
		   'SUBTITLE
		   (list regexp/newline
			 regexp/extended))
	     (list front-end-generator
		   title-pattern
		   'TITLE
		   (list regexp/newline
			 regexp/extended)))
	    external-links
	    remove-internal-links
	    italic
	    bold
	    remove-table-of-content)
   readme))

(define (fetch-symbol ast type-lst)
  (if (list? ast)
      (if (and (list? (car ast))
	       (any (λ(type) (eq? (caar ast) type))
		    type-lst))
	  (list (car ast)
		(fetch-symbol (cdr ast) type-lst))
	  (append (filter (λ(x) (not (null? x)))
			  (map (λ(sub-tree) (fetch-symbol sub-tree type-lst))
			       ast))))
      '()))

(define (flatten lst)
  (cond ((null? lst) '())
	((not (list? lst)) (list lst))
	(else (append (flatten (car lst)) (flatten (cdr lst))))))

(define (group-adj-elements lst)
  (define lst-of-index-lst (map list
				(filter even? (iota (length lst)))
				(filter odd? (iota (length lst)))))
  (define (index->element k) (list-ref lst k))
  (map (λ(index-lst) (map index->element index-lst))
       lst-of-index-lst))

(define type-lst '(TITLE SUBTITLE HYPHEN ASTERISK NUMBER VERBATIM))

(define symbol-table
  ((compose group-adj-elements flatten) (fetch-symbol ast type-lst)))

(define (list-of-string? lst) (every string? lst))

(define (contain-string-lst? s-exp)
  (if (list? s-exp)
      (if (list-of-string? s-exp)
	  #t
	  (any contain-string-lst? s-exp))
      #f))

(define (join-string-lst s-exp)
  (if (list? s-exp)
      (if (list-of-string? s-exp)
	  (string-join s-exp "")
	  (map join-string-lst s-exp))
      s-exp))

(define (recursively-join-string-lst s-exp)
  (if (not (contain-string-lst? s-exp))
      s-exp
      (recursively-join-string-lst (join-string-lst s-exp))))

(define (s-exp-first s-exp) (car (flatten s-exp)))

(define (process-ast proc type ast)
  (if (list? ast)
      (if (and (list? (car ast))
	       (eq? (caar ast) type))
	  (proc type (cadar ast) (s-exp-first (cadr ast)))
	  (map (λ(sub-tree) (process-ast proc type sub-tree))
	       ast))
      ast))

(define (back-end-generator proc type)
  (λ(ast) (recursively-join-string-lst (process-ast proc type ast))))


(define (remove-formatter-name str)
  (define pat-lst '("^bash[[:blank:]]*$" "^python[[:blank:]]*$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre 'post))

(define (indent-by-4-spaces str)
  (define pat-lst '("^(.+)"))
  (define 4-spaces "    ")
  (string-append
   4-spaces
   (regex-sub/global
    (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
    str
    'pre 4-spaces 1 'post)))

(define (verbatim-processor type symbol rest)
  (string-append (indent-by-4-spaces (remove-formatter-name symbol)) rest))

(define (list-subtract lst1 lst2) (lset-difference eqv? lst1 lst2))

(define (non-nil-take-while pred lst)
  (take-while pred (drop-while (negate pred) lst)))

(define (group-by-type type lst)
  (define sublst (non-nil-take-while (λ(s-exp) (eq? (car s-exp) type)) lst))
  (if (null? sublst)
      '()
      (cons sublst (group-by-type type (list-subtract lst sublst)))))

(define (item-position-decider-generator func)
  (λ(str type symbol-table)
    (any (λ(symbol) (eq? symbol str))
	 (map (compose cadr func)
	      (group-by-type type symbol-table)))))

(define first-item? (item-position-decider-generator first))

(define last-item? (item-position-decider-generator last))

(define (process-item type prefix rest symbol)
  (cond ((first-item? symbol type symbol-table)
	 (string-append "=over 7\n\n=item Z<>" prefix "\n\n" rest "\n"))
	((last-item? symbol type symbol-table)
	 (string-append "=item Z<>" prefix "\n\n" rest "\n\n=back"))
	(else
	 (string-append "=item Z<>" prefix "\n\n" rest "\n"))))

(define (empty-string? s-exp)
  (and (string? s-exp) (string-null? s-exp)))

(define (item-processor-generator symbol-table)
  (λ(type symbol rest)
    (define pattern "^([[:digit:]]+\\.)[[:blank:]]+")
    (define number-item-lst
      (filter (negate empty-string?)
	      ((lexer-generator pattern 'NUMBER (list regexp/newline
						      regexp/extended))
	       symbol)))

    (cond ((eq? type 'HYPHEN)
	   (string-append (process-item type
					"-"
					symbol
					symbol)
			  rest))
	  ((eq? type 'ASTERISK)
	   (string-append (process-item type
					"*"
					symbol
					symbol)
			  rest))
	  ((eq? type 'NUMBER)
	   (string-append (process-item type
					(cadar number-item-lst)
					(cadr number-item-lst)
					symbol)
			  rest))
	  (else
	   (error "unknown type of expression")))))

(define (subtitle-processor type symbol rest)
  (string-append "=head2 " symbol "\n\n" rest))

(define (append-title str)
  (string-append "=head1 " str "\n\n"))

(define (installation-section-processor type symbol rest)
  "")

(define (name-section-processor type symbol rest)
  (define synopsis "\n\n=head1 SYNOPSIS\n\nB<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]\n\n")
  (string-append (append-title symbol) rest synopsis))

(define (remove-synopsis-in-description str)
  (define pat-lst '("^ +.+$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre 'post))

(define (description-section-processor type symbol rest)
  (string-append (append-title symbol) (remove-synopsis-in-description rest)))

(define (spaces->newline str)
  (define pat-lst '(" +"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n" 'post))

(define (remove-leading-newlines str)
  (define pat-lst '("^\n+"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre 'post))

(define (multiple-newlines->single-newline str)
  (define pat-lst '("\n+"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n" 'post))

(define (sentence-per-line->word-per-line str)
  ((compose multiple-newlines->single-newline
	    remove-leading-newlines
	    spaces->newline)
   str))

(define (short-long-opt-with-arg-processor str)
  (define pat-lst '("^(-[^[:space:]]+)[[:space:]]*,[[:space:]]*(--[^[:space:]]+)[[:space:]]+([^[:lower:][:space:]]+)[[:space:]]+([[:upper:]].+)$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n=item\nB<<< " 1 " >>>\n,\nB<<< " 2 " >>>\nI<<< " 3 " >>>\n\n" 4 'post))

(define (short-long-opt-without-arg-processor str)
  (define pat-lst '("^(-[^[:space:]]+)[[:space:]]*,[[:space:]]*(--[^[:space:]]+)[[:space:]]+([[:upper:]].+)$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n=item\nB<<< " 1 " >>>\n,\nB<<< " 2 " >>>\n\n" 3 'post))

(define (long-opt-with-arg-processor str)
  (define pat-lst '("^(--[^[:space:]]+)[[:space:]]+([^[:lower:][:space:]]+)[[:space:]]+([[:upper:]].+)$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n=item\nB<<< " 1 " >>>\nI<<< " 2" >>>\n\n" 3 'post ))

(define (long-opt-without-arg-processor str)
  (define pat-lst '("^(--[^[:space:]]+)[[:space:]]+([[:upper:]].+)$"))
  (regex-sub/global (pat-lst->regex-lst pat-lst regexp/newline regexp/extended)
		    str
		    'pre "\n=item\nB<<< " 1 " >>>\n\n" 2 'post ))

(define (process-options str)
  ((compose long-opt-without-arg-processor
	    long-opt-with-arg-processor
	    short-long-opt-without-arg-processor
	    short-long-opt-with-arg-processor
	    sentence-per-line->word-per-line)
   str))

(define (options-section-processor type symbol rest)
  (define subtitle-pattern "^=head2 (.+)$")
  (define sub-tree ((connect-front-end (list front-end-generator
					     subtitle-pattern
					     'SUBTITLE
					     (list regexp/newline
						   regexp/extended))) rest))
  (define (options-subsections-processor type symbol rest)
    (string-append "\n=back\n\n=head2 "
		   symbol
		   "\n\n=over 7\n\n"
		   (process-options rest)))
  (string-append (append-title symbol)
		 "=over 7\n\n"
		 ((back-end-generator options-subsections-processor
				      'SUBTITLE)
		  (cons (process-options (car sub-tree))
			(cdr sub-tree)))
		 "\n=back\n\n"))

(define (title-processor type symbol rest)
  (cond ((string=? symbol "INSTALLATION")
	 (installation-section-processor type symbol rest))
	((string=? symbol "NAME")
	 (name-section-processor type symbol rest))
	((string=? symbol "DESCRIPTION")
	 (description-section-processor type symbol rest))
	((string=? symbol "OPTIONS")
	 (options-section-processor type symbol rest))
	(else
	 (string-append (append-title symbol) rest))))

(display ((compose
	   (back-end-generator title-processor 'TITLE)
	   (back-end-generator subtitle-processor 'SUBTITLE)
	   (back-end-generator (item-processor-generator symbol-table)
			       'HYPHEN)
	   (back-end-generator (item-processor-generator symbol-table)
			       'ASTERISK)
	   (back-end-generator (item-processor-generator symbol-table)
			       'NUMBER)
	   (back-end-generator verbatim-processor 'VERBATIM))
	  ast))

Reply via email to