Hi
I thought out a useful pet project to learn to use the PEG module.
Unfortunately I ended up avoiding it because it errors out when it
encounters a s-exp.
This is what I tried to do:
run a command in a pipe returning the contents of all source files of
guix (once)
parse this large string with PEG
create stats on the number of lines with comments etc.
This is what I ended up doing:
run a multiple commands in a pipe with sed|grep|wc -l-foo returning the
numbers I was interested in.
avoid PEG
print stats.
In the process I learned a lot.
1. (get-string-all) does not return a proper string (string? => #f). I
dont understand this as the manual says it should return a string.
2. (ice-9 match) only works on lists it seems.
3. (ice-9 peg) returns #f if it encounters a paren. Maybe there is a way
to avoid this - in that case it is not documented in the manual (or I
could not find it). (see my example in the file attached)
4. Maybe I misunderstood how to use the PEG parser and matching. In that
case a working example and a tip would be very welcome :D
Any thoughts?
--
Cheers Swedebugia
(use-modules
(ice-9 match)
(ice-9 peg)
(ice-9 popen)
(ice-9 textual-ports)
(ice-9 pretty-print)
(ice-9 regex)
(ice-9 rdelim))
;;; Note: this script must be run in the root of a guix checkout.
;;;
;;; Construct the commands
;;;
;; TODO refactor to only run find once and do the rest in guile.
(define all-find-xargs
"find . -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
;; Note this is guix-code excluding the code in the gnu-subdirectory!
(define guix-find-xargs
"find guix/ gnu.scm guix.scm -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
(define gnu-find-xargs
"find gnu/ -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
(define gnu-packages-find-xargs
"find gnu/packages gnu/packages.scm -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
(define gnu-services-find-xargs
"find gnu/services gnu/services.scm -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
(define gnu-etc-find-xargs
"find gnu/system gnu/system.scm gnu/build gnu/bootloader gnu/bootloader.scm gnu/artwork.scm gnu/tests gnu/tests.scm -name \"*.scm\" -or -name \"*.sh\" |xargs cat")
(define sed
;;"Trims copyrights and license and lines with only ;;'s and no characters and returns all code relevant lines."
"sed \\
-e '/^;;; Copyright/d' \\
-e '/^;;; This file/,/;;; along with/d' \\
-e '/^;*$/d' \\
-e '/^$/d'")
(define pipe
"|")
(define grep-comments
"grep ';'")
(define grep-line-comments
"grep ';;'")
(define grep-inline-comments
"grep -v ';;' | grep ';'")
(define wc
"wc -l")
(define (all-lines find-xargs)
(open-input-pipe (string-append find-xargs pipe sed)))
(define (count-lines find-xargs)
(open-input-pipe (string-append find-xargs pipe sed pipe wc)))
(define (count-comment-lines find-xargs)
(open-input-pipe (string-append find-xargs pipe sed pipe grep-comments pipe wc)))
(define (count-line-comment-lines find-xargs)
(open-input-pipe
(string-append find-xargs pipe sed pipe grep-line-comments pipe wc)))
(define (count-inline-comment-lines find-xargs)
(open-input-pipe
(string-append find-xargs pipe sed pipe grep-inline-comments pipe wc)))
(define (get-count port)
(let ((str (read-line port)))
(close-pipe port)
str))
;; this exactly mimics the manual but does not work on the string below.
(define-peg-string-patterns
"comment <- entry* !.
entry <-- (! NL .)* NL*
NL < '\n'")
;; Broken :S
(define-peg-string-patterns
"comment1 <-- entry* !.
entry <-- SP* SC+ SP words NL*
words <-- text
text <- (!NL .)*
SC < ';'
SP < ' '
NL < '\n'")
;; This only works if the open-bracket is escaped.
(define *test*
";; test
;;test2
; test3
;test4
\(define %tor-accounts
;; User account and groups for Tor.")
;; both return #f
(display
(peg:tree
(match-pattern comment *test*)))
(display
(peg:tree
(match-pattern comment1 *test*)))
;; Uncomment to display the lines for debugging:
;;(display (get-string-all (all-lines gnu-services-find-xargs)))
(define open-bracket
(make-regexp "[(]"))
(define close-bracket
(make-regexp "[)]"))
;; This fails because (get-string-all) did not return a proper string:
;; #<procedure escaped-lines ()>ERROR: In procedure string-copy:
;; In procedure string-copy: Wrong type argument in position 1 (expecting string): #<procedure escaped-lines ()>
;; (define (escaped-lines)
;; (regexp-substitute/global #f open-bracket (get-string-all (all-lines gnu-services-find-xargs))))
;; (display escaped-lines)
;; (display
;; (peg:tree
;; (match-pattern comment escaped-lines)))
;; NOTE: string? returns false:
;; (display
;; (string? (lines (all-lines gnu-services-find-xargs))
;; ))
;; Test
;; this only works on lists it seems...
;; (display
;; (match (string-get-all (all-lines gnu-services-find-xargs))
;; ((str ...)
;; (#\;))))
;;;
;;; Statistics
;;;
(define (compare2 smaller bigger)
(exact->inexact
(/
(* 100 smaller) bigger)))
(define (statistic name find-xargs)
(let ((total (string->number (get-count (count-lines find-xargs))))
(all-comment-lines
(string->number
(get-count (count-comment-lines find-xargs))))
(line-comment-lines
(string->number
(get-count (count-line-comment-lines find-xargs))))
(inline-comment-lines
(string->number
(get-count (count-inline-comment-lines find-xargs)))))
(format #t "
~a:
Total lines: ~a
Total lines with any comment: ~a
Total lines with line-comment: ~a
Total lines with inline-comment: ~a
Percent comment lines: ~a%~%"
name total all-comment-lines line-comment-lines
inline-comment-lines (compare2 all-comment-lines total))
))
(statistic "Guix, excluding gnu/" guix-find-xargs)
(statistic "gnu-subdir" gnu-find-xargs)
(statistic "gnu/packages-subdir" gnu-packages-find-xargs)
(statistic "gnu/services-subdir" gnu-services-find-xargs)
(statistic "the rest of the gnu-subdir" gnu-etc-find-xargs)
(statistic "All of Guix" all-find-xargs)