From: Rutger van Beusekom <rut...@dezyne.org> * module/ice-9/peg/codegen.scm (trace?): New function. (indent): New variable. (%peg:debug?): New exported parameter. (wrap-parser-for-users): Use them to provide debug tracing. * test-suite/tests/peg.test ("Parse tracing"): Test it. * doc/ref/api-peg.texi (Debug tracing): Document it.
Co-authored-by: Janneke Nieuwenhuizen <jann...@gnu.org> --- doc/ref/api-peg.texi | 18 ++++++++- module/ice-9/peg.scm | 3 +- module/ice-9/peg/codegen.scm | 74 ++++++++++++++++++++++++------------ test-suite/tests/peg.test | 39 ++++++++++++++++--- 4 files changed, 102 insertions(+), 32 deletions(-) diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi index 0214f7ff1..dfa806832 100644 --- a/doc/ref/api-peg.texi +++ b/doc/ref/api-peg.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 2006, 2010, 2011 +@c Copyright (C) 2006, 2010, 2011, 2024 @c Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @@ -1035,3 +1035,19 @@ symbol function)}, where @code{symbol} is the symbol that will indicate a form of this type and @code{function} is the code generating function described above. The function @code{add-peg-compiler!} is exported from the @code{(ice-9 peg codegen)} module. + +@subsubheading Debug tracing + +Due to the backtracking nature of PEG, the parser result is @code{#f} +when it cannot match the input text. It proves to be a big pain +determining whether the problem is actually in the input or in the +grammar, especially when changing the grammar itself. Setting the +parameter @var{%peg:debug?} to @code{#t} enables debug tracing, which +will make the PEG parser print for each production rule: its name, the +current state of the input, as well as the parse result. + +@lisp +(define-peg-string-patterns "grammar @dots{}") +(parameterize ((%peg:debug? #t)) + (and=> (match-pattern grammar input-text) peg:tree)) +@end lisp diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm index 4e03131cd..499c3820c 100644 --- a/module/ice-9/peg.scm +++ b/module/ice-9/peg.scm @@ -1,6 +1,6 @@ ;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator ;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2010, 2011, 2024 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -28,6 +28,7 @@ #:use-module (ice-9 peg cache) #:re-export (define-peg-pattern define-peg-string-patterns + %peg:debug? match-pattern search-for-pattern compile-peg-pattern diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm index d80c3e849..c450be440 100644 --- a/module/ice-9/peg/codegen.scm +++ b/module/ice-9/peg/codegen.scm @@ -1,6 +1,6 @@ ;;;; codegen.scm --- code generation for composable parsers ;;;; -;;;; Copyright (C) 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2011, 2024 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,11 @@ ;;;; (define-module (ice-9 peg codegen) - #:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!) + #:export (compile-peg-pattern + wrap-parser-for-users + add-peg-compiler! + %peg:debug?) + #:use-module (ice-9 pretty-print) #:use-module (system base pmatch)) @@ -332,28 +336,48 @@ return EXP." "Not one of" (map car peg-compiler-alist))))))) ;; Packages the results of a parser + +(define %peg:debug? (make-parameter #f)) +(define (trace? symbol) + (%peg:debug?)) + +(define indent 0) + (define (wrap-parser-for-users for-syntax parser accumsym s-syn) - #`(lambda (str strlen at) + #`(lambda (str strlen at) + (when (trace? '#,s-syn) + (format (current-error-port) "~a~a\n" + (make-string indent #\space) + '#,s-syn)) + (set! indent (+ indent 4)) (let ((res (#,parser str strlen at))) - ;; Try to match the nonterminal. - (if res - ;; If we matched, do some post-processing to figure out - ;; what data to propagate upward. - (let ((at (car res)) - (body (cadr res))) - #,(cond - ((eq? accumsym 'name) - #`(list at '#,s-syn)) - ((eq? accumsym 'all) - #`(list (car res) - (cond - ((not (list? body)) - (list '#,s-syn body)) - ((null? body) '#,s-syn) - ((symbol? (car body)) - (list '#,s-syn body)) - (else (cons '#,s-syn body))))) - ((eq? accumsym 'none) #`(list (car res) '())) - (else #`(begin res)))) - ;; If we didn't match, just return false. - #f)))) + (set! indent (- indent 4)) + (let ((pos (or (and res (car res)) 0))) + (when (and (trace? '#,s-syn) (< at pos)) + (format (current-error-port) "~a~a := ~s\tnext: ~s\n" + (make-string indent #\space) + '#,s-syn + (substring str at pos) + (substring str pos (min strlen (+ pos 10))))) + ;; Try to match the nonterminal. + (if res + ;; If we matched, do some post-processing to figure out + ;; what data to propagate upward. + (let ((at (car res)) + (body (cadr res))) + #,(cond + ((eq? accumsym 'name) + #`(list at '#,s-syn)) + ((eq? accumsym 'all) + #`(list (car res) + (cond + ((not (list? body)) + (list '#,s-syn body)) + ((null? body) '#,s-syn) + ((symbol? (car body)) + (list '#,s-syn body)) + (else (cons '#,s-syn body))))) + ((eq? accumsym 'none) #`(list (car res) '())) + (else #`(begin res)))) + ;; If we didn't match, just return false. + #f))))) diff --git a/test-suite/tests/peg.test b/test-suite/tests/peg.test index f516571e8..6a8709794 100644 --- a/test-suite/tests/peg.test +++ b/test-suite/tests/peg.test @@ -1,14 +1,14 @@ +;;;;; PEG test suite. -*- scheme -*- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;; PEG test suite. ;; Tests the parsing capabilities of (ice-9 peg). Could use more ;; tests for edge cases. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-module (test-suite test-peg) - :use-module (test-suite lib) - :use-module (ice-9 peg) - :use-module (ice-9 pretty-print) - :use-module (srfi srfi-1)) + #:use-module (srfi srfi-1) + #:use-module (test-suite lib) + #:use-module (ice-9 peg) + #:use-module (ice-9 pretty-print)) ;; Doubled up for pasting into REPL. (use-modules (test-suite lib)) @@ -276,3 +276,32 @@ number <-- [0-9]+") (equal? (eq-parse "1+1/2*3+(1+1)/2") '(+ (+ 1 (* (/ 1 2) 3)) (/ (+ 1 1) 2))))) +(define-peg-string-patterns + "trace-grammar <-- foo bar* baz +foo <-- 'foo' +bar <-- bla+ +bla <-- 'bar' +baz <-- 'baz'") + +(with-test-prefix "Parse tracing" + (pass-if-equal + "trace" +"trace-grammar + foo + foo := \"foo\" next: \"barbarbaz\" + bar + bla + bla := \"bar\" next: \"barbaz\" + bla + bla := \"bar\" next: \"baz\" + bla + bar := \"barbar\" next: \"baz\" + bar + baz + baz := \"baz\" next: \"\" +trace-grammar := \"foobarbarbaz\" next: \"\" +" + (parameterize ((%peg:debug? #t)) + (with-error-to-string + (lambda _ (and=> (match-pattern trace-grammar "foobarbarbaz") + peg:tree)))))) -- 2.46.0