From: Rutger van Beusekom <rut...@dezyne.org>

* module/ice-9/peg/codegen.scm (trace?): New function.
(indent): New variable.
(%peg:debug?): New exported parameter.
(wrap-parser-for-users): Use them to provide debug tracing.
* test-suite/tests/peg.test ("Parse tracing"): Test it.
* doc/ref/api-peg.texi (Debug tracing): Document it.

Co-authored-by: Janneke Nieuwenhuizen <jann...@gnu.org>
---
 doc/ref/api-peg.texi         | 18 ++++++++-
 module/ice-9/peg.scm         |  3 +-
 module/ice-9/peg/codegen.scm | 74 ++++++++++++++++++++++++------------
 test-suite/tests/peg.test    | 39 ++++++++++++++++---
 4 files changed, 102 insertions(+), 32 deletions(-)

diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi
index 0214f7ff1..dfa806832 100644
--- a/doc/ref/api-peg.texi
+++ b/doc/ref/api-peg.texi
@@ -1,6 +1,6 @@
 @c -*-texinfo-*-
 @c This is part of the GNU Guile Reference Manual.
-@c Copyright (C) 2006, 2010, 2011
+@c Copyright (C) 2006, 2010, 2011, 2024
 @c   Free Software Foundation, Inc.
 @c See the file guile.texi for copying conditions.
 
@@ -1035,3 +1035,19 @@ symbol function)}, where @code{symbol} is the symbol 
that will indicate
 a form of this type and @code{function} is the code generating function
 described above.  The function @code{add-peg-compiler!} is exported from
 the @code{(ice-9 peg codegen)} module.
+
+@subsubheading Debug tracing
+
+Due to the backtracking nature of PEG, the parser result is @code{#f}
+when it cannot match the input text.  It proves to be a big pain
+determining whether the problem is actually in the input or in the
+grammar, especially when changing the grammar itself.  Setting the
+parameter @var{%peg:debug?} to @code{#t} enables debug tracing, which
+will make the PEG parser print for each production rule: its name, the
+current state of the input, as well as the parse result.
+
+@lisp
+(define-peg-string-patterns "grammar @dots{}")
+(parameterize ((%peg:debug? #t))
+  (and=> (match-pattern grammar input-text) peg:tree))
+@end lisp
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 4e03131cd..499c3820c 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -1,6 +1,6 @@
 ;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator
 ;;;;
-;;;;   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;   Copyright (C) 2010, 2011, 2024 Free Software Foundation, Inc.
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -28,6 +28,7 @@
   #:use-module (ice-9 peg cache)
   #:re-export (define-peg-pattern
                define-peg-string-patterns
+               %peg:debug?
                match-pattern
                search-for-pattern
                compile-peg-pattern
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index d80c3e849..c450be440 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -1,6 +1,6 @@
 ;;;; codegen.scm --- code generation for composable parsers
 ;;;;
-;;;;   Copyright (C) 2011 Free Software Foundation, Inc.
+;;;;   Copyright (C) 2011, 2024 Free Software Foundation, Inc.
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,11 @@
 ;;;;
 
 (define-module (ice-9 peg codegen)
-  #:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
+  #:export (compile-peg-pattern
+            wrap-parser-for-users
+            add-peg-compiler!
+            %peg:debug?)
+
   #:use-module (ice-9 pretty-print)
   #:use-module (system base pmatch))
 
@@ -332,28 +336,48 @@ return EXP."
                                 "Not one of" (map car peg-compiler-alist)))))))
 
 ;; Packages the results of a parser
+
+(define %peg:debug? (make-parameter #f))
+(define (trace? symbol)
+  (%peg:debug?))
+
+(define indent 0)
+
 (define (wrap-parser-for-users for-syntax parser accumsym s-syn)
-   #`(lambda (str strlen at)
+  #`(lambda (str strlen at)
+      (when (trace? '#,s-syn)
+        (format (current-error-port) "~a~a\n"
+                (make-string indent #\space)
+                '#,s-syn))
+      (set! indent (+ indent 4))
       (let ((res (#,parser str strlen at)))
-        ;; Try to match the nonterminal.
-        (if res
-            ;; If we matched, do some post-processing to figure out
-            ;; what data to propagate upward.
-            (let ((at (car res))
-                  (body (cadr res)))
-              #,(cond
-                 ((eq? accumsym 'name)
-                  #`(list at '#,s-syn))
-                 ((eq? accumsym 'all)
-                  #`(list (car res)
-                          (cond
-                           ((not (list? body))
-                            (list '#,s-syn body))
-                           ((null? body) '#,s-syn)
-                           ((symbol? (car body))
-                            (list '#,s-syn body))
-                           (else (cons '#,s-syn body)))))
-                 ((eq? accumsym 'none) #`(list (car res) '()))
-                 (else #`(begin res))))
-            ;; If we didn't match, just return false.
-            #f))))
+        (set! indent (- indent 4))
+        (let ((pos (or (and res (car res)) 0)))
+          (when (and (trace? '#,s-syn) (< at pos))
+            (format (current-error-port) "~a~a := ~s\tnext: ~s\n"
+                    (make-string indent #\space)
+                    '#,s-syn
+                    (substring str at pos)
+                    (substring str pos (min strlen (+ pos 10)))))
+          ;; Try to match the nonterminal.
+          (if res
+              ;; If we matched, do some post-processing to figure out
+              ;; what data to propagate upward.
+              (let ((at (car res))
+                    (body (cadr res)))
+                #,(cond
+                   ((eq? accumsym 'name)
+                    #`(list at '#,s-syn))
+                   ((eq? accumsym 'all)
+                    #`(list (car res)
+                            (cond
+                             ((not (list? body))
+                              (list '#,s-syn body))
+                             ((null? body) '#,s-syn)
+                             ((symbol? (car body))
+                              (list '#,s-syn body))
+                             (else (cons '#,s-syn body)))))
+                   ((eq? accumsym 'none) #`(list (car res) '()))
+                   (else #`(begin res))))
+              ;; If we didn't match, just return false.
+              #f)))))
diff --git a/test-suite/tests/peg.test b/test-suite/tests/peg.test
index f516571e8..6a8709794 100644
--- a/test-suite/tests/peg.test
+++ b/test-suite/tests/peg.test
@@ -1,14 +1,14 @@
+;;;;; PEG test suite. -*- scheme -*-
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; PEG test suite.
 ;; Tests the parsing capabilities of (ice-9 peg).  Could use more
 ;; tests for edge cases.
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define-module (test-suite test-peg)
-  :use-module (test-suite lib)
-  :use-module (ice-9 peg)
-  :use-module (ice-9 pretty-print)
-  :use-module (srfi srfi-1))
+  #:use-module (srfi srfi-1)
+  #:use-module (test-suite lib)
+  #:use-module (ice-9 peg)
+  #:use-module (ice-9 pretty-print))
 
 ;; Doubled up for pasting into REPL.
 (use-modules (test-suite lib))  
@@ -276,3 +276,32 @@ number <-- [0-9]+")
    (equal? (eq-parse "1+1/2*3+(1+1)/2")
           '(+ (+ 1 (* (/ 1 2) 3)) (/ (+ 1 1) 2)))))
 
+(define-peg-string-patterns
+  "trace-grammar <-- foo bar* baz
+foo <-- 'foo'
+bar <-- bla+
+bla <-- 'bar'
+baz <-- 'baz'")
+
+(with-test-prefix "Parse tracing"
+  (pass-if-equal
+   "trace"
+"trace-grammar
+    foo
+    foo := \"foo\"     next: \"barbarbaz\"
+    bar
+        bla
+        bla := \"bar\" next: \"barbaz\"
+        bla
+        bla := \"bar\" next: \"baz\"
+        bla
+    bar := \"barbar\"  next: \"baz\"
+    bar
+    baz
+    baz := \"baz\"     next: \"\"
+trace-grammar := \"foobarbarbaz\"      next: \"\"
+"
+   (parameterize ((%peg:debug? #t))
+      (with-error-to-string
+        (lambda _ (and=> (match-pattern trace-grammar "foobarbarbaz")
+                         peg:tree))))))
-- 
2.46.0


Reply via email to