Janneke Nieuwenhuizen writes: > From: Rutger van Beusekom <rut...@dezyne.org> > > This allows production of incomplete parse trees, without errors, e.g., > for code completion. > > * module/ice-9/peg/codegen.scm (%peg:fall-back?): New exported > parameter. > (%enable-expect, %continuation, %final-continuation): New parameter. > (final-continuation): New function. > (cg-or-rest): New function. > (cg-and-int): Recover from expectation failures, fall-back by skipping > forward or escalating upward. > (cg-*): Prepare fall-back %continuation. > * test-suite/tests/peg.test ("Fall-back parser"): Test it. > * doc/ref/api-peg.texi (PEG Internals): Document it. > > Co-authored-by: Janneke Nieuwenhuizen <jann...@gnu.org> > > fall-back > > fall-back > > fallback
Oops, find cleaned-up version attached.
>From b3a3b48c0b76a2baed4d4b11f1d38ec0f772717c Mon Sep 17 00:00:00 2001 From: Rutger van Beusekom <rut...@dezyne.org> Date: Tue, 7 Jan 2020 13:33:15 +0100 Subject: [PATCH v2 5/5] peg: Add fall-back parsing. This allows production of incomplete parse trees, without errors, e.g., for code completion. * module/ice-9/peg/codegen.scm (%peg:fall-back?): New exported parameter. (%enable-expect, %continuation, %final-continuation): New parameter. (final-continuation): New function. (cg-or-rest): New function. (cg-and-int): Recover from expectation failures, fall-back by skipping forward or escalating upward. (cg-*): Prepare fall-back %continuation. * test-suite/tests/peg.test ("Fall-back parser"): Test it. * doc/ref/api-peg.texi (PEG Internals): Document it. Co-authored-by: Janneke Nieuwenhuizen <jann...@gnu.org> --- doc/ref/api-peg.texi | 14 ++++ module/ice-9/peg.scm | 5 +- module/ice-9/peg/codegen.scm | 146 +++++++++++++++++++++++++++-------- test-suite/tests/peg.test | 24 +++++- 4 files changed, 151 insertions(+), 38 deletions(-) diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi index 733cb1c6d..4c96b2acf 100644 --- a/doc/ref/api-peg.texi +++ b/doc/ref/api-peg.texi @@ -1116,3 +1116,17 @@ language. Putting a @code{#} behind a terminal or non-terminal indicates that its parsing must succeed, otherwise an exception is thrown containing the current parser state providing a hook to produce informative parse errors. + +@subsubheading Fallback parsing + +A natural extension to expect parsing is fallback parsing. It is +enabled by setting parameter @var{%peg:fall-back?} to @code{#t}. +Fallback parsing is implemented by catching the exception thrown by the +expect operator. At this point the parser attempts to recover its state +by eating away at the input until the input runs out or until one of the +grammar continuations matches and parsing continues regularly. + +When error occurs, @var{%peg:error} is invoked. + +@deffn {Scheme Procedure} %peg:error str line-number column-number error-type error +@end deffn diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm index fd9dce54c..aa7ddc743 100644 --- a/module/ice-9/peg.scm +++ b/module/ice-9/peg.scm @@ -25,13 +25,15 @@ ;; peg-sexp-compile. #:use-module (ice-9 peg simplify-tree) #:use-module (ice-9 peg using-parsers) - #:use-module (ice-9 peg cache) + #:re-export (define-peg-pattern define-peg-string-patterns define-skip-parser %peg:debug? + %peg:fall-back? %peg:locations? %peg:skip? + %peg:error match-pattern search-for-pattern compile-peg-pattern @@ -43,4 +45,3 @@ peg:tree peg:substring peg-record?)) - diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm index 458a7e3ab..642f31c63 100644 --- a/module/ice-9/peg/codegen.scm +++ b/module/ice-9/peg/codegen.scm @@ -23,9 +23,12 @@ add-peg-compiler! define-skip-parser %peg:debug? + %peg:error + %peg:fall-back? %peg:locations? %peg:skip?) + #:use-module (srfi srfi-1) #:use-module (ice-9 pretty-print) #:use-module (system base pmatch)) @@ -60,6 +63,8 @@ return EXP." (set! lst (cons obj lst))))) +(define %peg:fall-back? (make-parameter #f)) ;; public interface, enable fall-back parsing + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;; CODE GENERATORS ;; These functions generate scheme code for parsing PEGs. @@ -169,6 +174,71 @@ return EXP." ((eq? accum 'none) 'none))) (define baf builtin-accum-filter) +(define (final-continuation str strlen at) #f) + +(define %continuation (make-parameter final-continuation)) + +(define %fall-back-skip-at (make-parameter #f)) + +;;Fallback parsing is triggered by a syntax-error exception +;;the 'at' parameter is then pointing to "incomplete or erroneous" input +;;and moves ahead in the input until one of the continuations +;;of the production rules in the current callstack matches the input at that point. +;;At this point parsing continues regularly, but with an incomplete or erroneous parse tree. +;;If none of the continuations match then parsing fails without a result. +;;The operators involved for determining a continuation are: '(+ * and) +;;operator / is naturally not combined with the use of # +;;operators '(! &) may be considered later, since they may prove useful as asserts + +(define (format-error error str) + "Return procedure with two parameters (FROM TO) that formats parser +exception ERROR (offset . error) according using the source text in STR +and collects it using procedure (%peg:error)." + (define (get-error-type from to) + (if (< from to) + 'expected + 'error)) + (lambda (from to) + (let* ((error-type (get-error-type from to)) + (error-pos (caar error)) + (line-number (1+ (string-count str #\newline 0 error-pos))) + (col-number (- error-pos + (or (string-rindex str #\newline 0 error-pos) -1)))) + ((%peg:error) str line-number col-number error-type error)))) + +(define* (fall-back-skip kernel #:optional sequence?) + (if (not (%peg:fall-back?)) kernel + (lambda (str strlen start) + (catch 'syntax-error + (lambda _ + (kernel str strlen start)) + (lambda (key . args) + (let* ((expected (cadar args)) + (format-error (format-error args str))) + (let loop ((at start)) + (cond ((or (= at strlen) + ;; TODO: decide what to do; inspecting at might not be enough?!! + (unless (and (%fall-back-skip-at) + (eq? (%fall-back-skip-at) at)) + (parameterize ((%fall-back-skip-at at)) + ((%continuation) str strlen at)))) + (format-error start at) + (if sequence? `(,at ()) `(,at (,expected)))) + (else + (let ((res (false-if-exception (kernel str strlen (1+ at))))) + (if res + (begin + (format-error (or (string-index str (char-set-complement char-set:whitespace) start at) start) at) + res) + (loop (1+ at))))))))))))) + + +(define (partial-match kernel sym) + (lambda (str strlen at) + (catch 'syntax-error + (lambda _ (kernel str strlen at)) + (lambda (key . args) (and (< at (caar args)) (car args)))))) + ;; Top-level function builder for AND. Reduces to a call to CG-AND-INT. (define (cg-and clauses accum) #`(lambda (str len pos) @@ -181,8 +251,17 @@ return EXP." (() (cggr accum 'cg-and #`(reverse #,body) at)) ((first rest ...) - #`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at))) - (and res + #`(let* ((next #,(cg-or #'(rest ...) 'body)) + (kernel #,(compile-peg-pattern #'first accum)) + (res (parameterize + ((%continuation + (let ((after-that (%continuation))) + (lambda (str strlen at) + (or ((partial-match next 'next) str strlen at) + ((partial-match after-that 'after-that) + str strlen at)))))) + ((fall-back-skip kernel) #,str #,strlen #,at)))) + (and res ;; update AT and BODY then recurse (let ((newat (car res)) (newbody (cadr res))) @@ -207,42 +286,40 @@ return EXP." (define (cg-* args accum) (syntax-case args () ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#t)) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) + #`(let* ((kernel #,(compile-peg-pattern #'pat (baf accum))) + (kleene (lambda (str strlen at) + (let ((body '())) + (let lp ((end at) (count 0)) + (let* ((match ((fall-back-skip kernel #t) str strlen end)) + (new-end (if match (car match) end)) + (count (if (> new-end end) (1+ count) count))) + (when (> new-end end) + (push-not-null! body (single-filter (cadr match)))) + (if (and (> new-end end) #,#t) (lp new-end count) + (let ((success #,#t)) + #,#`(and success + #,(cggr (baf accum) 'cg-body + #'(reverse body) #'new-end)))))))))) + kleene)))) (define (cg-+ args accum) (syntax-case args () ((pat) - #`(lambda (str strlen at) - (let ((body '())) - (let lp ((end at) (count 0)) - (let* ((match (#,(compile-peg-pattern #'pat (baf accum)) - str strlen end)) - (new-end (if match (car match) end)) - (count (if (> new-end end) (1+ count) count))) - (if (> new-end end) - (push-not-null! body (single-filter (cadr match)))) - (if (and (> new-end end) - #,#t) - (lp new-end count) - (let ((success #,#'(>= count 1))) - #,#`(and success - #,(cggr (baf accum) 'cg-body - #'(reverse body) #'new-end))))))))))) + #`(let* ((kernel #,(compile-peg-pattern #'pat (baf accum))) + (multiple (lambda (str strlen at) + (let ((body '())) + (let lp ((end at) (count 0)) + (let* ((match ((fall-back-skip kernel #t) str strlen end)) + (new-end (if match (car match) end)) + (count (if (> new-end end) (1+ count) count))) + (when (> new-end end) + (push-not-null! body (single-filter (cadr match)))) + (if (and (> new-end end) #,#t) (lp new-end count) + (let ((success #,#'(>= count 1))) + #,#`(and success + #,(cggr (baf accum) 'cg-body + #'(reverse body) #'new-end)))))))))) + multiple)))) (define (cg-? args accum) (syntax-case args () @@ -351,6 +428,7 @@ return EXP." ;; Packages the results of a parser +(define %peg:error (make-parameter (const #f))) (define %peg:debug? (make-parameter #f)) (define %peg:locations? (make-parameter #f)) (define %peg:skip? (make-parameter (lambda (str strlen at) `(,at ())))) diff --git a/test-suite/tests/peg.test b/test-suite/tests/peg.test index 4f267f561..8a20cda41 100644 --- a/test-suite/tests/peg.test +++ b/test-suite/tests/peg.test @@ -6,6 +6,7 @@ (define-module (test-suite test-peg) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) #:use-module (test-suite lib) #:use-module (ice-9 peg) #:use-module (ice-9 pretty-print)) @@ -310,8 +311,7 @@ trace-grammar := \"foobarbarbaz\" next: \"\" "expect-grammar <-- one two three / .* one <-- 'one'# two <-- 'two'# -three <-- 'three'" -) +three <-- 'three'") (with-test-prefix "Parsing expect" (pass-if-equal "expect okay" @@ -379,3 +379,23 @@ baz (%peg:locations? #t)) (match-pattern trace-grammar program-text)) peg:tree))) + +(with-test-prefix "Fall-back parser" + (pass-if-equal "only one" + '(expect-grammar "one") + (and=> (parameterize ((%peg:skip? peg-skip) + (%peg:fall-back? #t)) + (match-pattern expect-grammar "one")) + peg:tree)) + (pass-if-equal "no two" + '(expect-grammar (one "one") (three "three")) + (and=> (parameterize ((%peg:skip? peg-skip) + (%peg:fall-back? #t)) + (match-pattern expect-grammar "one three")) + (compose (cute remove string? <>) peg:tree))) + (pass-if-equal "missing one" + '(expect-grammar (two "two") (three "three")) + (and=> (parameterize ((%peg:skip? peg-skip) + (%peg:fall-back? #t)) + (match-pattern expect-grammar "two three")) + (compose (cute remove string? <>) peg:tree)))) -- 2.46.0
-- Janneke Nieuwenhuizen <jann...@gnu.org> | GNU LilyPond https://LilyPond.org Freelance IT https://www.JoyOfSource.com | AvatarĀ® https://AvatarAcademy.com