From 296a31d0ecf3a6f758871f9c3dc2b6937592b25d Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Mon, 28 Mar 2011 15:13:35 -0400
Subject: [PATCH 1/2] Move define-nonterm

* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler
   macro called `define-sexp-parser' to make the PEG grammar
* module/ice-9/peg.scm: move define-nonterm macro to this file
* module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to
   this file, under name `wrap-parser-for-users'
---
 module/ice-9/peg.scm            |   33 +++++++++++-
 module/ice-9/peg/codegen.scm    |   29 ++++++++++-
 module/ice-9/peg/string-peg.scm |  107 +++++++++++----------------------------
 3 files changed, 89 insertions(+), 80 deletions(-)

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 644af6d..4f4bbf8 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -19,7 +19,7 @@
 
 (define-module (ice-9 peg)
   #:export (peg-parse
-;            define-nonterm
+            define-nonterm
 ;            define-nonterm-f
             peg-match)
 ;  #:export-syntax (define-nonterm)
@@ -30,7 +30,7 @@
   #:re-export (peg-sexp-compile
                define-grammar
                define-grammar-f
-               define-nonterm
+;               define-nonterm
                keyword-flatten
                context-flatten
                peg:start
@@ -67,6 +67,35 @@ execute the STMTs and try again."
         #f
         (make-prec 0 (car res) string (string-collapse (cadr res))))))
 
+;; The results of parsing using a nonterminal are cached.  Think of it like a
+;; hash with no conflict resolution.  Process for deciding on the cache size
+;; wasn't very scientific; just ran the benchmarks and stopped a little after
+;; the point of diminishing returns on my box.
+(define *cache-size* 512)
+
+;; Defines a new nonterminal symbol accumulating with ACCUM.
+(define-syntax define-nonterm
+  (lambda (x)
+    (syntax-case x ()
+      ((_ sym accum pat)
+       (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+             (accumsym (syntax->datum #'accum))
+             (c (datum->syntax x (gensym))));; the cache
+         ;; CODE is the code to parse the string if the result isn't cached.
+         (let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
+           #`(begin
+               (define #,c (make-vector *cache-size* #f));; the cache
+               (define (sym str strlen at)
+                 (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
+                   ;; Check to see whether the value is cached.
+                   (if (and vref (eq? (car vref) str) (= (cadr vref) at))
+                       (caddr vref);; If it is return it.
+                       (let ((fres ;; Else calculate it and cache it.
+                              (#,syn str strlen at)))
+                         (vector-set! #,c (modulo at *cache-size*)
+                                      (list str at fres))
+                         fres)))))))))))
+
 ;; Searches through STRING for something that parses to PEG-MATCHER.  Think
 ;; regexp search.
 (define-syntax peg-match
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 2c85ccc..0804d1e 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -18,7 +18,7 @@
 ;;;;
 
 (define-module (ice-9 peg codegen)
-  #:export (peg-sexp-compile)
+  #:export (peg-sexp-compile wrap-parser-for-users)
   #:use-module (ice-9 peg)
   #:use-module (ice-9 peg string-peg)
   #:use-module (ice-9 pretty-print)
@@ -244,3 +244,30 @@ return EXP."
                       (lit
                        #`(and success
                               #,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
+
+;; Packages the results of a parser
+(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
+   #`(lambda (str strlen at)
+      (let ((res (#,parser str strlen at)))
+        ;; Try to match the nonterminal.
+        (if res
+            ;; If we matched, do some post-processing to figure out
+            ;; what data to propagate upward.
+            (let ((at (car res))
+                  (body (cadr res)))
+              #,(cond
+                 ((eq? accumsym 'name)
+                  #`(list at '#,s-syn))
+                 ((eq? accumsym 'all)
+                  #`(list (car res)
+                          (cond
+                           ((not (list? body))
+                            (list '#,s-syn body))
+                           ((null? body) '#,s-syn)
+                           ((symbol? (car body))
+                            (list '#,s-syn body))
+                           (else (cons '#,s-syn body)))))
+                 ((eq? accumsym 'none) #`(list (car res) '()))
+                 (else #`(begin res))))
+            ;; If we didn't match, just return false.
+            #f))))
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
index f7e21f6..a899727 100644
--- a/module/ice-9/peg/string-peg.scm
+++ b/module/ice-9/peg/string-peg.scm
@@ -22,16 +22,11 @@
             peg-as-peg
             define-grammar
             define-grammar-f
-            define-nonterm
             peg-grammar)
   #:use-module (ice-9 peg)
-  #:use-module (ice-9 peg codegen))
-
-;; The results of parsing using a nonterminal are cached.  Think of it like a
-;; hash with no conflict resolution.  Process for deciding on the cache size
-;; wasn't very scientific; just ran the benchmarks and stopped a little after
-;; the point of diminishing returns on my box.
-(define *cache-size* 512)
+  #:use-module (ice-9 peg codegen)
+  #:use-module (ice-9 peg match-record)
+  #:use-module (ice-9 peg simplify-tree))
 
 ;; Gets the left-hand depth of a list.
 (define (depth lst)
@@ -39,58 +34,6 @@
       0
       (+ 1 (depth (car lst)))))
 
-(eval-when (compile load eval)
-(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
-;  (let ((matchf-syn (datum->syntax for-syntax matchf)))
-   #`(lambda (str strlen at)
-      (let ((res (#,matchf-syn str strlen at)))
-        ;; Try to match the nonterminal.
-        (if res
-            ;; If we matched, do some post-processing to figure out
-            ;; what data to propagate upward.
-            (let ((at (car res))
-                  (body (cadr res)))
-              #,(cond
-                 ((eq? accumsym 'name)
-                  #`(list at '#,s-syn))
-                 ((eq? accumsym 'all)
-                  #`(list (car res)
-                          (cond
-                           ((not (list? body))
-                            (list '#,s-syn body))
-                           ((null? body) '#,s-syn)
-                           ((symbol? (car body))
-                            (list '#,s-syn body))
-                           (else (cons '#,s-syn body)))))
-                 ((eq? accumsym 'none) #`(list (car res) '()))
-                 (else #`(begin res))))
-            ;; If we didn't match, just return false.
-            #f))))
-)
-
-;; Defines a new nonterminal symbol accumulating with ACCUM.
-(define-syntax define-nonterm
-  (lambda (x)
-    (syntax-case x ()
-      ((_ sym accum pat)
-       (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
-             (accumsym (syntax->datum #'accum))
-             (c (datum->syntax x (gensym))));; the cache
-         ;; CODE is the code to parse the string if the result isn't cached.
-         (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
-           #`(begin
-               (define #,c (make-vector *cache-size* #f));; the cache
-               (define (sym str strlen at)
-                 (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
-                   ;; Check to see whether the value is cached.
-                   (if (and vref (eq? (car vref) str) (= (cadr vref) at))
-                       (caddr vref);; If it is return it.
-                       (let ((fres ;; Else calculate it and cache it.
-                              (#,syn str strlen at)))
-                         (vector-set! #,c (modulo at *cache-size*)
-                                      (list str at fres))
-                         fres)))))))))))
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;; Parse string PEGs using sexp PEGs.
 ;; See the variable PEG-AS-PEG for an easier-to-read syntax.
@@ -114,34 +57,43 @@ LB < '['
 RB < ']'
 ")
 
-(define-nonterm peg-grammar all
+(define-syntax define-sexp-parser
+  (lambda (x)
+    (syntax-case x ()
+      ((_ sym accum pat)
+       (let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+              (accumsym (syntax->datum #'accum))
+              (syn (wrap-parser-for-users x matchf accumsym #'sym)))
+           #`(define sym #,syn))))))
+
+(define-sexp-parser peg-grammar all
   (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
-(define-nonterm peg-pattern all
+(define-sexp-parser peg-pattern all
   (and peg-alternative
        (body lit (and (ignore "/") peg-sp peg-alternative) *)))
-(define-nonterm peg-alternative all
+(define-sexp-parser peg-alternative all
   (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
-(define-nonterm peg-suffix all
+(define-sexp-parser peg-suffix all
   (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
-(define-nonterm peg-primary all
+(define-sexp-parser peg-primary all
   (or (and "(" peg-sp peg-pattern ")" peg-sp)
       (and "." peg-sp)
       peg-literal
       peg-charclass
       (and peg-nonterminal (body ! "<" 1))))
-(define-nonterm peg-literal all
+(define-sexp-parser peg-literal all
   (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
-(define-nonterm peg-charclass all
+(define-sexp-parser peg-charclass all
   (and (ignore "[")
        (body lit (and (body ! "]" 1)
                       (or charclass-range charclass-single)) *)
        (ignore "]")
        peg-sp))
-(define-nonterm charclass-range all (and peg-any "-" peg-any))
-(define-nonterm charclass-single all peg-any)
-(define-nonterm peg-nonterminal all
+(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
+(define-sexp-parser charclass-single all peg-any)
+(define-sexp-parser peg-nonterminal all
   (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
-(define-nonterm peg-sp none
+(define-sexp-parser peg-sp none
   (body lit (or " " "\t" "\n") *))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -297,9 +249,10 @@ RB < ']'
 
 ;; Builds a lambda-expressions for the pattern STR using accum.
 (define (peg-string-compile str-stx accum)
-  (peg-sexp-compile
-   (compressor
-    (peg-pattern->defn
-     (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
-    str-stx)
-   accum))
+  (let ((string (syntax->datum str-stx)))
+    (peg-sexp-compile
+     (compressor
+      (peg-pattern->defn
+       (peg:tree (peg-parse peg-pattern string)) str-stx)
+      str-stx)
+     accum)))
-- 
1.7.4.1

