Sorry that I didn't have time to prepare anything better, but here's a little appetizer. It is my implementation of recursive descent parser. Perhaps it isn't as expressive as bison or yacc, but it is considerably shorter :)
It implements backtracking through the mechanism of exceptions. The code is so small that I'm pasting it here, along with example usages. It uses the srfi-1 module as well as my (ice-9 nice-9) module to annoy Taylan a little ;] https://github.com/panicz/pamphlet/blob/master/libraries/ice-9/nice-9.scm Here it is: (use-modules (srfi srfi-1) (ice-9 nice-9)) (define ((in? l) x) (match l ((h . t) (or (eq? x h) ((in? t) x))) (_ #f))) (define (non-terminals grammar) (delete-duplicates (map (λ ((non-terminal => . production)) non-terminal) grammar))) (define (terminals grammar) (let ((non-terminals (non-terminals grammar)) (productions (append-map (λ ((non-terminal => . production)) production) grammar))) (delete-duplicates (lset-difference eq? productions non-terminals)))) (define ((recursive-descent-parser grammar) input) (let ((non-terminals (non-terminals grammar)) (terminals (terminals grammar)) (((initial-rule => . _) . _) grammar)) (define (initial-match? rule input) (let* ((prefix _ (span (in? terminals) rule)) (prefix-length (length prefix))) (and (>= (length input) prefix-length) (equal? prefix (take input prefix-length))))) (define (match-rule rule input) (match rule (() (values '() input)) (((? (in? terminals) t) . next) (let (((token . input) input)) (if (eq? token t) (let ((rest input (match-rule next input))) (values `(,token . ,rest) input)) (throw 'parse-error input)))) (((? (in? non-terminals) A) . next) (let* ((parsed input (parse-rule A input)) (rest input (match-rule next input))) (values `(,parsed . ,rest) input))))) (define (parse-rule rule-name input) (let try ((variants (filter (λ ((name => . rule)) (and (eq? name rule-name) (initial-match? rule input))) grammar))) (catch 'parse-error (λ () (match variants (((A => . first-rule) . _) (let ((parsed input (match-rule first-rule input))) (values `(,A . ,parsed) input))) (_ (throw 'parse-error input)))) (λ errors (match variants ((failed . remaining) (try remaining)) (_ (throw 'parse-error input))))))) (parse-rule initial-rule input))) ;; for example: ((recursive-descent-parser '((A => a A b) (A => c))) '(a a a c b b b)) ;; => (A a (A a (A a (A c) b) b) b) ((recursive-descent-parser '((<S> => a <S> d) (<S> => <B>) (<B> => b <B> c) (<B> => e))) '(a a b b e c c d d)) ; => (<S> a (<S> a (<S> (<B> b (<B> b (<B> e) c) c)) d) d) ((recursive-descent-parser '((<S> => <A> <B>) (<A> => a <A>) (<A> => x) (<B> => b <B>) (<B> => x))) '(a a a a x b b x)) ; => (<S> (<A> a (<A> a (<A> a (<A> a (<A> x))))) (<B> b (<B> b (<B> x)))) ;; note that the parser doesn''t always work; ;; it only works for the so-called LL grammars ;; for instance, the following grammar will result ;; in stack overflow: ((recursive-descent-parser '((<A> => <A> + <B>) (<A> => <B>) (<B> => a) (<B> => c))) '(a + c + a + c + a + a)) ;; ~~~> <boom!> ;; However, the grammar can be transformed to an equivalent form: ((recursive-descent-parser '((<A> => <B> +<B>*) (+<B>* => + <B> +<B>*) (+<B>* => ) (<B> => a) (<B> => c))) '(a + c + a + c + a + a)) ;; but then, some strange non-terminal symbols appear: ;=> (<A> (<B> a) (+<B>* + (<B> c) ; (+<B>* + (<B> a) (+<B>* + (<B> c) ; (+<B>* + (<B> a) ; (+<B>* + (<B> a) (+<B>*))))))) ; We need to introduce additional transformation on the ouput: (define (eliminate+<B>* tree) (match tree (('<A> <B> +<B>*) `(<A> ,<B> . ,(eliminate+<B>* +<B>*))) (('+<B>* '+ <B> +<B>*) `(+ ,<B> . ,(eliminate+<B>* +<B>*))) (('+<B>*) '()) (_ tree))) (eliminate+<B>* ((recursive-descent-parser '((<A> => <B> +<B>*) (+<B>* => + <B> +<B>*) (+<B>* => ) (<B> => a) (<B> => c))) '(a + c + a + c + a + a))) ;=> (<A> (<B> a) + (<B> c) + (<B> a) + (<B> c) + (<B> a) + (<B> a)) Happy birthday, Guile!
