branch: elpa/parseclj
commit 0644bcdbf4a54a3c6162632e2ccb21d8f098813a
Author: Arne Brasseur <[email protected]>
Commit: Arne Brasseur <[email protected]>
Implement :fail-fast
---
parseclj-lex.el | 3 ++
parseclj.el | 48 ++++++++++++++++++++++---------
test/parseclj-test.el | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 118 insertions(+), 13 deletions(-)
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 78c89f17b4..9e72a2972c 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -33,6 +33,9 @@
(cons (car pair) (cadr pair)))
(seq-partition args 2))))
+(defun parseclj-lex-token? (token)
+ (and (consp token) (consp (car token)) (eq 'type (caar token))))
+
(defun parseclj-lex-token-type (token)
(and (listp token)
(cdr (assq 'type token))))
diff --git a/parseclj.el b/parseclj.el
index 9791b11eaa..306ceff31c 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -111,6 +111,15 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Shift-Reduce Parser
+(define-error 'parseclj-parse-error "parseclj: Syntax error")
+
+(defun parseclj--error (format &rest args)
+ "Signal a parse error.
+Takes a FORMAT string and optional ARGS to be passed to
+`format-message'. Signals a 'parseclj-parse-error signal, which
+can be handled with `condition-case'."
+ (signal 'parseclj-parse-error (list (apply #'format-message format args))))
+
(defun parseclj--find-opener (stack closer-token)
(cl-case (parseclj-lex-token-type closer-token)
(:rparen :lparen)
@@ -118,21 +127,29 @@
(:rbrace (parseclj-lex-token-type
(seq-find (lambda (token) (member (parseclj-lex-token-type
token) '(:lbrace :set))) stack)))))
-(defun parseclj--reduce-coll (stack closer-token reduceN)
+(defun parseclj--reduce-coll (stack closer-token reduce-branch options)
"Reduce collection based on the top of the stack"
(let ((opener-type (parseclj--find-opener stack closer-token))
+ (fail-fast (a-get options :fail-fast t))
(coll nil))
- (while (and stack
- (not (eq (parseclj-lex-token-type (car stack)) opener-type)))
+ (while (and stack (not (eq (parseclj-lex-token-type (car stack))
opener-type)))
(push (pop stack) coll))
(if (eq (parseclj-lex-token-type (car stack)) opener-type)
(let ((node (pop stack)))
- (funcall reduceN stack node coll))
- ;; Syntax error
- (progn
- (message "STACK: %S , CLOSER: %S" stack closer-token)
- (error "Syntax Error")))))
+ (when fail-fast
+ (when-let ((token (seq-find #'parseclj-lex-token? coll)))
+ (parseclj--error "parseclj: Syntax Error at position %s,
unmatched %S"
+ (a-get token 'pos)
+ (parseclj-lex-token-type token))))
+ (funcall reduce-branch stack node coll))
+
+ (if fail-fast
+ (parseclj--error "parseclj: Syntax Error at position %s, unmatched
%S"
+ (a-get closer-token 'pos)
+ (parseclj-lex-token-type closer-token))
+ ;; Unwound the stack without finding a matching paren: return the
original stack and continue parsing
+ (reverse coll)))))
(defun parseclj-parse (reduce-leaf reduce-branch &optional options)
"Clojure/EDN stack-based shift-reduce parser.
@@ -158,7 +175,8 @@ errors.
OPTIONS is an association list which is passed on to the reducing
functions.
"
- (let ((stack nil))
+ (let ((fail-fast (a-get options :fail-fast t))
+ (stack nil))
(while (not (eq (parseclj-lex-token-type (setq token (parseclj-lex-next)))
:eof))
;; (message "STACK: %S" stack)
@@ -168,7 +186,7 @@ functions.
(let ((token-type (parseclj-lex-token-type token)))
(cond
((member token-type parseclj--leaf-tokens) (setf stack (funcall
reduce-leaf stack token)))
- ((member token-type parseclj--closer-tokens) (setf stack
(parseclj--reduce-coll stack token reduce-branch)))
+ ((member token-type parseclj--closer-tokens) (setf stack
(parseclj--reduce-coll stack token reduce-branch options)))
(t (push token stack))))
;; Reduce based on top two items on the stack (special prefixed elements)
@@ -179,9 +197,13 @@ functions.
(setf stack (funcall reduce-branch (cddr stack) lookup (list
top))))))
;; reduce root
- (setf stack (funcall reduce-branch stack '((type . :root) (pos . 1))
(reverse stack)))
- ;; (message "RESULT: %S" stack)
- stack))
+ (when fail-fast
+ (when-let ((token (seq-find #'parseclj-lex-token? stack)))
+ (parseclj--error "parseclj: Syntax Error at position %s, unmatched %S"
+ (a-get token 'pos)
+ (parseclj-lex-token-type token))))
+
+ (funcall reduce-branch stack '((type . :root) (pos . 1)) (reverse stack))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Top level API
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index ae623a4c88..39658d642a 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -64,6 +64,86 @@
(:form . "bar")
(:value . bar)))))))
+(ert-deftest parseclj-parse-clojure-fail-fast-test ()
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "foo]")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 4, unmatched :rbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "[foo")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 1, unmatched :lbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "(1 2 [ 4)")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 6, unmatched :lbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "1 2 #_")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 5, unmatched :discard"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "(1 [2 {3 ( 4}])")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 10, unmatched :lparen")))
+
+(ert-deftest parseclj-parse-clojure-fail-fast-test ()
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "foo]")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 4, unmatched :rbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "[foo")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 1, unmatched :lbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "(1 2 [ 4)")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 6, unmatched :lbracket"))
+
+ (should (equal
+ (condition-case errdata
+ (parseclj-parse-clojure "1 2 #_")
+ (parseclj-parse-error (cadr errdata)))
+ "parseclj: Syntax Error at position 5, unmatched :discard"))
+
+ (should (equal (parseclj-parse-clojure "(1 [2 {3 ( 4}])" :fail-fast nil)
+ '((:node-type . :root)
+ (:position . 0)
+ (:children ((:node-type . :list)
+ (:position . 1)
+ (:children ((:node-type . :number)
+ (:position . 2)
+ (:form . "1")
+ (:value . 1))
+ ((:node-type . :vector)
+ (:position . 4)
+ (:children ((:node-type . :number)
+ (:position . 5)
+ (:form . "2")
+ (:value . 2))
+ ((:node-type . :map)
+ (:position . 7)
+ (:children ((:node-type
. :number) (:position . 8) (:form . "3") (:value . 3))
+ ((type .
:lparen) (form . "(") (pos . 10))
+ ((:node-type
. :number) (:position . 12) (:form . "4") (:value . 4))))))))))))
+
+ ;; TODO: uneven map forms
+ )
+
(provide 'parseclj-test)
;;; parseclj-test.el ends here