branch: elpa/parseclj
commit f8822bb43c1d466085a65150a399e04cefebf9e4
Author: Daniel Barreto <[email protected]>
Commit: Daniel Barreto <[email protected]>
Add support for tags in lexer
---
clj-lex-test.el | 44 ++++++++++++++++++++++++++++++++++++++++++++
clj-lex.el | 53 +++++++++++++++++++++++++++++++++--------------------
2 files changed, 77 insertions(+), 20 deletions(-)
diff --git a/clj-lex-test.el b/clj-lex-test.el
index 405420afcb..30d1082625 100644
--- a/clj-lex-test.el
+++ b/clj-lex-test.el
@@ -63,6 +63,21 @@
(goto-char 1)
(should (equal (clj-lex-next) '((type . :symbol) (form . "hello-world")
(pos . 1)))))
+ (with-temp-buffer
+ (insert "-hello-world")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :symbol) (form . "-hello-world")
(pos . 1)))))
+
+ (with-temp-buffer
+ (insert "#inst")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :tag) (form . "#inst") (pos .
1)))))
+
+ (with-temp-buffer
+ (insert "#qualified/tag")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :tag) (form . "#qualified/tag")
(pos . 1)))))
+
(with-temp-buffer
(insert "\\newline\\return\\space\\tab\\a\\b\\c")
(goto-char 1)
@@ -101,6 +116,11 @@
(goto-char 1)
(should (equal (clj-lex-next) (clj-lex-token :keyword ":hello-world" 1))))
+ (with-temp-buffer
+ (insert ":hello/world")
+ (goto-char 1)
+ (should (equal (clj-lex-next) (clj-lex-token :keyword ":hello/world" 1))))
+
(with-temp-buffer
(insert "::hello-world")
(goto-char 1)
@@ -184,6 +204,7 @@
(should (equal (clj-lex-symbol-start? ?a) t))
(should (equal (clj-lex-symbol-start? ?A) t))
(should (equal (clj-lex-symbol-start? ?.) t))
+ (should (equal (clj-lex-symbol-start? ?. t) nil))
(should (equal (clj-lex-symbol-start? ?~) nil))
(should (equal (clj-lex-symbol-start? ? ) nil)))
@@ -195,6 +216,29 @@
(should (equal (clj-lex-symbol-rest? ?~) nil))
(should (equal (clj-lex-symbol-rest? ? ) nil)))
+(ert-deftest clj-lex-test-get-symbol-at-point ()
+ (with-temp-buffer
+ (insert "a-symbol")
+ (goto-char 1)
+ (should (equal (clj-lex-get-symbol-at-point 1) "a-symbol"))
+ (should (equal (point) 9))))
+
+(ert-deftest clj-lex-test-invalid-tag ()
+ (with-temp-buffer
+ (insert "#.not-a-tag")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :lex-error) (form . "#.not-a-tag")
(pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+
+ (with-temp-buffer
+ (insert "#-not-a-tag")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :lex-error) (form . "#-not-a-tag")
(pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+
+ (with-temp-buffer
+ (insert "#+not-a-tag")
+ (goto-char 1)
+ (should (equal (clj-lex-next) '((type . :lex-error) (form . "#+not-a-tag")
(pos . 1) (error-type . :invalid-hashtag-dispatcher))))))
+
(ert-deftest clj-lex-test-string ()
(with-temp-buffer
(insert "\"abc\"")
diff --git a/clj-lex.el b/clj-lex.el
index af6581ed01..af07cd40bd 100644
--- a/clj-lex.el
+++ b/clj-lex.el
@@ -25,7 +25,7 @@
(defun clj-lex-token (type form pos &rest args)
`((type . ,type)
(form . ,form)
- (pos . , pos)
+ (pos . ,pos)
,@(mapcar (lambda (pair)
(cons (car pair) (cadr pair)))
(-partition 2 args))))
@@ -81,26 +81,34 @@
(and (member char '(?- ?+ ?.))
(clj-lex-digit? (char-after (1+ (point))))))))
-(defun clj-lex-symbol-start? (char)
+(defun clj-lex-symbol-start? (char &optional alpha-only)
"Symbols begin with a non-numeric character and can contain
- alphanumeric characters and . * + ! - _ ? $ % & = < >. If -, +
- or . are the first character, the second character (if any)
- must be non-numeric."
+alphanumeric characters and . * + ! - _ ? $ % & = < >. If -, + or
+. are the first character, the second character (if any) must be
+non-numeric.
+
+In some cases, like in tagged elements, symbols are required to
+start with alphabetic characters only. ALPHA-ONLY ensures this
+behavior."
(not (not (and char
(or (and (<= ?a char) (<= char ?z))
(and (<= ?A char) (<= char ?Z))
- (member char '(?. ?* ?+ ?! ?- ?_ ?? ?$ ?% ?& ?= ?< ?>
?/)))))))
+ (and (not alpha-only) (member char '(?. ?* ?+ ?! ?- ?_ ??
?$ ?% ?& ?= ?< ?> ?/))))))))
(defun clj-lex-symbol-rest? (char)
(or (clj-lex-symbol-start? char)
(clj-lex-digit? char)))
+(defun clj-lex-get-symbol-at-point (pos)
+ "Return the symbol at point."
+ (while (clj-lex-symbol-rest? (char-after (point)))
+ (right-char))
+ (buffer-substring-no-properties pos (point)))
+
(defun clj-lex-symbol ()
(let ((pos (point)))
(right-char)
- (while (clj-lex-symbol-rest? (char-after (point)))
- (right-char))
- (let ((sym (buffer-substring-no-properties pos (point))))
+ (let ((sym (clj-lex-get-symbol-at-point pos)))
(cond
((equal sym "nil") (clj-lex-token :nil "nil" pos))
((equal sym "true") (clj-lex-token :true "true" pos))
@@ -162,10 +170,7 @@
(when (equal (char-after (point)) ?:)
(right-char))
(if (clj-lex-symbol-start? (char-after (point)))
- (progn
- (while (clj-lex-symbol-rest? (char-after (point)))
- (right-char))
- (clj-lex-token :keyword (buffer-substring-no-properties pos (point))
pos))
+ (clj-lex-token :keyword (clj-lex-get-symbol-at-point pos) pos)
(progn
(right-char)
(clj-lex-token :lex-error (buffer-substring-no-properties pos (point))
pos 'error-type :invalid-keyword)))))
@@ -221,13 +226,21 @@
((equal char ?#)
(right-char)
(let ((char (char-after (point))))
- (cl-case char
- (?{
- (right-char)
- (clj-lex-token :set "#{" pos))
- (?_
- (right-char)
- (clj-lex-token :discard "#_" pos)))))
+ (cond
+ ((equal char ?{)
+ (right-char)
+ (clj-lex-token :set "#{" pos))
+ ((equal char ?_)
+ (right-char)
+ (clj-lex-token :discard "#_" pos))
+ ((clj-lex-symbol-start? char t)
+ (right-char)
+ (clj-lex-token :tag (concat "#" (clj-lex-get-symbol-at-point (1+
pos))) pos))
+ (t
+ (while (not (or (clj-lex-at-whitespace?)
+ (clj-lex-at-eof?)))
+ (right-char))
+ (clj-lex-token :lex-error (buffer-substring-no-properties pos
(point)) pos 'error-type :invalid-hashtag-dispatcher)))))
":("))))