185: Support \uxxxx and \oxxx escape codes in strings

ELPA Syncer Tue, 28 Dec 2021 11:06:22 -0800

branch: elpa/parseclj
commit 22f2eb106fd37272b64c4bfde6c388d308358463
Author: Arne Brasseur <[email protected]>
Commit: Arne Brasseur <[email protected]>


    Support \uxxxx and \oxxx escape codes in strings
---
 clj-lex-test.el   |  7 ++++++-
 clj-parse-test.el |  7 ++++++-
 clj-parse.el      | 42 ++++++++++++++++++++++++++----------------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/clj-lex-test.el b/clj-lex-test.el
index eeabcdc4db..426698303a 100644
--- a/clj-lex-test.el
+++ b/clj-lex-test.el
@@ -89,7 +89,12 @@
     (insert "\\u0078\\o170")
     (goto-char 1)
     (should (equal (clj-lex-next) (clj-lex-token :character "\\u0078" 1)))
-    (should (equal (clj-lex-next) (clj-lex-token :character "\\o170" 7)))))
+    (should (equal (clj-lex-next) (clj-lex-token :character "\\o170" 7))))
+
+  (with-temp-buffer
+    (insert "\"\\u0078\\o170\"")
+    (goto-char 1)
+    (should (equal (clj-lex-next) (clj-lex-token :string "\"\\u0078\\o170\"" 1)))))
 
 (ert-deftest clj-lex-test-at-number? ()
   (dolist (str '("123" ".9" "+1" "0" "-456"))
diff --git a/clj-parse-test.el b/clj-parse-test.el
index 2c003cb17b..fae8cc03e5 100644
--- a/clj-parse-test.el
+++ b/clj-parse-test.el
@@ -64,7 +64,12 @@
   (with-temp-buffer
     (insert "(\\newline \\return \\space \\tab \\a \\b \\c \\u0078 \\o171)")
     (goto-char 1)
-    (should (equal (clj-parse) '((?\n ?\r ?\ ?\t ?a ?b ?c ?x ?y))))))
+    (should (equal (clj-parse) '((?\n ?\r ?\ ?\t ?a ?b ?c ?x ?y)))))
+
+  (with-temp-buffer
+    (insert "\"\\u0078 \\o171\"")
+    (goto-char 1)
+    (should (equal (clj-parse) '("x y")))))
 
 (provide 'clj-parse-test)
 
diff --git a/clj-parse.el b/clj-parse.el
index 610a98eac0..004901090b 100644
--- a/clj-parse.el
+++ b/clj-parse.el
@@ -39,23 +39,33 @@
                                  :character)
   "Tokens that represent leaf nodes in the AST.")
 
-;; Java/JavaScript strings support other escape codes like "\u0111", but
-;; these are the only ones mentioned in the EDN spec.
-;; Although of course for bare characters
+;; The EDN spec is not clear about whether \u0123 and \o012 are supported in
+;; strings. They are described as character literals, but not as string escape
+;; codes. In practice all implementations support them (mostly with broken
+;; surrogate pair support), so we do the same. Sorry, emoji 🙁.
+;;
+;; Note that this is kind of broken, we don't correctly detect if \u or \o forms
+;; don't have the right forms.
 (defun clj-parse-string (s)
-  (replace-regexp-in-string "\\\\[tbnrf'\"\\]"
-                            (lambda (x)
-                              (cl-case (elt x 1)
-                                (?t "\t")
-                                (?f "\f")
-                                (?\" "\"")
-                                (?r "\r")
-                                (?n "\n")
-                                (?\\ "\\\\")
-                                (t (substring x 1 2))))
-                            (substring s 1 -1)))
-
-
+  (replace-regexp-in-string
+   "\\\\o[0-8]\\{3\\}"
+   (lambda (x)
+     (make-string 1 (string-to-number (substring x 2) 8) ))
+   (replace-regexp-in-string
+    "\\\\u[0-9a-fA-F]\\{4\\}"
+    (lambda (x)
+      (make-string 1 (string-to-number (substring x 2) 16)))
+    (replace-regexp-in-string "\\\\[tbnrf'\"\\]"
+                              (lambda (x)
+                                (cl-case (elt x 1)
+                                  (?t "\t")
+                                  (?f "\f")
+                                  (?\" "\"")
+                                  (?r "\r")
+                                  (?n "\n")
+                                  (?\\ "\\\\")
+                                  (t (substring x 1))))
+                              (substring s 1 -1)))))
 
 (defun clj-parse-character (c)
   (let* ((form (cdr (assq 'form token)))

[nongnu] elpa/parseclj 22f2eb106f 028/185: Support \uxxxx and \oxxx escape codes in strings

Reply via email to