branch: externals/matlab-mode
commit b60330dfa4862cae04fe16dfb3d44f93219c57b8
Author: John Ciolfi <john.ciolfi...@gmail.com>
Commit: John Ciolfi <john.ciolfi...@gmail.com>

    matlab-ts-mode: doc handling corrupted content
---
 contributing/treesit-mode-how-to.org | 86 ++++++++++++++++++++++++++++++++++++
 tests/t-utils.el                     | 32 ++++++++------
 2 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/contributing/treesit-mode-how-to.org 
b/contributing/treesit-mode-how-to.org
index 985cc4e6fa..6d2f42e4ca 100644
--- a/contributing/treesit-mode-how-to.org
+++ b/contributing/treesit-mode-how-to.org
@@ -1460,6 +1460,92 @@ there-end with mismatch true (t) because the string is 
missing the starting quot
     No buffer modifications
 #+end_src
 
+* Setup: Handling Corrupted Content
+
+Corrupted content in LANGUAGE files (NAME.LANG) can crash Emacs when your 
tree-sitter
+language shared library runs on the corrupted content.  For example, try loading 
a large
+binary file and running =M-x LANGUAGE-major-mode=.  Since content should be utf-8, you 
should add
+to the start of your LANGUAGE-major-mode:
+
+#+begin_src emacs-lisp
+  (defun LANGUAGE-ts-mode--check-file-encoding ()
+    "Check file encoding.
+  Error is signaled if contents are corrupt because non-utf8 printable
+  content can crash Emacs via the LANGUAGE tree-sitter parser."
+
+    (let ((bad-char-point (save-excursion
+                            (goto-char (point-min))
+                            (when (re-search-forward "[^[:print:][:space:]]" 
nil t)
+                              (point)))))
+      (when bad-char-point
+        (fundamental-mode)
+        (goto-char bad-char-point)
+        (user-error "Buffer appears corrupt, non-printable utf8 character at 
point %d: %c"
+                    bad-char-point (char-before)))))
+
+
+  (define-derived-mode LANGUAGE-ts-mode prog-mode "LANGUAGE:ts"
+    "Documentation."
+
+    (LANGUAGE-ts-mode--check-file-encoding)
+
+    (when (treesit-ready-p 'LANGUAGE)
+      ;; <snip>
+      ))
+#+end_src
+
+** Test: Handling Corrupted Content
+
+Test setup:
+
+ #+begin_example
+ ./LANGUAGE-ts-mode.el
+ ./tests/test-LANGUAGE-ts-mode-file-encoding.el
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1.LANG
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1_expected.txt
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2.LANG
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2_expected.txt
+ ....
+ #+end_example
+
+=./tests/test-LANGUAGE-ts-mode-file-encoding.el= contains:
+
+  #+begin_src emacs-lisp
+    (require 't-utils)
+    (require 'LANGUAGE-ts-mode)
+
+    (defvar test-LANGUAGE-ts-mode-file-encoding--file nil)
+
+    (defun test-LANGUAGE-ts-mode-file-encoding--file (lang-file)
+      "Test file-encoding on LANG-FILE."
+      (let ((test-LANGUAGE-ts-mode-file-encoding--file lang-file))
+        (ert-run-tests-interactively "test-LANGUAGE-ts-mode-file-encoding")))
+
+    (ert-deftest test-LANGUAGE-ts-mode-file-encoding ()
+      (let* ((test-name "test-LANGUAGE-ts-mode-file-encoding")
+             (lang-files (t-utils-get-files
+                       test-name
+                       (rx ".lang" eos)
+                       nil
+                       test-LANGUAGE-ts-mode-file-encoding--file)))
+        (t-utils-error-if-no-treesit-for 'LANGUAGE test-name)
+        (t-utils-test-file-encoding test-name lang-files 
#'LANGUAGE-ts-mode)))
+  #+end_src
+
+Create =./tests/test-LANGUAGE-ts-mode-file-encoding-files/*.LANG= files 
containing corrupted
+(non-utf-8) content. Also create at least one valid =*.LANG= file.
+
+Run the test:
+
+ : M-x ert RET test-LANGUAGE-ts-mode-file-encoding RET
+
+In the =ert= result buffer, you can type "m" at the point of the test (where
+the color marker is) to see messages that were displayed by your test.
+
+If the =./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt~= 
files look good,
+rename them to 
=./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt= (per the
+messages shown by ert).
+  
 * Final version
 
 TODO
diff --git a/tests/t-utils.el b/tests/t-utils.el
index 1075f1bb30..cdcf0b10b2 100644
--- a/tests/t-utils.el
+++ b/tests/t-utils.el
@@ -47,13 +47,13 @@
 ;; after examining it, rename it to
 ;; ./tests/test-LANGUAGE-ts-mode-font-lock-files/font_lock_test1_expected.txt.
 ;;
-;; When you run ert interactively, you'll be presented with a *ert* buffer.  
You can
-;; type "m" on the colored dots in the *ert* buffer to see the messages for 
that ert test
+;; When you run ert interactively, you'll be presented with an ert result 
buffer.  You can
+;; type "m" on the colored dots in the ert result buffer to see the messages 
for that ert test
 ;; and the messages contain the sub-tests from the test loop for that ert 
test.  This will bring
 ;; up an *ERT Messages* buffer.  In this buffer, type
 ;;   M-x compilation-minor-mode
-;; to view the and navigate errors.  The default error viewing in the *ert* 
buffer is a bit dense
-;; due to the looping nature of the t-utils tests.
+;; to view and navigate the errors.  The default error viewing in the ert 
result buffer is a bit
+;; dense due to the looping nature of the t-utils tests.
 ;;
 ;; To run your tests in a build system, use
 ;;
@@ -785,8 +785,9 @@ To loop over all NAME*.LANG font-lock test files, 
interactively
 
   \\[ert] RET test-LANGUAGE-ts-mode-font-lock RET
 
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
 
 To debug a specific font-lock test file
 
@@ -989,8 +990,9 @@ To loop over all NAME*.LANG indent test files, interactively
 
   \\[ert] RET test-LANGUAGE-ts-mode-indent RET
 
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
 
 To debug a specific indent test file
 
@@ -1553,14 +1555,15 @@ Where ./tests/test-LANUGAGE-ts-mode-file-encoding.el 
contains:
                      nil
                      test-LANGUAGE-ts-mode-file-encoding--file)))
       (t-utils-error-if-no-treesit-for \\='LANGUAGE test-name)
-      (t-utils-test-file-encoding test-name lang-files)))
+      (t-utils-test-file-encoding test-name lang-files #\\='LANGUAGE-ts-mode)))
 
 To loop over all NAME*.LANG file-encoding test files, interactively
 
   \\[ert] RET test-LANGUAGE-ts-mode-file-encoding RET
 
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
 
 To debug a specific file-encoding test file
 
@@ -1583,13 +1586,14 @@ To debug a specific file-encoding test file
                  (got "Major mode activated succesfully.")
                  (got-file (concat expected-file "~")))
 
-            (t-utils--insert-file-for-test lang-file file-major-mode)
-            
+            ;; Load lang-file in temp buffer and activate file-major-mode
             (condition-case err
-                (t-utils--insert-file-for-test lang-file)
+                (t-utils--insert-file-for-test lang-file file-major-mode)
               (error
                (setq got (concat "Major mode errored with message\n" 
(error-message-string err)))))
 
+            (setq got (concat got "\n\n" "Entered major-mode: " (symbol-name 
major-mode) "\n"))
+
             (kill-buffer)
 
             (let ((error-msg (t-utils--baseline-check

Reply via email to