branch: externals/matlab-mode commit b60330dfa4862cae04fe16dfb3d44f93219c57b8 Author: John Ciolfi <john.ciolfi...@gmail.com> Commit: John Ciolfi <john.ciolfi...@gmail.com>
matlab-ts-mode: doc handling corrupted content --- contributing/treesit-mode-how-to.org | 86 ++++++++++++++++++++++++++++++++++++ tests/t-utils.el | 32 ++++++++------ 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/contributing/treesit-mode-how-to.org b/contributing/treesit-mode-how-to.org index 985cc4e6fa..6d2f42e4ca 100644 --- a/contributing/treesit-mode-how-to.org +++ b/contributing/treesit-mode-how-to.org @@ -1460,6 +1460,92 @@ there-end with mismatch true (t) because the string is missing the starting quot No buffer modifications #+end_src +* Setup: Handling Corrupted Content + +Corrupted content in LANGUAGE, NAME.LANG, files can crash Emacs when your tree-sitter +language shared library runs on the corrupted content. For example, try loading a large +binary file and =M-x LANGUAGE-major-mode=. Since content should be utf-8, you should add +to the start of your LANGUAGE-major-mode: + +#+begin_src emacs-lisp + (defun LANGUAGE-ts-mode--check-file-encoding () + "Check file encoding. + Error is signaled if contents are corrupt because non-utf8 printable + content can crash Emacs via the LANGUAGE tree-sitter parser." + + (let ((bad-char-point (save-excursion + (goto-char (point-min)) + (when (re-search-forward "[^[:print:][:space:]]" nil t) + (point))))) + (when bad-char-point + (fundamental-mode) + (goto-char bad-char-point) + (user-error "Buffer appears corrupt, non-printable utf8 character at point %d: %c" + bad-char-point (char-before))))) + + + (define-derived-mode LANGUAGE-ts-mode prog-mode "LANGUAGE:ts" + "Documentation." 
+ + (LANGUAGE-ts-mode--check-file-encoding) + + (when (treesit-ready-p 'LANGUAGE) + ;; <snip> + )) +#+end_src + +** Test: Handling Corrupted Content + +Test setup: + + #+begin_example + ./LANGUAGE-ts-mode.el + ./tests/test-LANGUAGE-ts-mode-file-encoding.el + ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1.LANG + ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1_expected.txt + ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2.LANG + ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2_expected.txt + .... + #+end_example + +=./tests/test-LANGUAGE-ts-mode-file-encoding.el= contains: + + #+begin_src emacs-lisp + (require 't-utils) + (require 'LANGUAGE-ts-mode) + + (defvar test-LANGUAGE-ts-mode-file-encoding--file nil) + + (defun test-LANGUAGE-ts-mode-file-encoding--file (lang-file) + "Test file-encoding on LANG-FILE." + (let ((test-LANGUAGE-ts-mode-file-encoding--file lang-file)) + (ert-run-tests-interactively "test-LANGUAGE-ts-mode-file-encoding"))) + + (ert-deftest test-LANGUAGE-ts-mode-file-encoding () + (let* ((test-name "test-LANGUAGE-ts-mode-file-encoding") + (lang-files (t-utils-get-files + test-name + (rx ".lang" eos) + nil + test-LANGUAGE-ts-mode-file-encoding--file))) + (t-utils-error-if-no-treesit-for 'LANGUAGE test-name) + (t-utils-test-file-encoding test-name lang-files #'LANGUAGE-ts-mode))) + #+end_src + +Create /tests/test-LANGUAGE-ts-mode-file-encoding-files/*.LANG files containing corrupted +(non-utf-8) content. Also create at least one valid *.LANG file. + +Run the test: + + : M-x ert RET test-LANGUAGE-ts-mode-file-encoding RET + +In the =ert= result buffer, you can type "m" at the point of the test (where +the color marker is) to see messages that were displayed by your test. + +If the =./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt~= files look good, +rename them to =./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt= (per the +messages shown by ert). 
+ * Final version TODO diff --git a/tests/t-utils.el b/tests/t-utils.el index 1075f1bb30..cdcf0b10b2 100644 --- a/tests/t-utils.el +++ b/tests/t-utils.el @@ -47,13 +47,13 @@ ;; after examining it, rename it to ;; ./tests/test-LANGUAGE-ts-mode-font-lock-files/font_lock_test1_expected.txt. ;; -;; When you run ert interactively, you'll be presented with a *ert* buffer. You can -;; type "m" on the colored dots in the *ert* buffer to see the messages for that ert test +;; When you run ert interactively, you'll be presented with an ert result buffer. You can +;; type "m" on the colored dots in the ert result buffer to see the messages for that ert test ;; and the messages contain the sub-tests from the test loop for that ert test. This will bring ;; up an *ERT Messages* buffer. In this buffer, type ;; M-x compilation-minor-mode -;; to view the and navigate errors. The default error viewing in the *ert* buffer is a bit dense -;; due to the looping nature of the t-utils tests. +;; to view and navigate the errors. The default error viewing in the ert result buffer is a bit +;; dense due to the looping nature of the t-utils tests. ;; ;; To run your tests in a build system, use ;; @@ -785,8 +785,9 @@ To loop over all NAME*.LANG font-lock test files, interactively \\[ert] RET test-LANGUAGE-ts-mode-font-lock RET -In the *ert* buffer, you can type \"m\" at the point of the test (where -the color marker is) to see messages that were displayed by your test. +In the `ert' result buffer, you can type \"m\" at the point of the +test (where the color marker is) to see messages that were displayed by +your test. To debug a specific font-lock test file @@ -989,8 +990,9 @@ To loop over all NAME*.LANG indent test files, interactively \\[ert] RET test-LANGUAGE-ts-mode-indent RET -In the *ert* buffer, you can type \"m\" at the point of the test (where -the color marker is) to see messages that were displayed by your test. 
+In the `ert' result buffer, you can type \"m\" at the point of the +test (where the color marker is) to see messages that were displayed by +your test. To debug a specific indent test file @@ -1553,14 +1555,15 @@ Where ./tests/test-LANUGAGE-ts-mode-file-encoding.el contains: nil test-LANGUAGE-ts-mode-file-encoding--file))) (t-utils-error-if-no-treesit-for \\='LANGUAGE test-name) - (t-utils-test-file-encoding test-name lang-files))) + (t-utils-test-file-encoding test-name lang-files #\\='LANGUAGE-ts-mode))) To loop over all NAME*.LANG file-encoding test files, interactively \\[ert] RET test-LANGUAGE-ts-mode-file-encoding RET -In the *ert* buffer, you can type \"m\" at the point of the test (where -the color marker is) to see messages that were displayed by your test. +In the `ert' result buffer, you can type \"m\" at the point of the +test (where the color marker is) to see messages that were displayed by +your test. To debug a specific file-encoding test file @@ -1583,13 +1586,14 @@ To debug a specific file-encoding test file (got "Major mode activated succesfully.") (got-file (concat expected-file "~"))) - (t-utils--insert-file-for-test lang-file file-major-mode) - + ;; Load lang-file in temp buffer and activate file-major-mode (condition-case err - (t-utils--insert-file-for-test lang-file) + (t-utils--insert-file-for-test lang-file file-major-mode) (error (setq got (concat "Major mode errored with message\n" (error-message-string err))))) + (setq got (concat got "\n\n" "Entered major-mode: " (symbol-name major-mode) "\n")) + (kill-buffer) (let ((error-msg (t-utils--baseline-check