branch: externals/matlab-mode commit 1b9fcd2b9de7267d9e792651b2d9a15d81fa7e41 Author: John Ciolfi <john.ciolfi...@gmail.com> Commit: John Ciolfi <john.ciolfi...@gmail.com>
test: add tests/sweep-test-matlab-ts-grammar.el --- tests/sweep-test-matlab-ts-grammar.el | 160 ++++++++++++++++++ tests/sweep-test-matlab-ts-grammar.sh | 23 +++ tests/sweep-test-matlab-ts-mode-indent.el | 7 +- tests/t-utils.el | 260 +++++++++++++++++++++++++++--- 4 files changed, 424 insertions(+), 26 deletions(-) diff --git a/tests/sweep-test-matlab-ts-grammar.el b/tests/sweep-test-matlab-ts-grammar.el new file mode 100644 index 0000000000..3b19d28b13 --- /dev/null +++ b/tests/sweep-test-matlab-ts-grammar.el @@ -0,0 +1,160 @@ +;;; sweep-test-matlab-ts-grammar.el --- -*- lexical-binding: t -*- +;; +;; Copyright 2025 Free Software Foundation, Inc. +;; +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to +;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +;; + +;;; Commentary: +;; +;; M-: (sweep-test-matlab-ts-grammar) - Look for bad matlab tree-sitter parses +;; on *.m files in current directory +;; + + +;;; Code: + +(require 't-utils) +(require 'matlab-ts-mode) +(require 'matlab--access) + + +(defun sweep-test-matlab-ts-grammar--syntax-checker (m-files) + "Syntax check each *.m file in M-FILES using MATLAB checkIssue. + +Returns hash table where the keys are the m-files and each key +value is either \"no-syntax-errors\" or \"has-syntax-errors\"." 
+ (let* ((matlab-exe (or (matlab--get-abs-matlab-exe) + (error "No matlab found (to fix put matlab on your PATH)"))) + (tmp-check-file (make-temp-file "sweep_test_matlab_ts_grammar" nil ".m")) + (check-fun (file-name-sans-extension (file-name-nondirectory tmp-check-file))) + (tmp-check-file-dir (file-name-directory tmp-check-file)) + (result-ht (make-hash-table :test 'equal))) + + (with-temp-buffer + (cd tmp-check-file-dir) + (insert "filesToCheck = ... + [ +") + + (dolist (m-file m-files) + (insert " \"" m-file "\"\n")) + + (insert " ]; + +for fIdx = 1:length(filesToCheck) + file = filesToCheck(fIdx); + issues = codeIssues(file); + + % Syntax errors have error Sererity + syntaxErrors = issues.Issues.Severity(:) == matlab.codeanalysis.IssueSeverity.error; + if any(syntaxErrors) + sIdx = find(syntaxErrors, 1, 'first'); + syntaxStatus = sprintf(\"has-syntax-errors at line %d:%d to %d:%d - %s\", ... + issues.Issues.LineStart(sIdx), ... + issues.Issues.ColumnStart(sIdx), ... + issues.Issues.LineEnd(sIdx), ... + issues.Issues.ColumnEnd(sIdx), ... 
+ issues.Issues.Description(sIdx)); + else + syntaxStatus = \"no-syntax-errors\"; + end + disp(strcat(\"--> \", file, \" > \", syntaxStatus)); +end +"); + (let ((coding-system-for-write 'raw-text-unix)) + (write-region (point-min) (point-max) tmp-check-file))) + + ;; Run codeIssues(mFile) via: matlab --batch check-fun + (with-temp-buffer + (cd tmp-check-file-dir) + (let ((status (call-process matlab-exe nil t nil "-batch" check-fun))) + (when (not (= status 0)) + (error "%s -batch %s (in directory %s) returned non-zero status, %d, with output:\n%s" + matlab-exe check-fun tmp-check-file-dir status (buffer-string)))) + (goto-char (point-min)) + + (while (not (eobp)) + (when (looking-at "^--> \\([^>]+\\) > \\(.+\\)$") + (let* ((info-line (match-string 0)) + (m-file (match-string 1)) + (syntax-status (match-string 2)) + (syntax-status-pair + (cond + ((string= syntax-status "no-syntax-errors") + (cons syntax-status nil)) + ((string-match "\\`\\(has-syntax-errors\\) \\(at line [0-9]+:[0-9]+.+\\)\\'" + syntax-status) + (cons (match-string 1 syntax-status) (match-string 2 syntax-status))) + (t + (error "Unexpected result: %s" info-line))))) + (puthash m-file syntax-status-pair result-ht))) + (forward-line)) + + ;; Validate we got expected stdout: + ;; --> M-FILE1 > SYNTAX-STATUS1 + ;; --> M-FILE2 > SYNTAX-STATUS2 + ;; .... + (dolist (m-file m-files) + (when (not (gethash m-file result-ht)) + (error "%s -batch %s (in directory %s) didn't return expected stdout, got:\n%s" + matlab-exe check-fun tmp-check-file-dir (buffer-string))))) + + (delete-file tmp-check-file) + result-ht)) + +(defun sweep-test-matlab-ts-grammar (&optional directory log-file) + "Check matlab tree-sitter parse of all *.m files under DIRECTORY. +DIRECTORY defaults to the current directory. 
+ + \\[sweep-test-matlab-ts-grammar] + +This validates that if MATLAB tree-sitter parse has ERROR nodes that the +MATLAB codeIssues command, +https://www.mathworks.com/help/matlab/ref/codeissues.html says the file +has syntax issues (issue severity of error). Likewise if MATLAB +tree-sitter parse says no syntax errors this test confirms that the +MATLAB codeIssues command reports the same. + +Messages are logged to LOG-FILE, which defaults to +sweep-test-matlab-ts-grammar.log + +When run interactively, displays the result in a *sweep-test-matlab-ts-grammar* +buffer, otherwise the results are displayed on stdout. + +On large directory trees, run via + cd /path/to/your/directory + emacs --batch \\ + -q \\ + -L /path/to/Emacs-MATLAB-Mode \\ + -l /path/to/Emacs-MATLAB-Mode/matlab-autoload.el \\ + -L /path/to/Emacs-MATLAB-Mode/tests/ \\ + -l /path/to/Emacs-MATLAB-Mode/tests/t-utils.el \\ + -l /path/to/Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar \\ + -f sweep-test-matlab-ts-grammar +to see the progress messages in your terminal." 
+ (interactive) + (let ((test-name "sweep-test-matlab-ts-grammar")) + (t-utils-error-if-no-treesit-for 'matlab test-name) + (t-utils-sweep-test-ts-grammar test-name + (or directory default-directory) + (rx ".m" eos) + #'matlab-ts-mode + #'sweep-test-matlab-ts-grammar--syntax-checker + nil + log-file))) + +(provide 'sweep-test-matlab-ts-grammar) +;;; sweep-test-matlab-ts-grammar.el ends here diff --git a/tests/sweep-test-matlab-ts-grammar.sh b/tests/sweep-test-matlab-ts-grammar.sh new file mode 100755 index 0000000000..b84ee00bb2 --- /dev/null +++ b/tests/sweep-test-matlab-ts-grammar.sh @@ -0,0 +1,23 @@ +#!/usr/bin/bash +# File: Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar.sh +# Abstract: +# cd /your/work/directory +# +# git clone https://github.com/mathworks/Emacs-MATLAB-Mode.git +# cd Emacs-MATLAB-Mode +# make lisp +# +# cd /path/to/directory/containing/mFiles +# /path/to/Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar.sh +# + +EmacsMATLABModeDir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && cd .. && pwd) + +emacs --batch \ + -q \ + -L "$EmacsMATLABModeDir" \ + -l "$EmacsMATLABModeDir/matlab-autoload.el" \ + -L "$EmacsMATLABModeDir/tests" \ + -l "$EmacsMATLABModeDir/tests/t-utils.el" \ + -l "$EmacsMATLABModeDir/tests/sweep-test-matlab-ts-grammar.el" \ + -f sweep-test-matlab-ts-grammar diff --git a/tests/sweep-test-matlab-ts-mode-indent.el b/tests/sweep-test-matlab-ts-mode-indent.el index f316f37a16..c556363653 100644 --- a/tests/sweep-test-matlab-ts-mode-indent.el +++ b/tests/sweep-test-matlab-ts-mode-indent.el @@ -31,8 +31,9 @@ (require 'matlab-ts-mode) (require 'matlab--access) -(defvar sweep-test-matlab-ts-mode-indent--mlint (or (matlab--get-mlint-exe) - (error "MLint not found"))) +(defvar sweep-test-matlab-ts-mode-indent--mlint + (or (matlab--get-mlint-exe) + (error "MLint not found, is matlab on your PATH?"))) (defun sweep-test-matlab-ts-mode-indent--syntax-checker (file) "MLint FILE, return pair (VALID . CHECK-RESULT). 
@@ -76,7 +77,7 @@ reported which is likely a bug in the tree-sitter parser. This calls `t-utils-sweep-test-indent' with does a number of checks to validate the ident rules. When run interactively, -displays the result in a *t-utils-seep-indent* buffer, otherwise +displays the result in a *sweep-test-matlab-ts-mode-indent* buffer, otherwise the results are displayed on stdout." (let ((test-name "sweep-test-matlab-ts-mode-indent") diff --git a/tests/t-utils.el b/tests/t-utils.el index cdcf0b10b2..f110627533 100644 --- a/tests/t-utils.el +++ b/tests/t-utils.el @@ -320,20 +320,63 @@ baseline check fails." test-name lang-file (t-utils--took start-time)) error-msg)) -(defun t-utils--insert-file-for-test (file &optional file-major-mode) +(defun t-utils--display-result (test-name directory result &optional no-erase) + "Display a test RESULT string. +If noninteractive this shows the result using `message', otherwise this +creates *TEST-NAME* result buffer containing RESULT in DIRECTORY and +dislays that buffer. Optional NO-ERASE, if non-nil will not erase the +result buffer prior to inserting RESULT." + (if noninteractive + (message "%s" result) + (let ((result-buf (get-buffer-create (concat "*" test-name "*")))) + (with-current-buffer result-buf + (read-only-mode -1) + (buffer-disable-undo) + (setq-local default-directory (file-truename directory)) + (when (not no-erase) + (erase-buffer)) + (if (= (point-min) (point-max)) + (insert "# -*- compilation-minor-mode -*-\n\n") + (goto-char (point-max))) + (insert result) + (goto-char (point-min)) + (text-mode) ;; so we can enable compilation-minor-mode + (compilation-minor-mode) ;; this lets us navigate to errors (would be nice to disable "g") + (set-buffer-modified-p nil) + (read-only-mode 1)) + (display-buffer result-buf)))) + +(defun t-utils--insert-file-for-test (file &optional file-major-mode skip-corrupt-check) "Insert FILE into current temporary buffer for testing. 
If optional FILE-MAJOR-MODE function is provided, run that, otherwise we examine the first line of the file for the major mode: + -*- MODE-NAME -*- -or - -*- mode: MODE-NAME -*-" + -*- mode: MODE-NAME -*- + +and run that. + +If optional SKIP-CORRUPT-CHECK is non-nil, the check for corrupted content is +skipped." (insert-file-contents-literally file) + + ;; We're testing a programming lanugage which is using utf-8-unix encoding + (set-buffer-file-coding-system 'utf-8-unix) + + ;; Check for corrupted characters (these can crash Emacs via the language server parser) + (when (not skip-corrupt-check) + (goto-char (point-min)) + (when (re-search-forward "[^[:print:][:space:]]" nil t) + (error "%s appears corrupt, non-printable utf8 character at point %d: %c" + file (point) (char-before)))) + ;; CRLF -> LF for consistency between Unix and Windows (goto-char (point-min)) (while (re-search-forward "\r" nil t) (replace-match "")) (goto-char (point-min)) + ;; Set mode (if file-major-mode (funcall file-major-mode) @@ -344,8 +387,10 @@ or (let* ((mode (match-string 1)) (mode-cmd (intern (concat mode "-mode")))) (funcall mode-cmd))) + ;; Incase the mode moves the point, reset to point-min. (goto-char (point-min)) + ;; Stash away the real buffer file for later use (and return it). (setq-local t-utils--buf-file file)) @@ -1178,7 +1223,7 @@ The result is: When run in an interacive Emacs session, e.g. M-: (sweep-LANGUAGE-ts-mode-indent) -the result is shown in \"*t-utils-sweep-indent*\" buffer, otherwise it +the result is shown in \"*TEST-NAME*\" buffer, otherwise it is displayed on stdout. After running this, you examine the results to see if there are issues. @@ -1242,9 +1287,7 @@ LANGUAGE tree-sitter that need addressing or some other issue." 
(format "%s:1: note: indent took %.3f seconds\n" file (gethash file took-ht))) files)))) - (result (concat "# -*- compilation-minor-mode -*-\n" - "\n" - (format "Files-with-parse-error-nodes%s:\n" + (result (concat (format "Files-with-parse-error-nodes%s:\n" (if syntax-checker-fun "-but-pass-syntax-checker-fun" "")) @@ -1261,21 +1304,7 @@ LANGUAGE tree-sitter that need addressing or some other issue." "Slowest-indents:\n" slow-files))) - (if noninteractive - (message "%s" result) - (let ((dir default-directory) - (result-buf (get-buffer-create "*t-utils-sweep-indent*"))) - (with-current-buffer result-buf - (setq-local default-directory dir) - (read-only-mode -1) - (erase-buffer) - (buffer-disable-undo) - (insert result) - (goto-char (point-min)) - (text-mode) ;; so we can enable compilation-minor-mode - (compilation-minor-mode) - (read-only-mode 1)) - (display-buffer result-buf)))) + (t-utils--display-result test-name directory result)) (message "FINISHED: %s %s" test-name (t-utils--took start-time)))) @@ -1588,7 +1617,7 @@ To debug a specific file-encoding test file ;; Load lang-file in temp buffer and activate file-major-mode (condition-case err - (t-utils--insert-file-for-test lang-file file-major-mode) + (t-utils--insert-file-for-test lang-file file-major-mode 'skip-corrupt-check) (error (setq got (concat "Major mode errored with message\n" (error-message-string err))))) @@ -1606,5 +1635,190 @@ To debug a specific file-encoding test file (setq error-msgs (reverse error-msgs)) (should (equal error-msgs '())))) +(defun t-utils--log (log-file string &optional create) + "Append STRING to LOG-FILE. +If CREATE is t, create LOG-FILE instead of appending" + (let ((coding-system-for-write 'no-conversion)) + (write-region string nil log-file (not create)))) + +(defun t-utils--log-create (test-name log-file) + "Create LOG-FILE with \"START: TEST-NAME\" content. 
+Returns LOG-FILE truename" + + (setq log-file (file-truename (or log-file (concat test-name ".log")))) + (t-utils--log log-file (format "START: %s\n" test-name) t) + (message "Logging to: %s" log-file) + log-file) + +(defun t-utils--bad-parse-msg (lang-file parse-issue error-info) + "Return an bad parse error message for LANG-FILE containing ERROR-INFO. +PARSE-ISSUE is a string for the message. +ERROR-INFO is \"at line NUM:COL<optional-text\"" + + (cond + ((string-match "at line \\([0-9]+\\):\\([0-9]+\\)" error-info) + (format "%s:%s:%s: error: %s %s\n" + lang-file (match-string 1 error-info) (match-string 2 error-info) + parse-issue error-info)) + (t + (error "%s bad error-info, %s" lang-file error-info)))) + +(defun t-utils--err-loc (error-node) + "Get \"type at line N1:C1 to N2:C2\" string for ERROR-NODE." + + (let* ((start-point (treesit-node-start error-node)) + (start-line (line-number-at-pos start-point)) + (start-col (save-excursion ;; error messages are one based columns + (goto-char start-point) + (1+ (current-column)))) + (end-point (treesit-node-end error-node)) + (end-line (line-number-at-pos end-point)) + (end-col (save-excursion + (goto-char end-point) + (1+ (current-column))))) + (format "%s node at line %d:%d to %d:%d (point %d to %d)" + (treesit-node-type error-node) + start-line start-col + end-line end-col + start-point + end-point))) + +(defun t-utils-sweep-test-ts-grammar (test-name + directory + lang-file-regexp + major-mode-fun + syntax-checker-fun + &optional error-nodes-regexp + log-file) + "Sweep test a tree-sitter grammar shared library looking for parse issues. + +File basenames matching matching LANG-FILE-REGEXP under DIRECTORY +recursively are examined. TEST-NAME is used in messages. + +Each matching file is read into a temporary buffer and then +MAJOR-MODE-FUN is called. This should be a mode that activates +a tree-sitter grammar, i.e. calls (treesit-parser-create \\='LANGUAGE). 
+ +ERROR-NODES-REGEXP, defaulting to (rx bol \"ERROR\" eos), is provided to +`treesit-search-subtree' to look for syntax errors in the parse tree. + +SYNTAX-CHECKER-FUN is a function that takes a list of files and should +return a hash table with files as the keys and the value of each key is +either + (cons \"no-syntax-errors\" nil) + (cons \"has-syntax-errors\" \"at line N1:COL1 to N2:COL2\") + +Progress messages are logged to LOG-FILE which defaults to +TEST_NAME.log. + +The result is: + + Files-with-parse-error-nodes-but-pass-syntax-checker-fun: + <files with tree-sitter error nodes> + + Files-that-parsed-succesfully-but-failed-syntax-checker-fun: + <files without tree-sitter error nodes> + + Total-consistently-parsed-files: M of N + +When run in an interacive Emacs session, e.g. + M-: (sweep-LANGUAGE-ts-mode-grammar) +the result is shown in \"*TEST-NAME*\" buffer, +otherwise the result is displayed on stdout." + + (when (not error-nodes-regexp) + (setq error-nodes-regexp (rx bos "ERROR" eos))) + + (setq log-file (t-utils--log-create test-name log-file)) + (when (not noninteractive) + (t-utils--display-result test-name directory (concat "Log: " log-file "\n\n"))) + + (let ((start-time (current-time)) + (all-lang-files (sort (mapcar #'file-truename ;; Expand "~" for the syntax-checker-fun + (directory-files-recursively directory lang-file-regexp)))) + (lang-files-to-check '()) + (ts-parse-result-ht (make-hash-table :test 'equal))) + + (when (= (length all-lang-files) 0) + (user-error "No files found in directory %s recursively matching regexp \"%s\"" + directory lang-file-regexp)) + (t-utils--log log-file (format "Found %d files to check %s\n" + (length all-lang-files) (t-utils--took start-time))) + + (dolist (lang-file all-lang-files) + (with-temp-buffer + (let (ok) + (t-utils--log log-file (format "Reading: %s\n" lang-file)) + (condition-case err + (progn + (t-utils--insert-file-for-test lang-file major-mode-fun) + (setq ok t)) + (error + (t-utils--log 
log-file (format "Skipping %s, %s\n" + lang-file (error-message-string err))))) + (when ok + (push lang-file lang-files-to-check) + (let* ((root (treesit-buffer-root-node)) + (error-node (treesit-search-subtree root error-nodes-regexp nil t)) + (syntax-status-pair (if error-node + (cons "has-syntax-errors" (t-utils--err-loc error-node)) + (cons "no-syntax-errors" nil)))) + (puthash lang-file syntax-status-pair ts-parse-result-ht) + (t-utils--log log-file (format "ts-parse: %s > %S\n" + lang-file syntax-status-pair))))))) + + (when (= (length lang-files-to-check) 0) + (user-error "No files to check (all skipped)\n")) + (setq lang-files-to-check (sort lang-files-to-check)) + (t-utils--log log-file (format "Checking %d files\n" (length lang-files-to-check))) + + (t-utils--log log-file (format "Calling %S\n" syntax-checker-fun)) + (let ((syntax-check-result-ht (funcall syntax-checker-fun lang-files-to-check)) + (files-with-bad-ts-error-parse "") + (files-with-bad-ts-success-parse "") + (n-consistent-files 0)) + + (t-utils--log log-file (format "Examinging %S result\n" syntax-checker-fun)) + + (dolist (lang-file lang-files-to-check) + (let ((ts-parse-file-result-pair (gethash lang-file ts-parse-result-ht)) + (syntax-check-file-result-pair + (let ((pair (gethash lang-file syntax-check-result-ht))) + (when (not (or (equal (car pair) "has-syntax-errors") + (equal (car pair) "no-syntax-errors"))) + (user-error "Bad hash %S, %s -> %S" syntax-check-result-ht lang-file pair)) + pair))) + (if (string= (car ts-parse-file-result-pair) (car syntax-check-file-result-pair)) + (setq n-consistent-files (1+ n-consistent-files)) + (pcase (car ts-parse-file-result-pair) + ("has-syntax-errors" ;; ts says syntax errors, syntax-check says no errors + (setq files-with-bad-ts-error-parse + (concat files-with-bad-ts-error-parse + (t-utils--bad-parse-msg lang-file + "bad tree-sitter parse" + (cdr ts-parse-file-result-pair))))) + ("no-syntax-errors";; ts says no syntax errors, syntax-check says 
have errors + (setq files-with-bad-ts-success-parse + (concat files-with-bad-ts-success-parse + (t-utils--bad-parse-msg lang-file + "tree-sitter did not detect error" + (cdr syntax-check-file-result-pair))))) + (_ (cl-assert nil)))))) + + (let ((result + (concat + "Files-with-parse-error-nodes-but-pass-syntax-checker-fun:\n" + files-with-bad-ts-error-parse + "\n" + "Files-that-parsed-succesfully-but-failed-syntax-checker-fun:\n" + files-with-bad-ts-success-parse + "\n" + "Total-consistently-parsed-files: " (format "%d of %d\n" n-consistent-files + (length lang-files-to-check))))) + (t-utils--display-result test-name directory result 'no-erase))) + + (t-utils--log log-file (format "FINISHED: %s %s\n" test-name (t-utils--took start-time))) + (message "Finished, see: %s" log-file))) + (provide 't-utils) ;;; t-utils.el ends here