branch: elpa/flx commit f6cd01d759d4955d2515c984e4577ad63b9c7521 Merge: b340205929 db1a328e41 Author: PythonNut <python...@users.noreply.github.com> Commit: PythonNut <python...@users.noreply.github.com>
Merge branch 'perf/dynamic-programming' into 0.6 --- .travis.yml | 4 ++ Cask | 5 ++ Makefile | 2 +- flx.el | 171 +++++++++++++++++++++++++++++++++++------------------- tests/flx-test.el | 35 ++++++----- tests/run-test.el | 7 +++ 6 files changed, 148 insertions(+), 76 deletions(-) diff --git a/.travis.yml b/.travis.yml index 673f50e13a..eaae514c4a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,10 @@ before_install: sudo apt-get install -qq emacs24 emacs24-el emacs24-common-non-dfsg; fi + - curl -fsSL https://raw.githubusercontent.com/cask/cask/master/go | python + - pwd + - ~/.cask/bin/cask + env: - EMACS=emacs24 TAGS="--tags ~@requires-e24-3" - EMACS=emacs-snapshot TAGS="" diff --git a/Cask b/Cask new file mode 100644 index 0000000000..ba211511fc --- /dev/null +++ b/Cask @@ -0,0 +1,5 @@ +(source gnu) +(source melpa) + +(development + (depends-on "async")) diff --git a/Makefile b/Makefile index 9c613a7d22..84a21f9b7a 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ all: $(ELCS) clean: $(RM) $(ELCS) $(TEST_ELCS) -show-version: show-version +show-version: echo "*** Emacs version ***" echo "EMACS = `which ${EMACS}`" ${EMACS} --version diff --git a/flx.el b/flx.el index 97985553ec..f038d8ae24 100644 --- a/flx.el +++ b/flx.el @@ -229,34 +229,6 @@ See documentation for logic." (cl-return sub))) sorted-list)) -(defun flx-get-matches (hash query &optional greater-than q-index) - "Return list of all unique indexes into str where query can match. - -That is all character sequences of query that occur in str are returned. - -HASH accept as the cached analysis of str. -sstr -e.g. (\"aab\" \"ab\") returns - '((0 2) (1 2) -" - - (setq q-index (or q-index 0)) - (let* ((q-char (aref query q-index)) - (indexes (flx-bigger-sublist - (gethash q-char hash) greater-than))) - (if (< q-index (1- (length query))) - (apply ; `mapcan' - 'nconc - (mapcar - (lambda (index) - (let ((next-matches-for-rest (flx-get-matches hash query index (1+ q-index)))) - (when next-matches-for-rest - (mapcar (lambda (match) - (cons index match)) - next-matches-for-rest)))) - indexes)) - (mapcar 'list indexes)))) - (defun flx-make-filename-cache () "Return cache hashtable appropraite for storing filenames." (flx-make-string-cache 'flx-get-heatmap-file)) @@ -282,43 +254,122 @@ e.g. (\"aab\" \"ab\") returns (puthash str res cache)) res)))) +(defun flx-find-best-match (str-info + heatmap + greater-than + query + query-length + q-index + match-cache) + "Recursively compute the best match for a string, passed as STR-INFO and +HEATMAP, according to QUERY. + +This function uses MATCH-CACHE to memoize its return values. +For other parameters, see `flx-score'" + + ;; Here, we use a simple N'ary hashing scheme + ;; You could use (/ hash-key query-length) to get greater-than + ;; Or, (mod hash-key query-length) to get q-index + ;; We use this instead of a cons key for the sake of efficiency + (let* ((hash-key (+ q-index + (* (or greater-than 0) + query-length))) + (hash-value (gethash hash-key match-cache))) + (if hash-value + ;; Here, we use the value 'no-match to distinguish a cache miss + ;; from a nil (i.e. non-matching) return value + (if (eq hash-value 'no-match) + nil + hash-value) + (let ((indexes (flx-bigger-sublist + (gethash (aref query q-index) str-info) + greater-than)) + (match) + (temp-score) + (best-score most-negative-fixnum)) + + ;; Matches are of the form: + ;; ((match_indexes) . (score . contiguous-count)) + (if (>= q-index (1- query-length)) + ;; At the tail end of the recursion, simply + ;; generate all possible matches with their scores + ;; and return the list to parent. + (setq match (mapcar (lambda (index) + (cons (list index) + (cons (aref heatmap index) 0))) + indexes)) + (dolist (index indexes) + (dolist (elem (flx-find-best-match str-info + heatmap + index + query + query-length + (1+ q-index) + match-cache)) + (setq temp-score + (if (= (1- (caar elem)) index) + (+ (cadr elem) + (aref heatmap index) + + ;; boost contiguous matches + (* (min (cddr elem) + 3) + 15) + 60) + (+ (cadr elem) + (aref heatmap index)))) + + ;; We only care about the optimal match, so only + ;; forward the match with the best score to parent + (when (> temp-score best-score) + (setq best-score temp-score + match (list (cons (cons index (car elem)) + (cons temp-score + (if (= (1- (caar elem)) + index) + (1+ (cddr elem)) + 0))))))))) + + ;; Calls are cached to avoid exponential time complexity + (puthash hash-key + (if match match 'no-match) + match-cache) + match)))) (defun flx-score (str query &optional cache) "Return best score matching QUERY against STR" (unless (or (zerop (length query)) (zerop (length str))) - (let* ((info-hash (flx-process-cache str cache)) - (heatmap (gethash 'heatmap info-hash)) - (matches (flx-get-matches info-hash query)) - (query-length (length query)) - (full-match-boost (and (< query-length 5) - (> query-length 1))) - (best-score nil)) - (mapc (lambda (match-positions) - (let ((score (if (and - full-match-boost - (= (length match-positions) - (length str))) - 10000 - 0)) - (contiguous-count 0) - last-match) - (cl-loop for index in match-positions - do (progn - (if (and last-match - (= (1+ last-match) index)) - (cl-incf contiguous-count) - (setq contiguous-count 0)) - (cl-incf score (aref heatmap index)) - (when (> contiguous-count 0) - (cl-incf score (+ 45 (* 15 (min contiguous-count 4))))) - (setq last-match index))) - (if (or (null best-score) - (> score (car best-score))) - (setq best-score (cons score match-positions))))) - matches) - best-score))) - + (let* + ((str-info (flx-process-cache str cache)) + (heatmap (gethash 'heatmap str-info)) + (query-length (length query)) + (full-match-boost (and (< 1 query-length) + (< query-length 5))) + + ;; Dynamic Programming table for memoizing flx-find-best-match + (match-cache (make-hash-table :test 'eql :size 10)) + + (optimal-match (flx-find-best-match str-info + heatmap + nil + query + query-length + 0 + match-cache))) + ;; Postprocess candidate + (and optimal-match + (cons + ;; This is the computed score, adjusted to boost the scores + ;; of exact matches. + (if (and full-match-boost + (= (length (caar optimal-match)) + (length str))) + (+ (cl-cadar optimal-match) 10000) + (cl-cadar optimal-match)) + + ;; This is the list of match positions + (caar optimal-match)))))) (defun flx-propertize (obj score &optional add-score) "Return propertized copy of obj according to score. diff --git a/tests/flx-test.el b/tests/flx-test.el index 635e1beeb4..57b2a26be5 100644 --- a/tests/flx-test.el +++ b/tests/flx-test.el @@ -34,6 +34,7 @@ (eval-when-compile (require 'cl)) (require 'ert) +(require 'async) (require 'flx) (ert-deftest flx-test-sanity () @@ -79,21 +80,6 @@ (let ((vec (vector 1 2 3))) (should (equal (vector 2 3 4) (flx-inc-vec vec))))) -(ert-deftest flx-matches-basic () - (let* ((str "aggg") - (h (flx-get-hash-for-string str 'flx-get-heatmap-str)) - (res (flx-get-matches h "g"))) - (should (equal res '((1) (2) (3)))))) - - -(ert-deftest flx-matches-more () - (let* ((str "ab-gh-ab") - (h (flx-get-hash-for-string str 'flx-get-heatmap-str)) - (res (flx-get-matches h "ab"))) - (should (equal res '((0 1) - (0 7) - (6 7)))))) - (ert-deftest flx-get-heatmap-vector-basic () "see worksheet for derivation" (let ((res (flx-get-heatmap-file "__abcab"))) @@ -214,6 +200,7 @@ In this case, the match with more contiguous characters is better." ;;; makes, we've gone the opposite way. :) ;;; ;;; We strongly prefer basename matches, where as they do not. + (ert-deftest flx-imported-prioritizes-matches-after-/ () (let ((query "b")) (let ((higher (flx-score "foo/bar" query (flx-make-filename-cache))) @@ -363,6 +350,24 @@ substring can overpower abbreviation." (should (not upper-no-folds)))) +;;; perf + +(ert-deftest flx-prune-search-space-optimizations () + "Make sure optimizations that prune bad paths early are working." + (let ((future (async-start + `(lambda () + ,(async-inject-variables "\\`load-path\\'") + (require 'flx) + (flx-score "~/foo/bar/blah.elllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll" "lllllllllllllllllllllllllllllllll" (flx-make-filename-cache))) + nil)) + result) + (with-timeout (1 (kill-process future) ) + (while (not result) ;; while process is running + (sit-for .2) + (when (async-ready future) + (setq result (async-get future))))) + (should result))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; flx-test.el ends here diff --git a/tests/run-test.el b/tests/run-test.el index 6e0bd119a8..c9842f7348 100644 --- a/tests/run-test.el +++ b/tests/run-test.el @@ -23,6 +23,13 @@ flx-root-dir)) +;; Cask +(setq package-user-dir + (expand-file-name (format ".cask/%s/elpa" emacs-version) flx-root-dir)) +(package-initialize) + + + ;; Use ERT from github when this Emacs does not have it (unless (locate-library "ert") (add-to-list