111: Fix data wrapping handling in fuzzy query functions.

Stefan Monnier Mon, 14 Dec 2020 08:40:51 -0800

branch: externals/trie
commit 5e8e73fd06c54246dc1fb8681a6fabcfaff0ac77
Author: Toby S. Cubitt <toby-predict...@dr-qubit.org>
Commit: Toby S. Cubitt <toby-predict...@dr-qubit.org>


    Fix data wrapping handling in fuzzy query functions.
    
    Also, cache fuzzy queries ranked by Lewenstein distance.
---
 trie.el | 225 +++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 145 insertions(+), 80 deletions(-)

diff --git a/trie.el b/trie.el
index 69abe6f..41fa9b7 100644
--- a/trie.el
+++ b/trie.el
@@ -272,6 +272,97 @@
                (,comparison-function b a))))))
 
 
+;; massage rankfun arguments for `trie-regexp-search' results
+(if (trie-lexical-binding-p)
+    (defun trie--wrap-regexp-search-rankfun (rankfun)
+      (lambda (a b)
+       ;; if car of argument contains a key+group list rather than a straight
+       ;; key, remove group list
+       ;; FIXME: the test for straight key, below, will fail if the key is a
+       ;;        list, and the first element of the key is itself a list
+       ;;        (there might be no easy way to fully fix this...)
+       (unless (or (atom (car a))
+                   (and (listp (car a))
+                        (not (sequencep (caar a)))))
+         (setq a (cons (caar a) (cdr a))))
+       (unless (or (atom (car b))
+                   (and (listp (car b))
+                        (not (sequencep (caar b)))))
+         (setq b (cons (caar b) (cdr b))))
+       (funcall rankfun a b)))
+  (defun trie--wrap-regexp-search-rankfun (rankfun)
+    `(lambda (a b)
+       ;; if car of argument contains a key+group list rather than a straight
+       ;; key, remove group list
+       ;; FIXME: the test for straight key, below, will fail if the key is a
+       ;;        list, and the first element of the key is itself a list
+       ;;        (there might be no easy way to fully fix this...)
+       (unless (or (atom (car a))
+                  (and (listp (car a))
+                      (not (sequencep (caar a)))))
+        (setq a (cons (caar a) (cdr a))))
+       (unless (or (atom (car b))
+                  (and (listp (car b))
+                       (not (sequencep (caar b)))))
+        (setq b (cons (caar b) (cdr b))))
+       (,rankfun a b))))
+
+
+(if (trie-lexical-binding-p)
+    (defun trie--wrap-regexp-search-filter (filter)
+      (lambda (seq data)
+       ;; if car of argument contains a key+group list rather than a straight
+       ;; key, remove group list
+       ;; FIXME: the test for straight key, below, will fail if the key is a
+       ;;        list, and the first element of the key is itself a list
+       ;;        (there might be no easy way to fully fix this...)
+       (unless (or (atom (car seq))
+                   (and (listp (car seq))
+                        (not (sequencep (caar seq)))))
+         (setq seq (caar seq))
+         ;; call filter on massaged arguments
+         (funcall filter seq data))))
+  (defun trie--wrap-regexp-search-filter (filter)
+    `(lambda (seq data)
+       ;; if car of argument contains a key+group list rather than a straight
+       ;; key, remove group list
+       ;; FIXME: the test for straight key, below, will fail if the key is a
+       ;;        list, and the first element of the key is itself a list
+       ;;        (there might be no easy way to fully fix this...)
+       (unless (or (atom (car seq))
+                  (and (listp (car seq))
+                       (not (sequencep (caar seq)))))
+        (setq seq (caar seq))
+        ;; call filter on massaged arguments
+        (,filter seq data)))))
+
+
+;; create Lewenstein rank function from trie comparison function
+(if (trie-lexical-binding-p)
+    (defun trie--construct-Lewenstein-rankfun (comparison-function)
+      (let ((compfun (trie-construct-sortfun comparison-function)))
+       (lambda (a b)
+         (cond
+          ((< (cdar a) (cdar b)) t)
+          ((> (cdar a) (cdar b)) nil)
+          (t (funcall compfun (caar a) (caar b)))))))
+  (defun trie--construct-Lewenstein-rankfun (comparison-function)
+    `(lambda (a b)
+       (cond
+       ((< (cdar a) (cdar b)) t)
+       ((> (cdar a) (cdar b)) nil)
+       (t ,(trie-construct-sortfun comparison-function)
+          (caar a) (caar b))))))
+
+
+;; create Lewenstein rank function from trie comparison function
+(if (trie-lexical-binding-p)
+    (defun trie--wrap-fuzzy-filter (filter)
+      (lambda (match data) (funcall filter (car match) (cdr match) data)))
+    (defun trie--wrap-fuzzy-filter (filter)
+      `(lambda (match data) (,filter (car match) (cdr match) data))))
+
+
 
 
 ;;; ----------------------------------------------------------------
@@ -1648,48 +1739,29 @@ default key-data cons cell."
   ;; convert trie from print-form if necessary
   (trie-transform-from-read-warn trie)
 
-  ;; massage rankfun to cope with grouping data
   ;; FIXME: could skip this if REGEXP contains no grouping constructs
-  ;; FIXME: crazy variable name is not needed with lexical scoping
-  (let ((--trie-regexp-search--rankfun rankfun))
-    (when rankfun
-      (setq rankfun
-           (lambda (a b)
-             ;; if car of argument contains a key+group list rather than a
-             ;; straight key, remove group list
-             ;; FIXME: the test for straight key, below, will fail if the key
-             ;;        is a list, and the first element of the key is itself
-             ;;        a list (there might be no easy way to fully fix
-             ;;        this...)
-             (unless (or (atom (car a))
-                         (and (listp (car a))
-                              (not (sequencep (caar a)))))
-               (setq a (cons (caar a) (cdr a))))
-             (unless (or (atom (car b))
-                         (and (listp (car b))
-                              (not (sequencep (caar b)))))
-               (setq b (cons (caar b) (cdr b))))
-             ;; call rankfun on massaged arguments
-             (funcall --trie-regexp-search--rankfun a b))))
-
-    ;; accumulate results
-    (trie--accumulate-results
-     rankfun maxnum reverse filter resultfun accumulator nil
-     (trie--do-regexp-search
-      (trie--root trie)
-      (tNFA-from-regexp regexp :test (trie--construct-equality-function
-                                     (trie--comparison-function trie)))
-      (cond ((stringp regexp) "") ((listp regexp) ()) (t []))  0
-      (or (and maxnum reverse) (and (not maxnum) (not reverse)))
-      ;; FIXME: Is this a case where it would pay to replace these arguments
-      ;;        with dynamically-scoped variables, to save stack space during
-      ;;        the recursive calls to `trie--do-regexp-search'?
-      ;;        Alternatively, with lexical scoping, we could use a closure
-      ;;        for `trie--do-regexp-search' instead of a function.
-      (trie--comparison-function trie)
-      (trie--lookupfun trie)
-      (trie--mapfun trie)
-      accumulator))))
+  ;; massage rankfun and filter to cope with grouping data
+  (when rankfun (setq rankfun (trie--wrap-regexp-search-rankfun rankfun)))
+  (when filter (setq filter (trie--wrap-regexp-search-filter filter)))
+
+  ;; accumulate results
+  (trie--accumulate-results
+   rankfun maxnum reverse filter resultfun accumulator nil
+   (trie--do-regexp-search
+    (trie--root trie)
+    (tNFA-from-regexp regexp :test (trie--construct-equality-function
+                                   (trie--comparison-function trie)))
+    (cond ((stringp regexp) "") ((listp regexp) ()) (t []))  0
+    (or (and maxnum reverse) (and (not maxnum) (not reverse)))
+    ;; FIXME: Is this a case where it would pay to replace these arguments
+    ;;        with dynamically-scoped variables, to save stack space during
+    ;;        the recursive calls to `trie--do-regexp-search'?  Alternatively,
+    ;;        with lexical scoping, we could use a closure for
+    ;;        `trie--do-regexp-search' instead of a function.
+    (trie--comparison-function trie)
+    (trie--lookupfun trie)
+    (trie--mapfun trie)
+    accumulator)))
 
 
 
@@ -1999,7 +2071,7 @@ reverse order. Returns nil if no results are found.
 
 Returns a list of matches, with elements of the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a matching key from the trie, DATA its associated
 data, and DIST is its Lewenstein distance \(edit distance\) from
@@ -2024,7 +2096,7 @@ lexicographically\).
 If RANKFUN is a function, it must accept two arguments, both of
 the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a key from the trie, DIST is its Lewenstein
 distances from STRING, and DATA is its associated data. RANKFUN
@@ -2032,14 +2104,14 @@ should return non-nil if first argument is ranked 
strictly higher
 than the second, nil otherwise.
 
 The FILTER argument sets a filter function for the matches. If
-supplied, it is called for each possible match with two
-arguments: a KEY and a (DIST . DATA) cons cell. If the filter
-function returns nil, the match is not included in the results,
-and does not count towards MAXNUM.
+supplied, it is called for each possible match with three
+arguments: KEY, DIST and DATA. If the filter function returns
+nil, the match is not included in the results, and does not count
+towards MAXNUM.
 
 RESULTFUN defines a function used to process results before
 adding them to the final result list. If specified, it should
-accept two arguments: a KEY and a (DIST . DATA) cons cell. Its
+accept two arguments: a (KEY . DIST) cons cell and DATA. Its
 return value is what gets added to the final result list, instead
 of the default key-dist-data list."
 
@@ -2048,13 +2120,10 @@ of the default key-dist-data list."
 
   ;; construct rankfun to sort by Lewenstein distance if requested
   (when (eq rankfun t)
-    (setq rankfun `(lambda (a b)
-                    (cond
-                    ((< (cadr a) (cadr b)) t)
-                    ((> (cadr a) (cadr b)) nil)
-                    (t ,(trie-construct-sortfun
-                         (trie--comparison-function trie))
-                       (car a) (car b))))))
+    (setq rankfun (trie--construct-Lewenstein-rankfun
+                  (trie--comparison-function trie))))
+  ;; massage filter function arguments
+  (when filter (setq filter (trie--wrap-fuzzy-filter filter)))
 
   ;; accumulate results
   (trie--accumulate-results
@@ -2091,8 +2160,8 @@ of the default key-dist-data list."
   (if (trie--node-data-p node)
       (when (<= (aref row (1- (length row))) distance)
        (funcall accumulator
-                seq (cons (aref row (1- (length row)))
-                          (trie--node-data node))))
+                (cons seq (aref row (1- (length row))))
+                (trie--node-data node)))
 
     ;; build next row of Lewenstein table
     (setq row (Lewenstein--next-row
@@ -2120,7 +2189,7 @@ defined by TRIE's comparison function, or in reverse 
order if
 REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
 from the stack. Each stack element has the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a matching key from the trie, DATA its associated
 data, and DIST is its Lewenstein distance \(edit distance\) from
@@ -2216,8 +2285,8 @@ within Lewenstein distance \(edit distance\) DISTANCE of 
STRING."
 
        ;; push next fuzzy match onto head of stack
        (when node
-         (push (cons seq (cons (aref row (1- (length row)))
-                               (trie--node-data node)))
+         (push (cons (cons seq (aref row (1- (length row))))
+                     (trie--node-data node))
                store))))))
 
 
@@ -2237,7 +2306,7 @@ if no results are found.
 
 Returns a list of completions, with elements of the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a matching completion from the trie, DATA its
 associated data, and DIST is its Lewenstein distance \(edit
@@ -2262,7 +2331,7 @@ lexicographically\).
 If RANKFUN is a function, it must accept two arguments, both of
 the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a key from the trie, DIST is its Lewenstein
 distances from PREFIX, and DATA is its associated data. RANKFUN
@@ -2270,14 +2339,14 @@ should return non-nil if first argument is ranked 
strictly higher
 than the second, nil otherwise.
 
 The FILTER argument sets a filter function for the matches. If
-supplied, it is called for each possible match with two
-arguments: a KEY and a (DIST . DATA) cons cell. If the filter
-function returns nil, the match is not included in the results,
-and does not count towards MAXNUM.
+supplied, it is called for each possible match with three
+arguments: KEY, DIST and DATA. If the filter function returns
+nil, the match is not included in the results, and does not count
+towards MAXNUM.
 
 RESULTFUN defines a function used to process results before
 adding them to the final result list. If specified, it should
-accept two arguments: a KEY and a (DIST . DATA) cons cell. Its
+accept two arguments: a (KEY . DIST) cons cell and DATA. Its
 return value is what gets added to the final result list, instead
 of the default key-dist-data list."
 
@@ -2286,13 +2355,10 @@ of the default key-dist-data list."
 
   ;; construct rankfun to sort by Lewenstein distance if requested
   (when (eq rankfun t)
-    (setq rankfun `(lambda (a b)
-                    (cond
-                    ((< (cadr a) (cadr b)) t)
-                    ((> (cadr a) (cadr b)) nil)
-                    (t ,(trie-construct-sortfun
-                         (trie--comparison-function trie))
-                       (car a) (car b))))))
+    (setq rankfun (trie--construct-Lewenstein-rankfun
+                  (trie--comparison-function trie))))
+  ;; massage filter function arguments
+  (when filter (setq filter (trie--wrap-fuzzy-filter filter)))
 
   ;; accumulate results
   (trie--accumulate-results
@@ -2330,8 +2396,8 @@ of the default key-dist-data list."
   (if (trie--node-data-p node)
       (when (<= (aref row (1- (length row))) distance)
        (funcall accumulator
-                seq (cons (aref row (1- (length row)))
-                          (trie--node-data node))))
+                (cons seq (aref row (1- (length row))))
+                (trie--node-data node)))
 
     ;; build next row of Lewenstein table
     (setq row (Lewenstein--next-row
@@ -2354,8 +2420,7 @@ of the default key-dist-data list."
       (when (<= (aref row (1- (length row))) distance)
        (trie--mapc
         (lambda (n s)
-          (funcall accumulator
-                   s (cons pfxcost (trie--node-data n))))
+          (funcall accumulator (cons s pfxcost) (trie--node-data n)))
         mapfun node seq reverse))
       )))
 
@@ -2370,7 +2435,7 @@ defined by TRIE's comparison function, or in reverse 
order if
 REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
 from the stack. Each stack element has the form:
 
-    (KEY DIST . DATA)
+    ((KEY . DIST) . DATA)
 
 where KEY is a matching completion from the trie, DATA its
 associated data, and DIST is the Lewenstein distance \(edit
@@ -2497,7 +2562,7 @@ DISTANCE of PREFIX."
 
        ;; push next fuzzy completion onto head of stack
        (when node
-         (push (cons seq (cons pfxcost (trie--node-data node)))
+         (push (cons (cons seq pfxcost) (trie--node-data node))
                store))))))

[elpa] externals/trie 5e8e73f 081/111: Fix data wrapping handling in fuzzy query functions.

Reply via email to