branch: externals/minuet
commit cf16b7b2859fabae8934f28d3cafb83c6f9f72a4
Author: Milan Glacier <d...@milanglacier.com>
Commit: Milan Glacier <d...@milanglacier.com>

    feat!: Improve completion filtering with before/after context.
    
    This commit refactors the completion filtering logic to be based on
    the longest common match and adds support for filtering based on prefix.
    
    Key changes:
    
    - Replaces the simple substring removal with a more precise
    `minuet-find-longest-match` function, which finds the longest overlap
    (a prefix of one string that is also a suffix of the other) between a
    completion candidate and the surrounding context.
    
    - Introduces `minuet-before-cursor-filter-length` to trim redundant
    prefixes from completions based on the text before the cursor.
---
 minuet.el | 129 ++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 38 deletions(-)

diff --git a/minuet.el b/minuet.el
index b53fd07f91..4f10f98c72 100644
--- a/minuet.el
+++ b/minuet.el
@@ -152,15 +152,37 @@ complete error log."
 (defcustom minuet-after-cursor-filter-length 15
   "Length of context after cursor used to filter completion text.
 
-Defines the length of non-whitespace context after the cursor used to
-filter completion text.  Set to 0 to disable filtering.
-
-Example: With after_cursor_filter_length = 3 and context: \"def
-fib(n):\\n|\\n\\nfib(5)\" (where | represents cursor position), if the
-completion text contains \"fib\", then \"fib\" and subsequent text
-will be removed.  This setting filters repeated text generated by the
-LLM.  A large value (e.g., 15) is recommended to avoid false
-positives."
+This setting helps prevent the language model from generating
+redundant text.  When filtering completions, the system compares the
+suffix of a completion candidate with the text immediately following
+the cursor (both trimmed of surrounding whitespace).
+
+If the longest string that is both a suffix of the candidate and a
+prefix of the post-cursor context is at least this many characters
+long, it is trimmed from the candidate.  A value of 0 disables this.
+
+For example, if the value is 15, and a completion candidate ends with
+a 20-character string that exactly matches the 20 characters following
+the cursor, the candidate will be truncated by those 20 characters
+before being presented."
+  :type 'integer)
+
+(defcustom minuet-before-cursor-filter-length 3
+  "Length of context before cursor used to filter completion text.
+
+This setting helps prevent the language model from generating
+redundant text at the beginning of completion.  When filtering
+completions, the system compares the prefix of a completion candidate
+with the text immediately before the cursor (both trimmed of
+surrounding whitespace).
+
+If the longest string that is both a prefix of the candidate and a
+suffix of the pre-cursor context is at least this many characters
+long, it is trimmed from the candidate.  A value of 0 disables this.
+
+For example, if the value is 3, and a completion candidate starts with
+the same 5 characters that immediately precede the cursor, the
+candidate will be truncated by those 5 characters before being presented."
   :type 'integer)
 
 (defcustom minuet-n-completions 3
@@ -688,40 +710,71 @@ conversation with alternating `user` and `assistant` roles by
       (push (apply #'concat (nreverse parts)) results))
     (nreverse results)))
 
-(defun minuet--make-context-filter-sequence (context len)
-  "Create a filtering string based on CONTEXT with maximum length LEN."
-  (if-let* ((is-string (stringp context))
-            (is-positive (> len 0))
-            (context (replace-regexp-in-string "\\`[\s\t\n]+" "" context))
-            (should-filter (>= (length context) len))
-            (context (substring context 0 len))
-            (context (replace-regexp-in-string "[\s\t\n]+\\'" "" context)))
-      context
+(cl-defun minuet--filter-text (item context)
+  "Trim from ITEM the text it shares with CONTEXT and return the result.
+ITEM is a completion candidate string; if nil, nil is returned.
+CONTEXT is a plist with :before-cursor and :after-cursor fields; if
+nil, ITEM is returned as is.  Uses `minuet-find-longest-match'."
+  (when (null item)
+    (cl-return-from minuet--filter-text nil))
+
+  (when (null context)
+    (cl-return-from minuet--filter-text item))
+
+  (setq item (string-trim item))
+
+  (let* ((before-cursor (plist-get context :before-cursor))
+         (after-cursor (plist-get context :after-cursor))
+         (filtered-item item))
+
+    ;; Trim the item's prefix when it repeats the end of before-cursor.
+    (when (and before-cursor
+               (> minuet-before-cursor-filter-length 0))
+      (setq before-cursor (string-trim before-cursor))
+      (let* ((match (minuet-find-longest-match filtered-item before-cursor)))
+        (when (and match
+                   (not (string-empty-p match))
+                   (>= (length match) minuet-before-cursor-filter-length))
+          (setq filtered-item (substring filtered-item (length match))))))
+
+    ;; Trim the item's suffix when it repeats the start of after-cursor.
+    (when (and after-cursor
+               (> minuet-after-cursor-filter-length 0))
+      (setq after-cursor (string-trim after-cursor))
+      (let* ((match (minuet-find-longest-match after-cursor filtered-item)))
+        (when (and match
+                   (not (string-empty-p match))
+                   (>= (length match) minuet-after-cursor-filter-length))
+          (setq filtered-item (substring filtered-item 0 (- (length filtered-item) (length match)))))))
+
+    filtered-item))
+
+(cl-defun minuet-find-longest-match (a b)
+  "Find the longest string that is a prefix of A and a suffix of B.
+Candidate lengths are tried from the longest possible downwards, so
+the first hit is the answer.  Return the empty string when there is
+no overlap or when A or B is not a string."
+  (unless (and (stringp a) (stringp b))
+    (cl-return-from minuet-find-longest-match ""))
+  (let* ((len-a (length a))
+         (len-b (length b))
+         (max-len (min len-a len-b)))
+    (cl-loop for i from max-len downto 1
+             for prefix-a = (substring a 0 i)
+             for suffix-b = (substring b (- i))
+             when (string= prefix-a suffix-b)
+             do (cl-return-from minuet-find-longest-match prefix-a))
     ""))
 
-(defun minuet--filter-text (text sequence)
-  "Remove the SEQUENCE and the rest part from TEXT."
-  (cond
-   ((or (null sequence) (null text)) text)
-   ((equal sequence "") text)
-   (t
-    (let ((start (string-match-p (regexp-quote sequence) text)))
-      (if start
-          (substring text 0 start)
-        text)))))
-
-(defun minuet--filter-sequence-in-items (items sequence)
-  "For each item in ITEMS, apply `minuet--filter-text' with SEQUENCE."
-  (mapcar (lambda (x) (minuet--filter-text x sequence))
-          items))
-
 (defun minuet--filter-context-sequence-in-items (items context)
   "Apply the filter sequence in each item in ITEMS.
 The filter sequence is obtained from CONTEXT."
-  (minuet--filter-sequence-in-items
-   items (minuet--make-context-filter-sequence
-          (plist-get context :after-cursor)
-          minuet-after-cursor-filter-length)))
+  (cl-loop for item in items
+           for filtered-item = (minuet--filter-text item context)
+           when (and filtered-item  ; drop nil results
+                     (not (string= filtered-item "")))  ; and empty strings
+           collect filtered-item))
+
 
 (defun minuet--stream-decode (response get-text-fn)
   "Decode the RESPONSE using GET-TEXT-FN."

Reply via email to