branch: externals/minuet commit cf16b7b2859fabae8934f28d3cafb83c6f9f72a4 Author: Milan Glacier <d...@milanglacier.com> Commit: Milan Glacier <d...@milanglacier.com>
feat!: Improve completion filtering with before/after context. This commit refactors the completion filtering logic to be based on the longest common match and adds support for filtering based on the prefix. Key changes: - Replaces the simple substring removal with a more precise `minuet-find-longest-match` function, which finds the longest common substring between a completion candidate and the surrounding context. - Introduces `minuet-before-cursor-filter-length` to trim redundant prefixes from completions based on the text before the cursor. --- minuet.el | 129 ++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 38 deletions(-) diff --git a/minuet.el b/minuet.el index b53fd07f91..4f10f98c72 100644 --- a/minuet.el +++ b/minuet.el @@ -152,15 +152,37 @@ complete error log." (defcustom minuet-after-cursor-filter-length 15 "Length of context after cursor used to filter completion text. -Defines the length of non-whitespace context after the cursor used to -filter completion text. Set to 0 to disable filtering. - -Example: With after_cursor_filter_length = 3 and context: \"def -fib(n):\\n|\\n\\nfib(5)\" (where | represents cursor position), if the -completion text contains \"fib\", then \"fib\" and subsequent text -will be removed. This setting filters repeated text generated by the -LLM. A large value (e.g., 15) is recommended to avoid false -positives." +This setting helps prevent the language model from generating +redundant text. When filtering completions, the system compares the +suffix of a completion candidate with the text immediately following +the cursor. + +If the length of the longest common substring between the end of the +candidate and the beginning of the post-cursor context exceeds this +value, that common portion is trimmed from the candidate. 
+ +For example, if the value is 15, and a completion candidate ends with +a 20-character string that exactly matches the 20 characters following +the cursor, the candidate will be truncated by those 20 characters +before being presented." + :type 'integer) + +(defcustom minuet-before-cursor-filter-length 3 + "Length of context before cursor used to filter completion text. + +This setting helps prevent the language model from generating +redundant text at the beginning of completion. When filtering +completions, the system compares the prefix of a completion candidate +with the text immediately before the cursor. + +If the length of the longest common substring between the beginning of +the candidate and the end of the pre-cursor context exceeds this +value, that common portion is trimmed from the candidate. + +For example, if the value is 3, and a completion candidate starts with +a 10-character string where the last 3 characters exactly match the 3 +characters before the cursor, the candidate will be truncated by those +3 characters before being presented." :type 'integer) (defcustom minuet-n-completions 3 @@ -688,40 +710,71 @@ conversation with alternating `user` and `assistant` roles by (push (apply #'concat (nreverse parts)) results)) (nreverse results))) -(defun minuet--make-context-filter-sequence (context len) - "Create a filtering string based on CONTEXT with maximum length LEN." - (if-let* ((is-string (stringp context)) - (is-positive (> len 0)) - (context (replace-regexp-in-string "\\`[\s\t\n]+" "" context)) - (should-filter (>= (length context) len)) - (context (substring context 0 len)) - (context (replace-regexp-in-string "[\s\t\n]+\\'" "" context))) - context +(cl-defun minuet--filter-text (item context) + "Filter ITEM based on CONTEXT using minuet-find-longest-match. +ITEM is a completion candidate string. +CONTEXT is a plist with :before-cursor and :after-cursor fields. +Returns the filtered item after trimming overlapping parts." 
+ (when (null item) + (cl-return-from minuet--filter-text nil)) + + (when (null context) + (cl-return-from minuet--filter-text item)) + + (setq item (string-trim item)) + + (let* ((before-cursor (plist-get context :before-cursor)) + (after-cursor (plist-get context :after-cursor)) + (filtered-item item)) + + ;; Filter against before-cursor context (trim from prefix) + (when (and before-cursor + (> minuet-before-cursor-filter-length 0)) + (setq before-cursor (string-trim before-cursor)) + (let* ((match (minuet-find-longest-match filtered-item before-cursor))) + (when (and match + (not (string-empty-p match)) + (>= (length match) minuet-before-cursor-filter-length)) + (setq filtered-item (substring filtered-item (length match)))))) + + ;; Filter against after-cursor context (trim from suffix) + (when (and after-cursor + (> minuet-after-cursor-filter-length 0)) + (setq after-cursor (string-trim after-cursor)) + (let* ((match (minuet-find-longest-match after-cursor filtered-item))) + (when (and match + (not (string-empty-p match)) + (>= (length match) minuet-after-cursor-filter-length)) + (setq filtered-item (substring filtered-item 0 (- (length filtered-item) (length match))))))) + + filtered-item)) + +(cl-defun minuet-find-longest-match (a b) + "Find the longest string that is a prefix of A and a suffix of B. +The function iterates from the longest possible match length downwards +for efficiency. If A or B are not strings, it returns an empty +string." + (unless (and (stringp a) (stringp b)) + (cl-return-from minuet-find-longest-match "")) + (let* ((len-a (length a)) + (len-b (length b)) + (max-len (min len-a len-b))) + (cl-loop for i from max-len downto 1 + for prefix-a = (substring a 0 i) + for suffix-b = (substring b (- i)) + when (string= prefix-a suffix-b) + do (cl-return-from minuet-find-longest-match prefix-a)) "")) -(defun minuet--filter-text (text sequence) - "Remove the SEQUENCE and the rest part from TEXT." 
- (cond - ((or (null sequence) (null text)) text) - ((equal sequence "") text) - (t - (let ((start (string-match-p (regexp-quote sequence) text))) - (if start - (substring text 0 start) - text))))) - -(defun minuet--filter-sequence-in-items (items sequence) - "For each item in ITEMS, apply `minuet--filter-text' with SEQUENCE." - (mapcar (lambda (x) (minuet--filter-text x sequence)) - items)) - (defun minuet--filter-context-sequence-in-items (items context) "Apply the filter sequence in each item in ITEMS. The filter sequence is obtained from CONTEXT." - (minuet--filter-sequence-in-items - items (minuet--make-context-filter-sequence - (plist-get context :after-cursor) - minuet-after-cursor-filter-length))) + (cl-loop for item in items + for filtered-item = (minuet--filter-text item context) + when (and filtered-item + (not (string= filtered-item ""))) + collect filtered-item)) + (defun minuet--stream-decode (response get-text-fn) "Decode the RESPONSE using GET-TEXT-FN."