branch: externals/llm
commit d87261daa4243304193f0492778ba32f4c4e4af9
Author: Andrew Hyatt <[email protected]>
Commit: Andrew Hyatt <[email protected]>

    Fix one cause of missing tokens when streaming for ollama provider
    
    One issue is that if there was content streamed that was incomplete JSON, we
    would never parse the incomplete part.  Now we make sure to only advance when we
    successfully parse, and try to be more precise about getting only valid JSON.
    
    This does not completely solve the problem, however.  The other causes of
    missing content are currently unknown.
    
    This is a partial fix for https://github.com/s-kostyaev/ellama/issues/8.
---
 llm-ollama.el  | 33 ++++++++++++++++++++++-----------
 llm-request.el | 18 ++++++++++++------
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/llm-ollama.el b/llm-ollama.el
index c004679b49..624aadcbe0 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -145,17 +145,28 @@ STREAMING if non-nil, turn on response streaming."
         (last-position llm-ollama-last-position))
     (with-temp-buffer
       (insert response)
-      (goto-char last-position)
-      (while (search-forward "{" nil t)
-        (backward-char 1)
-        (ignore-errors
-          (let ((obj (json-read)))
-            (unless (eq (assoc-default 'done obj) :json-true)
-              (setq current-response
-                    (concat current-response (assoc-default 'response obj))))))
-        (setq last-position (point))))
-    (setq-local llm-ollama-current-response current-response)
-    (setq-local llm-ollama-last-position last-position)
+      ;; Responses in ollama are always one per line.
+      (let* ((start-pos (save-excursion (goto-char (1- last-position))
+                                        (when (search-forward-regexp (rx (seq line-start ?{)) nil t)
+                                          (1- (point)))))
+             (end-pos (save-excursion (goto-char (point-max))
+                                      (when (search-backward-regexp (rx (seq "done\":false}" line-end))
+                                                                    start-pos t)
+                                        (pos-eol)))))
+        (when (and start-pos end-pos)
+          (setq
+           current-response
+           (concat current-response
+                   (mapconcat
+                    ;; Skip any lines that aren't json objects.
+                    (lambda (line) (when (string-match-p (rx (seq string-start ?{)) line)
+                                     (assoc-default 'response (json-read-from-string line))))
+                    (split-string (buffer-substring-no-properties start-pos end-pos) "\n" t))))
+          (setq last-position (1+ end-pos)))))
+    ;; If there is no new content, don't manipulate anything.
+    (when (> (length current-response) (length llm-ollama-current-response))
+      (setq-local llm-ollama-last-position last-position)
+      (setq-local llm-ollama-current-response current-response))
     current-response))
 
 (defun llm-ollama--get-final-response (response)
diff --git a/llm-request.el b/llm-request.el
index 8645b42144..bed467c378 100644
--- a/llm-request.el
+++ b/llm-request.el
@@ -85,12 +85,18 @@ TIMEOUT is the number of seconds to wait for a response."
                                                       :data data
                                                       :timeout timeout)))
 
-(defun llm-request--handle-new-content (&rest _)
-  "Handle new content in the current buffer."
-  (save-match-data
-    (save-excursion
-      (when llm-request--partial-callback
-          (funcall llm-request--partial-callback (llm-request--content))))))
+(defun llm-request--handle-new-content (_ _ pre-change)
+  "Handle new content in the current buffer.
+PRE-CHANGE is the length of text replaced, which for insertions
+is zero."
+  (when (= 0 pre-change)
+    (save-match-data
+      (save-excursion
+        ;; Make sure we actually have any content before invoking a callback.
+        (when (and llm-request--partial-callback
+                   (boundp 'url-http-end-of-headers)
+                   url-http-end-of-headers)
+          (funcall llm-request--partial-callback (llm-request--content)))))))
 
 (cl-defun llm-request-async (url &key headers data on-success on-success-raw on-error on-partial)
   "Make a request to URL.

Reply via email to