branch: externals/llm
commit 38a627409c28d10a0ed8ad51573e83837eebfe49
Author: Andrew Hyatt <[email protected]>
Commit: Andrew Hyatt <[email protected]>
Beginning of a streaming option, with only llm-vertex implemented
---
llm-tester.el | 23 ++++++
 llm-vertex.el | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
llm.el | 8 ++
 3 files changed, 221 insertions(+), 50 deletions(-)
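
Usage sketch for reviewers (not part of the patch): a hypothetical caller of
the new generic, assuming a configured llm-vertex provider. The project id
"my-project" and the prompt text are placeholders; the partial callback
receives the response accumulated so far, then nil once the stream is done.

(require 'llm)
(require 'llm-vertex)

(llm-chat-streaming
 (make-llm-vertex :project "my-project")  ; placeholder project id
 (make-llm-chat-prompt
  :interactions (list (make-llm-chat-prompt-interaction
                       :role 'user
                       :content "Say hello.")))
 (lambda (text)
   ;; TEXT is the accumulated response so far, or nil at end of stream.
   (if text (message "So far: %s" text)
     (message "Done.")))
 (lambda (type msg) (message "Error %s: %s" type msg)))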
diff --git a/llm-tester.el b/llm-tester.el
index 839b1e4627..e5c6eaa02a 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -108,6 +108,29 @@
(message "ERROR: Provider %s returned an empty response" (type-of
provider)))
(message "ERROR: Provider %s did not return any response" (type-of
provider)))))
+(defun llm-tester-chat-streaming (provider)
+  "Test that PROVIDER can stream back LLM chat responses."
+  (message "Testing provider %s for streaming chat" (type-of provider))
+  (let ((accum)
+        (counter 0))
+    (llm-chat-streaming
+     provider
+     (make-llm-chat-prompt
+      :interactions (list
+                     (make-llm-chat-prompt-interaction
+                      :role 'user
+                      :content "Write a poem in iambic pentameter about the pleasures of using Emacs. The poem should make snide references to vi."))
+      :temperature 0.5
+      :max-tokens 200)
+     (lambda (text)
+       (if text (progn (message "Chunk retrieved")
+                       (cl-incf counter)
+                       (setq accum text))
+         (message "SUCCESS: Provider %s provided a response %s in %d parts"
+                  (type-of provider) accum counter)))
+     (lambda (type message)
+       (message "ERROR: Provider %s returned an error of type %s with message %s" (type-of provider) type message)))))
+
(defun llm-tester-all (provider)
"Test all llm functionality for PROVIDER."
(llm-tester-embedding-sync provider)
diff --git a/llm-vertex.el b/llm-vertex.el
index 3c465421c8..4c1c6c134e 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -43,6 +43,12 @@ If the binary is not in the PATH, the full path must be specified."
:type 'string
:group 'llm-vertex)
+(defcustom llm-vertex-example-prelude "Examples of how you should respond follow."
+ "The prelude to use for examples in Vertex chat prompts.
+This is only used for streaming calls."
+ :type 'string
+ :group 'llm-vertex)
+
(cl-defstruct llm-vertex
"A struct representing a Vertex AI client.
@@ -81,7 +87,8 @@ PROVIDER, VECTOR-CALLBACK, ERROR-CALLBACK are all the same as
`llm-embedding-async'. SYNC, when non-nil, will wait until the
response is available to return."
  (llm-vertex-refresh-key provider)
-  (request (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
+  (request (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:predict"
                    llm-vertex-gcloud-region
+                   "v1"
                    (llm-vertex-project provider)
                    llm-vertex-gcloud-region
@@ -112,66 +119,199 @@ response is available to return."
(lambda (_ error-message) (error error-message)) t)
response))
-(defun llm-vertex--chat (provider prompt response-callback error-callback sync)
- "Get the chat response for PROMPT.
-PROVIDER, RESPONSE-CALLBACK, ERROR-CALLBACK are all the same as
-`llm-chat-async'. SYNC, when non-nil, will wait until
-the response is available to return."
- (llm-vertex-refresh-key provider)
- (let ((request-alist))
+(defun llm-vertex--parameters-ui (prompt)
+ "Return a alist setting parameters, appropriate for the ui API.
+If nothing needs to be set, return nil."
+  (let ((param-struct-alist))
+    (when (llm-chat-prompt-temperature prompt)
+      (push `("temperature" . (("float_val" . ,(llm-chat-prompt-temperature prompt)))) param-struct-alist))
+    (when (llm-chat-prompt-max-tokens prompt)
+      (push `("maxOutputTokens" . (("int_val" . ,(llm-chat-prompt-max-tokens prompt)))) param-struct-alist))
+    ;; Wrap in the "parameters" and "struct_val" keys
+    (if param-struct-alist
+        `(("parameters" . (("struct_val" . ,param-struct-alist)))))))
+
+(defun llm-vertex--parameters-v1 (prompt)
+ "Return an alist setting parameters, appropriate for the v1 API.
+If nothing needs to be set, return nil."
+  (let ((param-struct-alist))
+    (when (llm-chat-prompt-temperature prompt)
+      (push `("temperature" . ,(llm-chat-prompt-temperature prompt)) param-struct-alist))
+    (when (llm-chat-prompt-max-tokens prompt)
+      (push `("maxOutputTokens" . ,(llm-chat-prompt-max-tokens prompt)) param-struct-alist))
+    ;; Wrap in the "parameters" key
+    (if param-struct-alist
+        `(("parameters" . ,param-struct-alist)))))
+
+(defun llm-vertex--input-ui (prompt)
+  "Return an alist with chat input, appropriate for the ui API.
+PROMPT contains the input to the call to the chat API."
+  (let ((system-prompt))
     (when (llm-chat-prompt-context prompt)
-      (push `("context" . ,(llm-chat-prompt-context prompt)) request-alist))
+      (push (llm-chat-prompt-context prompt) system-prompt))
     (when (llm-chat-prompt-examples prompt)
-      (push `("examples" . ,(apply #'vector
-                                   (mapcar (lambda (example)
+      (push (concat llm-vertex-example-prelude "\n"
+                    (mapconcat (lambda (example)
+                                 (concat "User:\n" (car example) "\nAssistant:\n" (cdr example)))
+                               (llm-chat-prompt-examples prompt) "\n"))
+            system-prompt))
+    `(("inputs" . ((("struct_val" .
+                     (("messages" .
+                       (("list_val" .
+                         ,(mapcar (lambda (interaction)
+                                    `(("struct_val" .
+                                       (("content" .
+                                         (("string_val" .
+                                           (,(format "'\"%s\"'" (llm-chat-prompt-interaction-content interaction))))))
+                                        ("author" .
+                                         (("string_val" .
+                                           ,(format "'\"%s\"'"
+                                                    (pcase (llm-chat-prompt-interaction-role interaction)
+                                                      ('user "user")
+                                                      ('system "system")
+                                                      ('assistant "assistant"))))))))))
+                                  (if system-prompt
+                                      (cons (make-llm-chat-prompt-interaction
+                                             :role 'system
+                                             :content (mapconcat #'identity (nreverse system-prompt) "\n"))
+                                            (llm-chat-prompt-interactions prompt))
+                                    (llm-chat-prompt-interactions prompt))))))))))))))
+
+(defun llm-vertex--input-v1 (prompt)
+  "Return an alist with chat input, appropriate for the v1 API.
+PROMPT contains the input to the call to the chat API."
+  (let ((param-alist))
+    (when (llm-chat-prompt-context prompt)
+      (push `("context" . ,(llm-chat-prompt-context prompt)) param-alist))
+    (when (llm-chat-prompt-examples prompt)
+      (push `("examples" . ,(mapcar (lambda (example)
                                       `(("input" . (("content" . ,(car example))))
                                         ("output" . (("content" . ,(cdr example))))))
-                                            (llm-chat-prompt-examples prompt))))
-             request-alist))
- (push `("messages" . ,(apply #'vector
- (mapcar (lambda (interaction)
- `(("author" . (pcase
(llm-chat-prompt-interaction-role interaction)
- ('user "user")
- ('system (error
"System role not supported"))
- ('assistant
"assistant")))
- ("content" .
,(llm-chat-prompt-interaction-content interaction))))
- (llm-chat-prompt-interactions
prompt))))
- request-alist)
- (when (llm-chat-prompt-temperature prompt)
- (push `("temperature" . ,(llm-chat-prompt-temperature prompt))
- request-alist))
- (when (llm-chat-prompt-max-tokens prompt)
- (push `("max_tokens" . ,(llm-chat-prompt-max-tokens prompt))
request-alist))
- (request (format
"https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
- llm-vertex-gcloud-region
- (llm-vertex-project provider)
- llm-vertex-gcloud-region
- (or (llm-vertex-chat-model provider)
"chat-bison"))
- :type "POST"
- :sync sync
- :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key
provider)))
- ("Content-Type" . "application/json"))
- :data (json-encode `(("instances" . [,request-alist])))
- :parser 'json-read
- :success (cl-function (lambda (&key data &allow-other-keys)
- (funcall response-callback
- (cdr (assoc 'content (aref (cdr (assoc
'candidates (aref (cdr (assoc 'predictions data)) 0))) 0))))))
- :error (cl-function (lambda (&key error-thrown data &allow-other-keys)
- (funcall error-callback 'error
- (error (format "Problem calling GCloud
AI: %s, status: %s message: %s (%s)"
- (cdr error-thrown)
- (assoc-default 'status
(assoc-default 'error data))
- (assoc-default 'message
(assoc-default 'error data))
- data))))))))
+                                    (llm-chat-prompt-examples prompt)))
+            param-alist))
+    (push `("messages" . ,(mapcar (lambda (interaction)
+                                    `(("author" . ,(pcase (llm-chat-prompt-interaction-role interaction)
+                                                     ('user "user")
+                                                     ('system (error "System role not supported"))
+                                                     ('assistant "assistant")))
+                                      ("content" . ,(llm-chat-prompt-interaction-content interaction))))
+                                  (llm-chat-prompt-interactions prompt)))
+          param-alist)
+    `(("instances" . (,param-alist)))))
+
+(defun llm-vertex--request-data-v1 (prompt)
+ "Return all request data to be passed to the v1 API.
+PROMPT contains the data that will be transformed into the result."
+ (append
+ (llm-vertex--input-v1 prompt)
+ (llm-vertex--parameters-v1 prompt)))
+
+(defun llm-vertex--request-data-ui (prompt)
+ "Return all request data to be passed to the ui API.
+PROMPT contains the data that will be transformed into the result."
+ (append
+ (llm-vertex--input-ui prompt)
+ (llm-vertex--parameters-ui prompt)))
+
+(defun llm-vertex--get-response-v1 (response)
+  "Return the text content from the RESPONSE the v1 API returned."
+  (cdr (assoc 'content (aref (cdr (assoc 'candidates (aref (cdr (assoc 'predictions response)) 0))) 0))))
+
+(defun llm-vertex--get-response-ui (response)
+  "Return the text content from the RESPONSE the ui API returned."
+  (pcase (type-of response)
+    ('vector (mapconcat #'llm-vertex--get-response-ui response ""))
+    ('cons (let* ((outputs (cdr (assoc 'outputs response)))
+                  (structVal-list (cdr (assoc 'structVal (aref outputs 0))))
+                  (candidates (cdr (assoc 'candidates structVal-list)))
+                  (listVal (cdr (assoc 'listVal candidates)))
+                  (structVal (cdr (assoc 'structVal (aref listVal 0))))
+                  (content (cdr (assoc 'content structVal)))
+                  (stringVal (aref (cdr (assoc 'stringVal content)) 0)))
+             stringVal))))
+
+(defun llm-vertex--chat (provider prompt response-callback error-callback mode)
+  "Get the chat response for PROMPT.
+PROVIDER, RESPONSE-CALLBACK, ERROR-CALLBACK are all the same as
+`llm-chat-async'.
+
+MODE is one of the symbols `sync', `async', or `streaming'.  If
+async or streaming, the value will not be returned with the
+response, but sent to RESPONSE-CALLBACK."
+  (llm-vertex-refresh-key provider)
+  (let ((r (request (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:%s"
+                            llm-vertex-gcloud-region
+                            (if (eq mode 'streaming) "ui" "v1")
+                            (llm-vertex-project provider)
+                            llm-vertex-gcloud-region
+                            (or (llm-vertex-chat-model provider) "chat-bison")
+                            (if (eq mode 'streaming) "serverStreamingPredict" "predict"))
+              :type "POST"
+              :sync (eq mode 'sync)
+              :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider)))
+                         ("Content-Type" . "application/json"))
+              :data (json-encode (if (eq mode 'streaming)
+                                     (llm-vertex--request-data-ui prompt)
+                                   (llm-vertex--request-data-v1 prompt)))
+              :parser 'json-read
+              :success (cl-function (lambda (&key data &allow-other-keys)
+                                      ;; If it's streaming, pass back nil, since we
+                                      ;; will have passed back everything else.
+                                      (funcall response-callback
+                                               (unless (eq mode 'streaming)
+                                                 (llm-vertex--get-response-v1 data)))))
+              :error (cl-function (lambda (&key error-thrown data &allow-other-keys)
+                                    (funcall error-callback 'error
+                                             (format "Problem calling GCloud AI: %s, status: %s message: %s (%s)"
+                                                     (cdr error-thrown)
+                                                     (assoc-default 'status (assoc-default 'error data))
+                                                     (assoc-default 'message (assoc-default 'error data))
+                                                     data)))))))
+    (when (eq mode 'streaming)
+      (with-current-buffer (request-response--buffer r)
+        (add-hook 'after-change-functions
+                  (lambda (_ _ _)
+                    (let ((start (save-excursion
+                                   (goto-char (point-min))
+                                   (search-forward-regexp (rx (seq line-start "[")) nil t)
+                                   (beginning-of-line)
+                                   (point)))
+                          (end-of-valid-chunk
+                           (save-excursion
+                             (goto-char (point-max))
+                             (search-backward-regexp (rx (seq line-start ",")) nil t)
+                             (point))))
+                      (when (and start end-of-valid-chunk)
+                        ;; It'd be nice if our little algorithm always worked, but it
+                        ;; doesn't, so let's just ignore it when it fails.  As long as
+                        ;; it mostly succeeds, it should be fine.
+                        (condition-case nil
+                            (funcall response-callback
+                                     (llm-vertex--get-response-ui
+                                      (json-read-from-string
+                                       (concat
+                                        (buffer-substring-no-properties start end-of-valid-chunk)
+                                        ;; Close off the JSON array.
+                                        "]"))))
+                          (error (message "Unparseable buffer saved to *llm-vertex-unparseable*")
+                                 (let ((s (buffer-string)))
+                                   (with-current-buffer (get-buffer-create "*llm-vertex-unparseable*")
+                                     (erase-buffer)
+                                     (insert s))))))))
+                  nil t)))))
(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback error-callback)
-  (llm-vertex--chat provider prompt response-callback error-callback nil))
+  (llm-vertex--chat provider prompt response-callback error-callback 'async))
+
+(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt response-callback error-callback)
+  (llm-vertex--chat provider prompt response-callback error-callback 'streaming))
(cl-defmethod llm-chat ((provider llm-vertex) prompt)
(let ((response))
(llm-vertex--chat provider prompt
(lambda (result) (setq response result))
-                      (lambda (_ error-message) (error error-message)) t)
+                      (lambda (_ error-message) (error error-message)) 'sync)
response))
(provide 'llm-vertex)
diff --git a/llm.el b/llm.el
index b6a7fe2b56..1087ab7f0a 100644
--- a/llm.el
+++ b/llm.el
@@ -129,6 +129,14 @@ ERROR-CALLBACK receives the error response."
(ignore provider prompt response-callback error-callback)
(signal 'not-implemented nil))
+(cl-defgeneric llm-chat-streaming (provider prompt response-callback error-callback)
+  "Stream a response to PROMPT from PROVIDER.
+PROMPT is a `llm-chat-prompt'.
+RESPONSE-CALLBACK receives each piece of the string response.
+ERROR-CALLBACK receives the error response."
+  (ignore provider prompt response-callback error-callback)
+  (signal 'not-implemented nil))
+
(cl-defmethod llm-chat-async ((_ (eql nil)) _ _ _)
"Catch trivial configuration mistake."
(error "LLM provider was nil. Please set the provider in the application
you are using"))