branch: externals/llm
commit 5145074ba0a0f858523e3b572cbbb6716f4e6dad
Merge: 759f689fae a090d3bdbd
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: Andrew Hyatt <ahy...@gmail.com>

    Merge branch 'main' into plz
---
 NEWS.org              |   7 +
 README.org            |   9 +-
 llm-claude.el         | 128 +++++------------
 llm-gemini.el         | 106 +++-----------
 llm-gpt4all.el        |  37 +----
 llm-llamacpp.el       | 137 ++----------------
 llm-ollama.el         | 104 ++++----------
 llm-openai.el         | 274 ++++++++++++-----------------------
 llm-provider-utils.el | 388 ++++++++++++++++++++++++++++++++++++++++++--------
 llm-request-plz.el    | 100 -------------
 llm-request.el        |  27 ++--
 llm-tester.el         |  33 ++++-
 llm-vertex.el         | 271 +++++++++++++----------------------
 llm.el                |  15 +-
 14 files changed, 665 insertions(+), 971 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index f5e6ce9fd1..8ebe3a47fd 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,5 +1,12 @@
+* Version 0.12.4
+- Refactor of providers to centralize embedding and chat logic.
+- Remove connection buffers after use.
+* Version 0.12.3
+- Refactor of warn-on-nonfree methods.
+- Add non-free warnings for Gemini and Claude.
 * Version 0.12.2
 - Send connection issues to error callbacks, and fix an error handling issue in Ollama.
+- Fix issue where, in some cases, streaming does not work the first time it is attempted.
 * Version 0.12.1
 - Fix issue in =llm-ollama= with not using provider host for sync embeddings.
 - Fix issue in =llm-openai= where we were incompatible with some Open AI-compatible backends due to assumptions about inconsequential JSON details.
diff --git a/README.org b/README.org
index 2646c043e9..8be41b680d 100644
--- a/README.org
+++ b/README.org
@@ -69,12 +69,7 @@ In addition to the provider, which you may want multiple of 
(for example, to cha
 ** llama.cpp
 [[https://github.com/ggerganov/llama.cpp][llama.cpp]] is a way to run large 
language models locally.  To use it with the =llm= package, you need to start 
the server (with the "--embedding" flag if you plan on using embeddings).  The 
server must be started with a model, so it is not possible to switch models 
until the server is restarted to use the new model.  As such, the model is not a parameter to the provider, since the model choice is already set once the server starts.
 
-Llama.cpp does not have native chat interfaces, so is not as good at 
multi-round conversations as other solutions such as Ollama.  It will perform 
better at single-responses.  However, it does support Open AI's request format 
for models that are good at conversation.  If you are using one of those 
models, you should probably use the Open AI Compatible provider instead to 
connect to Llama CPP.
-
-The parameters default to optional values, so mostly users should just be 
creating a model with ~(make-llm-llamacpp)~.  The parameters are:
-- ~:scheme~: The scheme (http/https) for the connection to llama.cpp.  This 
default to "http".
-- ~:host~: The host that llama.cpp server is run on.  This is optional and 
will default to localhost.
-- ~:port~: The port that llama.cpp server is run on.  This is optional and 
will default to 8080, the default llama.cpp port.
+There is a deprecated =llm-llamacpp= provider, but it is no longer needed.  The llama.cpp server is Open AI compatible, so the Open AI Compatible provider should be used to connect to it.
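+
+For example, assuming the server is listening on its default port, a provider along these lines should work (the URL and port here are assumptions; adjust them to match how your server was started):
+
+#+begin_src emacs-lisp
+;; A minimal sketch: point the Open AI Compatible provider at a local
+;; llama.cpp server.  A :chat-model can also be passed, but llama.cpp
+;; ignores it since the model is fixed when the server starts.
+(make-llm-openai-compatible :url "http://localhost:8080/v1/")
+#+end_src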
 ** Fake
 This is a client that makes no calls; it is just there for testing and debugging.  Mostly this is of use to programmatic clients of the llm package, but end users can also use it to understand what will be sent to the LLMs.  It has the following parameters:
 - ~:output-to-buffer~: if non-nil, the buffer or buffer name to append the 
request sent to the LLM to.
@@ -128,7 +123,7 @@ Conversations can take place by repeatedly calling 
~llm-chat~ and its variants.
 ** Caution about ~llm-chat-prompt-interactions~
 The interactions in a prompt may be modified by conversation or by the 
conversion of the context and examples to what the LLM understands.  Different 
providers require different things from the interactions.  Some can handle 
system prompts, some cannot.  Some may have richer APIs for examples and 
context, some not.  Do not attempt to read or manipulate 
~llm-chat-prompt-interactions~ after initially setting it up for the first 
time, because you are likely to make changes that only work fo [...]
 ** Function calling
-*Note: function calling functionality is currently alpha quality.  If you want 
to use function calling, please watch the =llm= 
[discussion](https://github.com/ahyatt/llm/discussions) section for any 
announcements about changes.*
+*Note: function calling functionality is currently alpha quality.  If you want 
to use function calling, please watch the =llm= 
[[https://github.com/ahyatt/llm/discussions][discussions]] for any 
announcements about changes.*
 
 Function calling is a way to give the LLM a list of functions it can call, and 
have it call the functions for you.  The standard interaction has the following 
steps:
 1. The client sends the LLM a prompt with functions it can call.
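+
+As a rough sketch of step 1, using the constructors from =llm.el= (=my-provider= stands for whatever provider you have configured; the exact constructors and slot names are from the current code and may change while this feature is alpha):
+
+#+begin_src emacs-lisp
+;; Illustrative only: give the LLM a single function it may call.
+(llm-chat my-provider
+          (llm-make-chat-prompt
+           "What is the capital of France?"
+           :functions
+           (list (make-llm-function-call
+                  :function (lambda (country) (message "Looking up %s" country))
+                  :name "capital_lookup"
+                  :description "Look up the capital of a country."
+                  :args (list (make-llm-function-arg
+                               :name "country"
+                               :description "The name of the country."
+                               :type 'string
+                               :required t))))))
+#+end_src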
diff --git a/llm-claude.el b/llm-claude.el
index 30d3912947..96f653d0e9 100644
--- a/llm-claude.el
+++ b/llm-claude.el
@@ -31,20 +31,19 @@
 (require 'rx)
 
 ;; Models defined at https://docs.anthropic.com/claude/docs/models-overview
-(cl-defstruct llm-claude
+(cl-defstruct (llm-claude (:include llm-standard-chat-provider))
   (key nil :read-only t)
   (chat-model "claude-3-opus-20240229" :read-only t))
 
-(defun llm-claude-check-key (provider)
+(cl-defmethod llm-nonfree-message-info ((_ llm-claude))
+  "https://www.anthropic.com/legal/consumer-terms";)
+
+(cl-defmethod llm-provider-request-prelude ((provider llm-claude))
   "Check if the API key is valid, error if not."
   (unless (llm-claude-key provider)
     (error "No API key provided for Claude")))
 
-(defun llm-claude-request (provider prompt stream)
-  "Return the request (as an elisp JSON-convertable object).
-PROVIDER contains the model name.
-PROMPT is a `llm-chat-prompt' struct.
-STREAM is a boolean indicating whether the response should be streamed."
+(cl-defmethod llm-provider-chat-request ((provider llm-claude) prompt stream)
   (let ((request `(("model" . ,(llm-claude-chat-model provider))
                    ("stream" . ,(if stream t :json-false))
                    ;; Claude requires max_tokens
@@ -61,98 +60,39 @@ STREAM is a boolean indicating whether the response should 
be streamed."
       (push `("temperature" . ,(llm-chat-prompt-temperature prompt)) request))
     request))
 
-(defun llm-claude-get-response (response)
-  "Return the content of the response from the returned value."
+(cl-defmethod llm-provider-chat-extract-result ((_ llm-claude) response)
   (let ((content (aref (assoc-default 'content response) 0)))
     (if (equal (assoc-default 'type content) "text")
         (assoc-default 'text content)
       (format "Unsupported non-text response: %s" content))))
 
-(cl-defmethod llm-chat ((provider llm-claude) prompt)
-  (llm-claude-check-key provider)
-  (let ((content (llm-claude-get-response
-                  (llm-request-plz-sync "https://api.anthropic.com/v1/messages";
-                                        :headers `(("x-api-key" . 
,(llm-claude-key provider))
-                                                   ("anthropic-version" . 
"2023-06-01"))
-                                        :data (llm-claude-request provider 
prompt nil)))))
-    (llm-provider-utils-append-to-prompt prompt content)
-    content))
-
-(cl-defmethod llm-chat-async ((provider llm-claude) prompt response-callback 
error-callback)
-  (llm-claude-check-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async
-     "https://api.anthropic.com/v1/messages";
-     :headers `(("x-api-key" . ,(llm-claude-key provider))
-                ("anthropic-version" . "2023-06-01"))
-     :data (llm-claude-request provider prompt nil)
-     :on-success
-     (lambda (response)
-       (let ((content (llm-claude-get-response response)))
-         (llm-provider-utils-append-to-prompt prompt content)
-         (llm-request-plz-callback-in-buffer
-          buf
-          response-callback
-          content)))
-     :on-error
-     (lambda (_ msg)
-       (message "Error: %s" msg)
-       (let ((error (assoc-default 'error msg)))
-         (llm-request-plz-callback-in-buffer
-          buf error-callback
-          'error
-          (format "%s: %s" (assoc-default 'type error)
-                  (assoc-default 'message error))))))))
-
-;; see https://docs.anthropic.com/claude/reference/messages-streaming
-(cl-defmethod llm-chat-streaming ((provider llm-claude) prompt partial-callback
-                                  response-callback error-callback)
-  (llm-claude-check-key provider)
-  (let ((buf (current-buffer))
-        (in-flight-message ""))
-    (llm-request-plz-event-stream
-     "https://api.anthropic.com/v1/messages";
-     :headers `(("x-api-key" . ,(llm-claude-key provider))
-                ("anthropic-version" . "2023-06-01"))
-     :data (llm-claude-request provider prompt t)
-     :event-stream-handlers
-     ;; We ignore many types of messages; these might become important if 
Claude
-     ;; sends a few different alternate contents, but for now they don't do
-     ;; that.
-     `((message_start . ,(lambda (_)))
-       (content_block_start . ,(lambda (_)))
-       (ping . ,(lambda (_)))
-       (message_stop . ,(lambda (_)))
-       (content_block_stop . ,(lambda (_)))
-       (content_block_delta .
-        ,(lambda (data)
-           (setq in-flight-message
-                 (concat in-flight-message
-                         (let* ((json (json-parse-string data :object-type 
'alist))
-                                (delta (assoc-default 'delta json))
-                                (type (assoc-default 'type delta)))
-                           (when (eql type 'text_delta)
-                             (assoc-default 'text delta)))))
-           (llm-request-plz-callback-in-buffer
-            buf
-            partial-callback
-            in-flight-message))))
-     :on-success
-     (lambda (_)
-       (llm-provider-utils-append-to-prompt prompt in-flight-message)
-       (llm-request-plz-callback-in-buffer
-        buf
-        response-callback
-        in-flight-message))
-     :on-error
-     (lambda (_ msg)
-       (message "Error: %s" msg)
-       (let ((error (assoc-default 'error msg)))
-         (llm-request-plz-callback-in-buffer
-          buf error-callback
-          'error
-          (format "%s: %s" (assoc-default 'type error)
-                  (assoc-default 'message error))))))))
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-claude) 
msg-receiver _)
+  (cons 'text/event-stream
+       (plz-event-source:text/event-stream
+        :events `((message_start . ignore)
+                  (content_block_start . ignore)
+                  (ping . ignore)
+                  (message_stop . ignore)
+                  (content_block_stop . ignore)
+                  (content_block_delta
+                   .
+                   ,(lambda (_ event)
+                      (let* ((data (plz-event-source-event-data event))
+                            (json (json-parse-string data :object-type 'alist))
+                             (delta (assoc-default 'delta json))
+                             (type (assoc-default 'type delta)))
+                        (when (equal type "text_delta")
+                          (funcall msg-receiver (assoc-default 'text 
delta))))))))))
+
+(cl-defmethod llm-provider-headers ((provider llm-claude))
+  `(("x-api-key" . ,(llm-claude-key provider))
+    ("anthropic-version" . "2023-06-01")))
+
+(cl-defmethod llm-provider-chat-extract-error ((_ llm-claude) response)
+  (assoc-default 'error response))
+
+(cl-defmethod llm-provider-chat-url ((_ llm-claude))
+  "https://api.anthropic.com/v1/messages";)
 
 ;; See https://docs.anthropic.com/claude/docs/models-overview
 (cl-defmethod llm-chat-token-limit ((provider llm-claude))
diff --git a/llm-gemini.el b/llm-gemini.el
index 7e86b3b112..9f0d025372 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -33,46 +33,29 @@
 (require 'llm-provider-utils)
 (require 'json)
 
-(cl-defstruct llm-gemini
+(cl-defstruct (llm-gemini (:include llm-google))
   "A struct representing a Gemini client.
 
 KEY is the API key for the client.
 You can get this at https://makersuite.google.com/app/apikey.";
   key (embedding-model "embedding-001") (chat-model "gemini-pro"))
 
-(defun llm-gemini--embedding-url (provider)
+(cl-defmethod llm-nonfree-message-info ((_ llm-gemini))
+  "https://policies.google.com/terms/generative-ai";)
+
+(cl-defmethod llm-provider-embedding-url ((provider llm-gemini))
   "Return the URL for the EMBEDDING request for STRING from PROVIDER."
   (format "https://generativelanguage.googleapis.com/v1beta/models/%s:embedContent?key=%s"
           (llm-gemini-embedding-model provider)
           (llm-gemini-key provider)))
 
-(defun llm-gemini--embedding-request (provider string)
-  "Return the embedding request for STRING, using PROVIDER."
+(cl-defmethod llm-provider-embedding-request ((provider llm-gemini) string)
   `((model . ,(llm-gemini-embedding-model provider))
     (content . ((parts . (((text . ,string))))))))
 
-(defun llm-gemini--embedding-response-handler (response)
-  "Handle the embedding RESPONSE from Gemini."
+(cl-defmethod llm-provider-embedding-extract-result ((provider llm-gemini) 
response)
   (assoc-default 'values (assoc-default 'embedding response)))
 
-(cl-defmethod llm-embedding ((provider llm-gemini) string)
-  (llm-vertex--handle-response
-   (llm-request-plz-sync (llm-gemini--embedding-url provider)
-                         :data (llm-gemini--embedding-request provider string))
-   #'llm-gemini--embedding-response-handler))
-
-(cl-defmethod llm-embedding-async ((provider llm-gemini) string 
vector-callback error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async (llm-gemini--embedding-url provider)
-                           :data (llm-gemini--embedding-request provider 
string)
-                           :on-success (lambda (data)
-                                         (llm-request-callback-in-buffer
-                                          buf vector-callback 
(llm-gemini--embedding-response-handler data)))
-                           :on-error (lambda (_ data)
-                                       (llm-request-callback-in-buffer
-                                        buf error-callback
-                                        'error (llm-vertex--error-message 
data))))))
-
 ;; from https://ai.google.dev/tutorials/rest_quickstart
 (defun llm-gemini--chat-url (provider streaming-p)
   "Return the URL for the chat request, using PROVIDER.
@@ -82,7 +65,13 @@ If STREAMING-P is non-nil, use the streaming endpoint."
           (if streaming-p "streamGenerateContent" "generateContent")
           (llm-gemini-key provider)))
 
-(cl-defmethod llm-provider-utils-populate-function-calls ((_ llm-gemini) 
prompt calls)
+(cl-defmethod llm-provider-chat-url ((provider llm-gemini))
+  (llm-gemini--chat-url provider nil))
+
+(cl-defmethod llm-provider-chat-streaming-url ((provider llm-gemini))
+  (llm-gemini--chat-url provider t))
+
+(cl-defmethod llm-provider-populate-function-calls ((_ llm-gemini) prompt 
calls)
   (llm-provider-utils-append-to-prompt
    prompt
    ;; For Vertex there is just going to be one call
@@ -93,76 +82,17 @@ If STREAMING-P is non-nil, use the streaming endpoint."
                  (args . ,(llm-provider-utils-function-call-args fc))))))
            calls)))
 
-(defun llm-gemini--chat-request (prompt)
-  "Return the chat request for PROMPT."
+(cl-defmethod llm-provider-chat-request ((_ llm-gemini) _ _)
   (mapcar (lambda (c) (if (eq (car c) 'generation_config)
                           (cons 'generationConfig (cdr c))
                         c))
-          (llm-vertex--chat-request prompt)))
-
-(cl-defmethod llm-chat ((provider llm-gemini) prompt)
-  (llm-vertex--process-and-return
-   provider prompt
-   (llm-vertex--get-chat-response
-    (llm-request-plz-sync (llm-gemini--chat-url provider nil)
-                          :data (llm-gemini--chat-request prompt)))))
-
-(cl-defmethod llm-chat-async ((provider llm-gemini) prompt response-callback 
error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async (llm-gemini--chat-url provider nil)
-                           :data (llm-gemini--chat-request prompt)
-                           :on-success (lambda (data)
-                                         (llm-request-callback-in-buffer
-                                          buf response-callback
-                                          (llm-vertex--process-and-return
-                                           provider prompt
-                                           (llm-vertex--get-chat-response 
data))))
-                           :on-error (lambda (_ data)
-                                       (llm-request-callback-in-buffer buf 
error-callback 'error
-                                                                       
(llm-vertex--error-message data))))))
-
-(cl-defmethod llm-chat-streaming ((provider llm-gemini) prompt 
partial-callback response-callback error-callback)
-  (let ((buf (current-buffer))
-        (streamed-text "")
-        (function-call nil))
-    (llm-request-plz-json-array
-     (llm-gemini--chat-url provider t)
-     :data (llm-gemini--chat-request prompt)
-     :on-element (lambda (element)
-                   (if (alist-get 'error element)
-                       (llm-request-callback-in-buffer buf error-callback 
'error
-                                                       
(llm-vertex--error-message element))
-                     (when-let ((response (llm-vertex--get-chat-response 
element)))
-                       (if (stringp response)
-                           (when (> (length response) 0)
-                             (setq streamed-text (concat streamed-text 
response))
-                             (llm-request-callback-in-buffer buf 
partial-callback streamed-text))
-                         (setq function-call response)))))
-     :on-success (lambda (data)
-                   (llm-request-callback-in-buffer
-                    buf response-callback
-                    (llm-vertex--process-and-return
-                     provider prompt (or function-call
-                                         (if (> (length streamed-text) 0)
-                                             streamed-text
-                                           (llm-vertex--get-chat-response 
data))))))
-     :on-error (lambda (_ data)
-                 (llm-request-callback-in-buffer buf error-callback 'error
-                                                 (llm-vertex--error-message 
data))))))
-
-(defun llm-gemini--count-token-url (provider)
-  "Return the URL for the count token call, using PROVIDER."
+          (cl-call-next-method)))
+
+(cl-defmethod llm-google-count-tokens-url ((provider llm-gemini))
   (format "https://generativelanguage.googleapis.com/v1beta/models/%s:countTokens?key=%s"
           (llm-gemini-chat-model provider)
           (llm-gemini-key provider)))
 
-(cl-defmethod llm-count-tokens ((provider llm-gemini) string)
-  (llm-vertex--handle-response
-   (llm-request-plz-sync (llm-gemini--count-token-url provider)
-                         :data (llm-vertex--to-count-token-request
-                                (llm-vertex--chat-request 
(llm-make-simple-chat-prompt string))))
-   #'llm-vertex--count-tokens-extract-response))
-
 (cl-defmethod llm-name ((_ llm-gemini))
   "Return the name of PROVIDER."
   "Gemini")
diff --git a/llm-gpt4all.el b/llm-gpt4all.el
index 45ed81ab19..c482abb2c6 100644
--- a/llm-gpt4all.el
+++ b/llm-gpt4all.el
@@ -35,7 +35,7 @@
 (require 'llm-openai)
 (require 'llm-provider-utils)
 
-(cl-defstruct llm-gpt4all
+(cl-defstruct (llm-gpt4all (:include llm-openai-compatible))
   "A structure for holding information needed by GPT4All.
 
 CHAT-MODEL is the model to use for chat queries. It must be set.
@@ -44,39 +44,12 @@ URL is the host to connect to.  If unset, it will default 
to http://localhost.
 
 PORT is the port to connect to (an integer). If unset, it will default to the default GPT4all port."
-  chat-model host port)
+  host port)
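+
+;; Illustrative example (the model name here is hypothetical):
+;;   (make-llm-gpt4all :chat-model "Meta-Llama-3-8B-Instruct.Q4_0.gguf")
+;; HOST and PORT fall back to "localhost" and 4891 when unset.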
 
-(defun llm-gpt4all--url (provider path)
+(cl-defmethod llm-provider-chat-url ((provider llm-gpt4all))
   "Return the URL for PATH, given the settings in PROVIDER."
-  (format "http://%s:%d/v1/%s"; (or (llm-gpt4all-host provider) "localhost")
-          (or (llm-gpt4all-port provider) 4891) path))
-
-(cl-defmethod llm-chat ((provider llm-gpt4all) prompt)
-  (let ((response (llm-openai--handle-response
-                   (llm-request-sync (llm-gpt4all--url provider 
"chat/completions")
-                                     :data (llm-openai--chat-request 
(llm-gpt4all-chat-model provider) prompt))
-                   #'llm-openai--extract-chat-response)))
-    (setf (llm-chat-prompt-interactions prompt)
-          (append (llm-chat-prompt-interactions prompt)
-                  (list (make-llm-chat-prompt-interaction :role 'assistant 
:content response))))
-    response))
-
-(cl-defmethod llm-chat-async ((provider llm-gpt4all) prompt response-callback 
error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-async (llm-gpt4all--url provider "chat/completions")
-                       :data (llm-openai--chat-request (llm-gpt4all-chat-model 
provider) prompt)
-      :on-success (lambda (data)
-                    (let ((response (llm-openai--extract-chat-response data)))
-                      (setf (llm-chat-prompt-interactions prompt)
-                            (append (llm-chat-prompt-interactions prompt)
-                                    (list (make-llm-chat-prompt-interaction 
:role 'assistant :content response))))
-                      (llm-request-callback-in-buffer buf response-callback 
response)))
-      :on-error (lambda (_ data)
-                  (let ((errdata (cdr (assoc 'error data))))
-                    (llm-request-callback-in-buffer buf error-callback 'error
-                             (format "Problem calling GPT4All: %s message: %s"
-                                     (cdr (assoc 'type errdata))
-                                     (cdr (assoc 'message errdata)))))))))
+  (format "http://%s:%d/v1/chat/completions"; (or (llm-gpt4all-host provider) 
"localhost")
+          (or (llm-gpt4all-port provider) 4891)))
 
 (cl-defmethod llm-chat-streaming ((provider llm-gpt4all) prompt 
_partial-callback response-callback error-callback)
   ;; GPT4All does not implement streaming, so instead we just use the async 
method.
diff --git a/llm-llamacpp.el b/llm-llamacpp.el
index a1952b12ba..8ddd01fd45 100644
--- a/llm-llamacpp.el
+++ b/llm-llamacpp.el
@@ -27,10 +27,13 @@
 
 (require 'llm)
 (require 'cl-lib)
+(require 'llm-openai)
 (require 'llm-request)
 (require 'llm-provider-utils)
 (require 'json)
 
+(lwarn 'llm-llamacpp :warning "The LlamaCPP module is deprecated; use the Open AI Compatible provider instead")
+
 (defgroup llm-llamacpp nil
   "LLM implementation for llama.cpp."
   :group 'llm)
@@ -45,11 +48,12 @@
 This is needed because there is no API support for previous chat conversation."
   :type 'string)
 
-(cl-defstruct llm-llamacpp
+;; Obsolete, llm-openai-compatible can be used directly instead.
+(cl-defstruct (llm-llamacpp (:include llm-openai-compatible))
   "A struct representing a llama.cpp instance."
   (scheme "http") (host "localhost") (port 8080))
 
-(defun llm-llamacpp-url (provider path)
+(defun llm-llamacpp--url (provider path)
   "From PROVIDER, return the URL for llama.cpp.
 PATH is the path to append to the URL, not prefixed with a slash."
   (let ((scheme (llm-llamacpp-scheme provider))
@@ -57,130 +61,11 @@ PATH is the path to append to the URL, not prefixed with a 
slash."
         (port (llm-llamacpp-port provider)))
     (format "%s://%s:%d/%s" scheme host port path)))
 
-(defun llm-llamacpp-get-embedding-from-response (response)
-  "From JSON RESPONSE, return the embedding."
-  (let ((embedding (assoc-default 'embedding response)))
-    (when (and (= 0 (aref embedding 0)) (= 0 (aref embedding 1)))
-      (error "llm-llamacpp: embedding might be all 0s, make sure you are 
starting the server with the --embedding flag"))
-    embedding))
-
-(cl-defmethod llm-embedding ((provider llm-llamacpp) string)
-  (llm-llamacpp-get-embedding-from-response
-   (llm-request-sync (llm-llamacpp-url provider "embedding")
-                     :data `((content . ,string)))))
-
-(cl-defmethod llm-embedding-async ((provider llm-llamacpp) string 
vector-callback error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-async (llm-llamacpp-url provider "embedding")
-                       :data `((content . ,string))
-                       :on-success (lambda (data)
-                                   (llm-request-callback-in-buffer
-                                    buf vector-callback 
(llm-llamacpp-get-embedding-from-response data)))
-                       :on-error (lambda (_ _)
-                                   (llm-request-callback-in-buffer
-                                    buf error-callback 'error "Unknown error 
calling llm-llamacpp")))))
-
-(defun llm-llamacpp--prompt-to-text (prompt)
-  "From PROMPT, return the text to send to llama.cpp."
-  (llm-provider-utils-combine-to-user-prompt prompt 
llm-llamacpp-example-prelude)
-  (llm-provider-utils-collapse-history prompt llm-llamacpp-history-prelude)
-  (llm-chat-prompt-interaction-content (car (last 
(llm-chat-prompt-interactions prompt)))))
-
-(defun llm-llamacpp--chat-request (prompt)
-  "From PROMPT, create the chat request data to send."
-  (append
-   `((prompt . ,(llm-llamacpp--prompt-to-text prompt)))
-   (when (llm-chat-prompt-max-tokens prompt)
-     `((max_tokens . ,(llm-chat-prompt-max-tokens prompt))))
-   (when (llm-chat-prompt-temperature prompt)
-     `((temperature . ,(llm-chat-prompt-temperature prompt))))))
-
-(cl-defmethod llm-chat ((provider llm-llamacpp) prompt)
-  (let ((output (assoc-default
-                 'content
-                 (llm-request-sync (llm-llamacpp-url provider "completion")
-                                   :data (llm-llamacpp--chat-request 
prompt)))))
-    (setf (llm-chat-prompt-interactions prompt)
-          (append (llm-chat-prompt-interactions prompt)
-                  (list (make-llm-chat-prompt-interaction
-                         :role 'assistant
-                         :content output))))
-    output))
-
-(cl-defmethod llm-chat-async ((provider llm-llamacpp) prompt response-callback 
error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-async (llm-llamacpp-url provider "completion")
-                       :data (llm-llamacpp--chat-request prompt)
-                       :on-success (lambda (data)
-                                     (let ((response (assoc-default 'content 
data)))
-                                       (setf (llm-chat-prompt-interactions 
prompt)
-                                             (append 
(llm-chat-prompt-interactions prompt)
-                                                     (list 
(make-llm-chat-prompt-interaction
-                                                            :role 'assistant
-                                                            :content 
response))))
-                                       (llm-request-callback-in-buffer
-                                        buf response-callback response)))
-                       :on-error (lambda (_ _)
-                                   (llm-request-callback-in-buffer
-                                    buf error-callback 'error "Unknown error 
calling llm-llamacpp")))))
-
-(defvar-local llm-llamacpp-current-response ""
-  "The response so far from the server.")
-
-(defvar-local llm-llamacpp-last-response 0
-  "The number of the last streaming response we read.
-The responses from OpenAI are not numbered, but we just number
-them from 1 to however many are sent.")
-
-(defun llm-llamacpp--get-partial-chat-response (response)
-  "From raw streaming output RESPONSE, return the partial chat response."
-  (let ((current-response llm-llamacpp-current-response)
-        (last-response llm-llamacpp-last-response))
-    (with-temp-buffer
-      (insert response)
-      (let* ((end-of-chunk-rx (rx (seq "\"stop\":" (0+ space) "false}")))
-             (end-pos (save-excursion (goto-char (point-max))
-                                      (when (search-backward-regexp
-                                             end-of-chunk-rx
-                                             nil t)
-                                        (line-end-position)))))
-        (when end-pos
-          (let ((all-lines (seq-filter
-                            (lambda (line) (string-match-p end-of-chunk-rx 
line))
-                            (split-string (buffer-substring-no-properties 1 
end-pos) "\n"))))
-            (setq current-response
-                  (concat current-response
-                          (mapconcat (lambda (line)
-                                       (assoc-default 'content
-                                                      (json-read-from-string
-                                                       
(replace-regexp-in-string "data: " "" line))))
-                                     (seq-subseq all-lines last-response) "")))
-            (setq last-response (length all-lines))))))
-    (when (> (length current-response) (length llm-llamacpp-current-response))
-        (setq llm-llamacpp-current-response current-response)
-        (setq llm-llamacpp-last-response last-response))
-    current-response))
-
-(cl-defmethod llm-chat-streaming ((provider llm-llamacpp) prompt 
partial-callback response-callback error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-async (llm-llamacpp-url provider "completion")
-                       :data (append (llm-llamacpp--chat-request prompt) 
'((stream . t)))
-                       :on-success-raw (lambda (data)
-                                     (let ((response 
(llm-llamacpp--get-partial-chat-response data)))
-                                       (setf (llm-chat-prompt-interactions 
prompt)
-                                             (append 
(llm-chat-prompt-interactions prompt)
-                                                     (list 
(make-llm-chat-prompt-interaction
-                                                            :role 'assistant
-                                                            :content 
response))))
-                                       (llm-request-callback-in-buffer
-                                        buf response-callback response)))
-                       :on-partial (lambda (data)
-                                     (when-let ((response 
(llm-llamacpp--get-partial-chat-response data)))
-                                       (llm-request-callback-in-buffer
-                                        buf partial-callback response)))
-                       :on-error (lambda (_ _)
-                                   (llm-request-callback-in-buffer
-                                    buf error-callback 'error "Unknown error 
calling llm-llamacpp")))))
+(cl-defmethod llm-provider-embedding-url ((provider llm-llamacpp))
+  (llm-llamacpp--url provider "embedding"))
+
+(cl-defmethod llm-provider-chat-url ((provider llm-llamacpp))
+  (llm-llamacpp--url provider "chat/completions"))
 
 (cl-defmethod llm-name ((_ llm-llamacpp))
   ;; We don't actually know the name of the model, so we have to just name 
Llama
diff --git a/llm-ollama.el b/llm-ollama.el
index f3d7ff4d72..ba409eb040 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -45,7 +45,7 @@
   :type 'integer
   :group 'llm-ollama)
 
-(cl-defstruct llm-ollama
+(cl-defstruct (llm-ollama (:include llm-standard-full-provider))
   "A structure for holding information needed by Ollama's API.
 
 SCHEME is the http scheme to use, a string. It is optional and
@@ -72,41 +72,30 @@ EMBEDDING-MODEL is the model to use for embeddings.  It is 
required."
   (format "%s://%s:%d/api/%s" (llm-ollama-scheme provider )(llm-ollama-host 
provider)
           (llm-ollama-port provider) method))
 
-(defun llm-ollama--embedding-request (provider string)
+(cl-defmethod llm-provider-embedding-url ((provider llm-ollama))
+  (llm-ollama--url provider "embeddings"))
+
+(cl-defmethod llm-provider-chat-url ((provider llm-ollama))
+  (llm-ollama--url provider "chat"))
+
+(cl-defmethod llm-provider-chat-timeout ((_ llm-ollama))
+  llm-ollama-chat-timeout)
+
+(cl-defmethod llm-provider-embedding-request ((provider llm-ollama) string)
   "Return the request to the server for the embedding of STRING.
 PROVIDER is the llm-ollama provider."
   `(("prompt" . ,string)
     ("model" . ,(llm-ollama-embedding-model provider))))
 
-(defun llm-ollama--embedding-extract-response (response)
+(cl-defmethod llm-provider-embedding-extract-result ((_ llm-ollama) response)
   "Return the embedding from the server RESPONSE."
   (assoc-default 'embedding response))
 
-(defun llm-ollama--error-message (data)
-  "Return the error message from DATA."
-  (if (stringp data) data (assoc-default 'error data)))
-
-(cl-defmethod llm-embedding-async ((provider llm-ollama) string 
vector-callback error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async (llm-ollama--url provider "embeddings")
-                           :data (llm-ollama--embedding-request provider 
string)
-                           :on-success (lambda (data)
-                                         (llm-request-callback-in-buffer
-                                          buf vector-callback 
(llm-ollama--embedding-extract-response data)))
-                           :on-error (lambda (type err)
-                                       (llm-request-callback-in-buffer
-                                        buf error-callback type
-                                        (llm-ollama--error-message))))))
-
-(cl-defmethod llm-embedding ((provider llm-ollama) string)
-  (llm-ollama--embedding-extract-response
-   (llm-request-plz-sync (llm-ollama--url provider "embeddings")
-                         :data (llm-ollama--embedding-request provider 
string))))
-
-(defun llm-ollama--chat-request (provider prompt streaming)
-  "From PROMPT, create the chat request data to send.
-PROVIDER is the llm-ollama provider to use.
-STREAMING is a boolean to control whether to stream the response."
+(cl-defmethod llm-provider-chat-extract-result ((_ llm-ollama) response)
+  "Return the chat response from the server RESPONSE"
+  (assoc-default 'content (assoc-default 'message response)))
+
+(cl-defmethod llm-provider-chat-request ((provider llm-ollama) prompt 
streaming)
   (let (request-alist messages options)
     (setq messages
           (mapcar (lambda (interaction)
@@ -127,57 +116,14 @@ STREAMING is a boolean to control whether to stream the 
response."
     (when options (push `("options" . ,options) request-alist))
     request-alist))
 
-(defun llm-ollama--get-response (response)
-  "Return the response from the parsed json RESPONSE."
-  (assoc-default 'content (assoc-default 'message response)))
-
-(cl-defmethod llm-chat ((provider llm-ollama) prompt)
-  ;; We expect to be in a new buffer with the response, which we use to store
-  ;; local variables. The temp buffer won't have the response, but that's fine,
-  ;; we really just need it for the local variables.
-  (with-temp-buffer
-    (let ((output (llm-ollama--get-response
-                   (llm-request-plz-sync-raw-output 
-                    (llm-ollama--url provider "chat")
-                    :data (llm-ollama--chat-request provider prompt nil)
-                    ;; ollama is run on a user's machine, and it can take a 
while.
-                    :timeout llm-ollama-chat-timeout))))
-      (llm-provider-utils-append-to-prompt prompt output)
-      output)))
-
-(cl-defmethod llm-chat-async ((provider llm-ollama) prompt response-callback 
error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async
-     (llm-ollama--url provider "chat")
-     :data (llm-ollama--chat-request provider prompt nil)
-     :timeout llm-ollama-chat-timeout
-     :on-success (lambda (data)
-                   (let ((response (llm-ollama--get-response data)))
-                     (llm-provider-utils-append-to-prompt prompt response)
-                     (llm-request-plz-callback-in-buffer buf response-callback 
response)))
-     :on-error (lambda (code data)
-                 (llm-request-plz-callback-in-buffer
-                  buf error-callback 'error
-                  (llm-ollama--error-message data))))))
-
-(cl-defmethod llm-chat-streaming ((provider llm-ollama) prompt 
partial-callback response-callback error-callback)
-  (let ((buf (current-buffer))
-        (response-text ""))
-    (llm-request-plz-ndjson
-     (llm-ollama--url provider "chat")
-      :data (llm-ollama--chat-request provider prompt t)
-      :on-success (lambda (response)
-                    (llm-provider-utils-append-to-prompt prompt response-text)
-                    (llm-request-callback-in-buffer
-                     buf response-callback
-                     response-text))
-      :on-object (lambda (data)
-                   (when-let ((response (llm-ollama--get-response data)))
-                     (setq response-text (concat response-text response))
-                     (llm-request-callback-in-buffer buf partial-callback 
response-text)))
-      :on-error (lambda (type msg)
-                  (llm-request-callback-in-buffer buf error-callback type
-                                                  (llm-ollama--error-message 
msg))))))
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-ollama) 
msg-receiver _)
+  (cons 'application/x-ndjson
+        (plz-media-type:application/x-ndjson
+         :handler (lambda (data)
+                     (when-let ((response (assoc-default
+                                           'content
+                                           (assoc-default 'message data))))
+                       (funcall msg-receiver response))))))
 
 (cl-defmethod llm-name ((provider llm-ollama))
   (llm-ollama-chat-model provider))
diff --git a/llm-openai.el b/llm-openai.el
index 7ee30a24b7..107e010356 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -40,7 +40,7 @@
   :type 'string
   :group 'llm-openai)
 
-(cl-defstruct llm-openai
+(cl-defstruct (llm-openai (:include llm-standard-full-provider))
   "A structure for holding information needed by Open AI's API.
 
 KEY is the API key for Open AI, which is required.
@@ -61,34 +61,22 @@ https://api.example.com/v1/chat, then URL should be
 \"https://api.example.com/v1/\".";
   url)
 
-(cl-defmethod llm-nonfree-message-info ((provider llm-openai))
-  (ignore provider)
-  (cons "Open AI" "https://openai.com/policies/terms-of-use";))
+(cl-defmethod llm-nonfree-message-info ((_ llm-openai))
+  "https://openai.com/policies/terms-of-use";)
 
-(defun llm-openai--embedding-request (model string)
+(cl-defmethod llm-provider-embedding-request ((provider llm-openai) string)
   "Return the request to the server for the embedding of STRING.
 MODEL is the embedding model to use, or nil to use the default.."
   `(("input" . ,string)
-    ("model" . ,(or model "text-embedding-3-small"))))
+    ("model" . ,(or (llm-openai-embedding-model provider)
+                    "text-embedding-3-small"))))
 
-(defun llm-openai--embedding-extract-response (response)
+(cl-defmethod llm-provider-embedding-extract-result ((_ llm-openai) response)
   "Return the embedding from the server RESPONSE."
-  (cdr (assoc 'embedding (aref (cdr (assoc 'data response)) 0))))
-
-(defun llm-openai--error-message (err-response)
-  "Return a user-visible error message from ERR-RESPONSE."
-  (if (stringp err-response)
-      err-response
-    (let ((errdata (cdr (assoc 'error err-response))))
-      (format "Open AI returned error: %s message: %s"
-              (cdr (assoc 'type errdata))
-              (cdr (assoc 'message errdata))))))
-
-(defun llm-openai--handle-response (response extractor)
-  "If RESPONSE is an error, throw it, else call EXTRACTOR."
-  (if (cdr (assoc 'error response))
-      (error (llm-openai--error-message response))
-    (funcall extractor response)))
+  (assoc-default 'embedding (aref (assoc-default 'data response) 0)))
+
+(cl-defgeneric llm-openai--check-key (provider)
+  "Check that the key is set for the Open AI provider.")
 
 (cl-defmethod llm-openai--check-key ((provider llm-openai))
   (unless (llm-openai-key provider)
@@ -98,6 +86,10 @@ MODEL is the embedding model to use, or nil to use the 
default.."
   ;; It isn't always the case that a key is needed for Open AI compatible APIs.
   )
 
+(cl-defmethod llm-provider-request-prelude ((provider llm-openai))
+  (llm-openai--check-key provider))
+
+;; Obsolete, but we keep them here for backward compatibility.
 (cl-defgeneric llm-openai--headers (provider)
   "Return the headers to use for a request from PROVIDER.")
 
@@ -105,43 +97,41 @@ MODEL is the embedding model to use, or nil to use the 
default.."
   (when (llm-openai-key provider)
     `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))))
 
+(cl-defmethod llm-provider-headers ((provider llm-openai))
+  (llm-openai--headers provider))
+
+;; Obsolete, but we keep them here for backward compatibility.
 (cl-defgeneric llm-openai--url (provider command)
   "Return the URL for COMMAND for PROVIDER.")
 
 (cl-defmethod llm-openai--url ((_ llm-openai) command)
   (concat "https://api.openai.com/v1/"; command))
 
+(cl-defmethod llm-provider-embedding-url ((provider llm-openai))
+  (llm-openai--url provider "embeddings"))
+
+(cl-defmethod llm-provider-chat-url ((provider llm-openai))
+  (llm-openai--url provider "chat/completions"))
+
 (cl-defmethod llm-openai--url ((provider llm-openai-compatible) command)
   "Return the URL for COMMAND for PROVIDER."
   (concat (llm-openai-compatible-url provider)
           (unless (string-suffix-p "/" (llm-openai-compatible-url provider))
             "/") command))
 
-(cl-defmethod llm-embedding-async ((provider llm-openai) string 
vector-callback error-callback)
-  (llm-openai--check-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async (llm-openai--url provider "embeddings")
-                           :headers (llm-openai--headers provider)
-                           :data (llm-openai--embedding-request 
(llm-openai-embedding-model provider) string)
-                           :on-success (lambda (data)
-                                         (llm-request-plz-callback-in-buffer
-                                          buf vector-callback 
(llm-openai--embedding-extract-response data)))
-                           :on-error (lambda (_ data)
-                                       (llm-request-plz-callback-in-buffer
-                                        buf error-callback 'error
-                                        (llm-openai--error-message data))))))
-
-(cl-defmethod llm-embedding ((provider llm-openai) string)
-  (llm-openai--check-key provider)
-  (llm-openai--handle-response
-   (llm-request-plz-sync (llm-openai--url provider "embeddings")
-                         :headers (llm-openai--headers provider)
-                         :data (llm-openai--embedding-request 
(llm-openai-embedding-model provider) string))
-   #'llm-openai--embedding-extract-response))
-
-(defun llm-openai--chat-request (model prompt &optional streaming)
+(cl-defmethod llm-provider-embedding-extract-error ((_ llm-openai) err-response)
+  (let ((errdata (assoc-default 'error err-response)))
+      (when errdata
+        (format "Open AI returned error: %s message: %s"
+                (cdr (assoc 'type errdata))
+                (cdr (assoc 'message errdata))))))
+
+(cl-defmethod llm-provider-chat-extract-error ((provider llm-openai) err-response)
+  (llm-provider-embedding-extract-error provider err-response))
+
+(cl-defmethod llm-provider-chat-request ((provider llm-openai) prompt 
streaming)
   "From PROMPT, create the chat request data to send.
-MODEL is the model name to use.
+PROVIDER is the Open AI provider.
 FUNCTIONS is a list of functions to call, or nil if none.
 STREAMING if non-nil, turn on response streaming."
   (let (request-alist)
@@ -163,7 +153,8 @@ STREAMING if non-nil, turn on response streaming."
                            `(("name" . 
,(llm-chat-prompt-function-call-result-function-name fc)))))))
                      (llm-chat-prompt-interactions prompt)))
           request-alist)
-    (push `("model" . ,(or model "gpt-3.5-turbo-0613")) request-alist)
+    (push `("model" . ,(or (llm-openai-chat-model provider)
+                          "gpt-3.5-turbo-0613")) request-alist)
     (when (llm-chat-prompt-temperature prompt)
       (push `("temperature" . ,(/ (llm-chat-prompt-temperature prompt) 2.0)) 
request-alist))
     (when (llm-chat-prompt-max-tokens prompt)
@@ -174,19 +165,22 @@ STREAMING if non-nil, turn on response streaming."
             request-alist))
     request-alist))
 
-(defun llm-openai--extract-chat-response (response)
-  "Return chat response from server RESPONSE."
-  (let ((result (cdr (assoc 'content
-                            (cdr (assoc
-                                  'message
-                                  (aref (cdr (assoc 'choices response)) 0))))))
-        (func-result (assoc-default
-                      'tool_calls
-                      (assoc-default 'message
-                                     (aref (assoc-default 'choices response) 
0)))))
-    (or func-result result)))
-
-(cl-defmethod llm-provider-utils-populate-function-calls ((_ llm-openai) 
prompt calls)
+(cl-defmethod llm-provider-chat-extract-result ((_ llm-openai) response)
+  (assoc-default 'content
+                 (assoc-default 'message (aref (cdr (assoc 'choices response)) 
0))))
+
+(cl-defmethod llm-provider-extract-function-calls ((_ llm-openai) response)
+  (mapcar (lambda (call)
+            (let ((function (cdr (nth 2 call))))
+             (make-llm-provider-utils-function-call
+                 :id (assoc-default 'id call)
+                 :name (assoc-default 'name function)
+                 :args (json-read-from-string (assoc-default 'arguments 
function)))))
+          (assoc-default 'tool_calls
+                         (assoc-default 'message
+                                        (aref (assoc-default 'choices 
response) 0)))))
+
+(cl-defmethod llm-provider-populate-function-calls ((_ llm-openai) prompt 
calls)
   (llm-provider-utils-append-to-prompt
    prompt
    (mapcar (lambda (call)
@@ -196,67 +190,6 @@ STREAMING if non-nil, turn on response streaming."
                                         (llm-provider-utils-function-call-args 
call))))))
            calls)))
 
-(defun llm-openai--normalize-function-calls (response)
-  "Transform RESPONSE from what Open AI returns to our neutral format."
-  (if (vectorp response)
-      (mapcar (lambda (call)
-                (let ((function (cl-third call)))
-                  (make-llm-provider-utils-function-call
-                   :id (assoc-default 'id call)
-                   :name (assoc-default 'name function)
-                   :args (json-read-from-string (assoc-default 'arguments 
function)))))
-              response)
-    response))
-
-(defun llm-openai--process-and-return (provider prompt response &optional 
error-callback)
-  "Process RESPONSE from the PROVIDER.
-
-This function adds the response to the prompt, executes any
-functions, and returns the value that the client should get back.
-
-PROMPT is the prompt that needs to be updated with the response."
-  (if (and (consp response) (cdr (assoc 'error response)))
-      (progn
-        (when error-callback
-          (funcall error-callback 'error (llm-openai--error-message response)))
-        response)
-    ;; When it isn't an error
-    (llm-provider-utils-process-result
-     provider prompt
-     (llm-openai--normalize-function-calls
-      (if response
-          (llm-openai--extract-chat-response response)
-        (llm-openai--get-partial-chat-response nil))))))
-
-(cl-defmethod llm-chat-async ((provider llm-openai) prompt response-callback 
error-callback)
-  (llm-openai--check-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async
-     (llm-openai--url provider "chat/completions")
-     :headers (llm-openai--headers provider)
-     :data (llm-openai--chat-request (llm-openai-chat-model provider) prompt)
-     :on-success (lambda (data)
-                   (llm-request-plz-callback-in-buffer
-                    buf response-callback
-                    (llm-openai--process-and-return
-                     provider prompt data error-callback)))
-     :on-error (lambda (_ data)
-                 (llm-request-plz-callback-in-buffer buf error-callback 'error
-                                                       
(llm-openai--error-message data))))))
-
-(cl-defmethod llm-chat ((provider llm-openai) prompt)
-  (llm-openai--check-key provider)
-  (llm-openai--process-and-return
-   provider prompt
-   (llm-request-plz-sync
-    (llm-openai--url provider "chat/completions")
-    :headers (llm-openai--headers provider)
-    :data (llm-openai--chat-request (llm-openai-chat-model provider)
-                                    prompt))))
-
-(defvar-local llm-openai-current-response ""
-  "The response so far from the server.")
-
 (defun llm-openai--get-partial-chat-response (response)
   "Return the text in the partial chat response from RESPONSE.
 RESPONSE can be nil if the response is complete."
@@ -266,66 +199,43 @@ RESPONSE can be nil if the response is complete."
                     (assoc-default 'delta (aref choices 0))))
            (content-or-call (or (assoc-default 'content delta)
                                 (assoc-default 'tool_calls delta))))
-      (when content-or-call
-        (if (stringp content-or-call)
-            (setq llm-openai-current-response
-                  (concat llm-openai-current-response content-or-call))
-          (when (equal "" llm-openai-current-response)
-            (setq llm-openai-current-response (make-vector (length 
content-or-call) nil)))
-          (cl-loop for call in (append content-or-call nil) do
-                   (let* ((index (assoc-default 'index call))
-                          (plist (aref llm-openai-current-response index))
-                          (function (assoc-default 'function call))
-                          (name (assoc-default 'name function))
-                          (id (assoc-default 'id call))
-                          (arguments (assoc-default 'arguments function)))
-                     (when name (setq plist (plist-put plist :name name)))
-                     (when id (setq plist (plist-put plist :id id)))
-                     (setq plist (plist-put plist :arguments
-                                            (concat (plist-get plist 
:arguments)
-                                                    arguments)))
-                     (aset llm-openai-current-response index plist)))))))
-  (if (vectorp llm-openai-current-response)
-      (apply #'vector
-             (mapcar (lambda (plist)
-                       `((id . ,(plist-get plist :id))
-                         (type . function)
-                         (function
-                          .
-                          ((name . ,(plist-get plist :name))
-                           (arguments . ,(plist-get plist :arguments))))))
-                     llm-openai-current-response))
-    llm-openai-current-response))
-
-(cl-defmethod llm-chat-streaming ((provider llm-openai) prompt partial-callback
-                                  response-callback error-callback)
-  (llm-openai--check-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-event-stream
-     (llm-openai--url provider "chat/completions")
-     :headers (llm-openai--headers provider)
-     :data (llm-openai--chat-request (llm-openai-chat-model provider) prompt t)
-     :event-stream-handlers
-     `((message . ,(lambda (data)
-                       (when (not (equal data "[DONE]"))
-                         (when-let ((response 
(llm-openai--get-partial-chat-response
-                                               (json-read-from-string data))))
-                           (when (stringp response)
-                             (llm-request-plz-callback-in-buffer buf 
partial-callback response))))))
-       (error . ,(lambda (data)
-                     (llm-request-plz-callback-in-buffer
-                      buf error-callback 'error data))))
-     :on-error (lambda (_ data)
-                 (llm-request-plz-callback-in-buffer
-                    buf error-callback 'error
-                    (llm-openai--error-message data)))
-     :on-success (lambda (_)
-                   (llm-request-plz-callback-in-buffer
-                    buf
-                    response-callback
-                    (llm-openai--process-and-return
-                     provider prompt nil
-                     error-callback))))))
+      content-or-call)))
+
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-openai) 
msg-receiver fc-receiver)
+  (cons 'text/event-stream
+       (plz-event-source:text/event-stream
+         :events `((message
+                   .
+                  ,(lambda (_ event)
+                     (let ((data (plz-event-source-event-data event)))
+                       (unless (equal data "[DONE]")
+                         (when-let ((response 
(llm-openai--get-partial-chat-response
+                                               (json-read-from-string data))))
+                            (funcall (if (stringp response) msg-receiver 
fc-receiver) response))))))))))
+
+(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-openai) 
data)
+  (let ((cvec (make-vector (length (car data)) nil)))
+    (dotimes (i (length (car data)))
+      (setf (aref cvec i) (make-llm-provider-utils-function-call)))
+    (cl-loop for part in data do
+            (cl-loop for call in (append part nil) do
+                     (let* ((index (assoc-default 'index call))
+                            (id (assoc-default 'id call))
+                            (function (assoc-default 'function call))
+                            (name (assoc-default 'name function))
+                            (arguments (assoc-default 'arguments function)))
+                       (when id
+                         (setf (llm-provider-utils-function-call-id (aref cvec 
index)) id))
+                       (when name
+                         (setf (llm-provider-utils-function-call-name (aref 
cvec index)) name))
+                       (setf (llm-provider-utils-function-call-args (aref cvec 
index))
+                             (concat (llm-provider-utils-function-call-args 
(aref cvec index))
+                                     arguments)))))
+    (cl-loop for call in (append cvec nil)
+             do (setf (llm-provider-utils-function-call-args call)
+                      (json-read-from-string 
(llm-provider-utils-function-call-args call)))
+             finally return (when (> (length cvec) 0)
+                  (append cvec nil)))))
 
 (cl-defmethod llm-name ((_ llm-openai))
   "Open AI")
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index 4eb6009d2f..2e6e82e9d6 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -22,8 +22,258 @@
 ;;; Code:
 
 (require 'llm)
+(require 'llm-request-plz)
 (require 'seq)
 
+(cl-defstruct llm-standard-provider
+  "A struct indicating that this is a standard provider.
+This is for dispatch purposes, so this contains no actual data.
+
+This represents any provider, regardless of what it implements.
+
+This should not be used outside of this file.")
+
+(cl-defstruct (llm-standard-chat-provider (:include llm-standard-provider))
+  "A struct for indicating a provider that implements chat.")
+
+(cl-defstruct (llm-standard-full-provider (:include 
llm-standard-chat-provider))
+  "A struct for providers that implements chat and embeddings.")
+
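+;; As an illustration of how the hierarchy is used in this patch: llm-claude
+;; includes llm-standard-chat-provider, while llm-ollama and llm-openai
+;; include llm-standard-full-provider, e.g.
+;;   (cl-defstruct (llm-claude (:include llm-standard-chat-provider)) ...)
+;; and then implement the llm-provider-* generics below instead of defining
+;; llm-chat, llm-chat-async or llm-embedding themselves.
+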
+;; Methods necessary for both embedding and chat requests.
+
+(cl-defgeneric llm-provider-request-prelude (provider)
+  "Execute any prelude code necessary before running a request.")
+
+(cl-defmethod llm-provider-request-prelude ((_ llm-standard-provider))
+  "Do nothing for the standard provider."
+  nil)
+
+(cl-defgeneric llm-provider-headers (provider)
+  "Return the headers for the PROVIDER.")
+
+(cl-defmethod llm-provider-headers ((_ llm-standard-provider))
+  "By default, the standard provider has no headers."
+  nil)
+
+;; Methods for embeddings
+(cl-defgeneric llm-provider-embedding-url (provider)
+  "Return the URL for embeddings for the PROVIDER.")
+
+(cl-defgeneric llm-provider-embedding-request (provider string)
+  "Return the request for the PROVIDER for STRING.")
+
+(cl-defgeneric llm-provider-embedding-extract-error (provider response)
+  "Return an error message from RESPONSE for the PROVIDER.
+
+RESPONSE is a parsed JSON object.
+
+Return nil if there is no error.")
+
+(cl-defmethod llm-provider-embedding-extract-error ((_ 
llm-standard-full-provider) _)
+  "By default, the standard provider has no error extractor."
+  nil)
+
+(cl-defgeneric llm-provider-embedding-extract-result (provider response)
+  "Return the result from RESPONSE for the PROVIDER.")
+
+;; Methods for chat
+
+(cl-defgeneric llm-provider-chat-url (provider)
+  "Return the URL for chat for the PROVIDER.")
+
+(cl-defgeneric llm-provider-chat-streaming-url (provider)
+  "Return the URL for streaming chat for the PROVIDER.")
+
+(cl-defmethod llm-provider-chat-streaming-url ((provider 
llm-standard-chat-provider))
+  "By default, use the same URL as normal chat."
+  (llm-provider-chat-url provider))
+
+(cl-defgeneric llm-provider-chat-timeout (provider)
+  "Return the seconds of timeout for PROVIDER.
+Return nil for the standard timeout.")
+
+(cl-defmethod llm-provider-chat-timeout ((_ llm-standard-provider))
+  "By default, the standard provider has the standard timeout."
+  nil)
+
+(cl-defgeneric llm-provider-chat-request (provider prompt streaming)
+  "Return the request for the PROVIDER for PROMPT.
+STREAMING is true if this is a streaming request.")
+
+(cl-defgeneric llm-provider-chat-extract-error (provider response)
+  "Return an error message from RESPONSE for the PROVIDER.")
+
+(cl-defmethod llm-provider-chat-extract-error ((_ llm-standard-chat-provider) 
_)
+  "By default, the standard provider has no error extractor."
+  nil)
+
+(cl-defgeneric llm-provider-chat-extract-result (provider response)
+  "Return the result from RESPONSE for the PROVIDER.")
+
+(cl-defgeneric llm-provider-append-to-prompt (provider prompt result 
func-results)
+  "Append RESULT to PROMPT for the PROVIDER.
+FUNC-RESULTS is a list of function results, if any.")
+
+(cl-defmethod llm-provider-append-to-prompt ((_ llm-standard-chat-provider) 
prompt result
+                                             &optional func-results)
+  "By default, the standard provider appends to the prompt."
+  (llm-provider-utils-append-to-prompt prompt result func-results))
+
+(cl-defgeneric llm-provider-streaming-media-handler (provider msg-receiver fc-receiver)
+  "Return the streaming media handler for PROVIDER.
+
+This should be a cons of the media type as a symbol, and a plist
+of the particular data the media type needs to process the
+streaming media.
+
+MSG-RECEIVER is called with each new piece of message text.
+FC-RECEIVER is called with each new piece of function call data.")
+
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-standard-chat-provider) _ _)
+  "By default, the standard provider has no streaming media handler."
+  nil)
+
+;; Methods for chat function calling
+
+(cl-defgeneric llm-provider-extract-function-calls (provider response)
+  "Return the function calls from RESPONSE for the PROVIDER.
+If there are no function calls, return nil.  If there are
+function calls, return a list of
+`llm-provider-utils-function-call'.")
+
+(cl-defmethod llm-provider-extract-function-calls ((_ 
llm-standard-chat-provider) _)
+  "By default, the standard provider has no function call extractor."
+  nil)
+
+(cl-defgeneric llm-provider-populate-function-calls (provider prompt calls)
+  "For PROVIDER, in PROMPT, record that function CALLS were received.
+This is the recording before the calls were executed.
+CALLS are a list of `llm-provider-utils-function-call'.")
+
+(cl-defgeneric llm-provider-collect-streaming-function-data (provider data)
+  "Transform a list of streaming function call DATA responses.
+
+The DATA responses are a list of whatever is sent to the function
+call handler in `llm-provider-streaming-media-handler'.  This should
+return a list of `llm-provider-utils-function-call' structs.")
+
+(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-standard-chat-provider) _)
+  "By default, there is no streaming function calling."
+  nil)
+
+;; Standard provider implementations of llm functionality
+
+(cl-defmethod llm-embedding ((provider llm-standard-full-provider) string)
+  (llm-provider-request-prelude provider)
+  (let ((response (llm-request-plz-sync
+                   (llm-provider-embedding-url provider)
+                   :timeout (llm-provider-chat-timeout provider)
+                   :headers (llm-provider-headers provider)
+                   :data (llm-provider-embedding-request provider string))))
+    (if-let ((err-msg (llm-provider-embedding-extract-error provider 
response)))
+        (error err-msg)
+      (llm-provider-embedding-extract-result provider response))))
+
+(cl-defmethod llm-embedding-async ((provider llm-standard-full-provider) 
string vector-callback error-callback)
+  (llm-provider-request-prelude provider)
+  (let ((buf (current-buffer)))
+    (llm-request-plz-async
+     (llm-provider-embedding-url provider)
+     :headers (llm-provider-headers provider)
+     :data (llm-provider-embedding-request provider string)
+     :on-success (lambda (data)
+                   (if-let ((err-msg (llm-provider-embedding-extract-error 
provider data)))
+                       (llm-provider-utils-callback-in-buffer
+                        buf error-callback 'error
+                        err-msg)
+                     (llm-provider-utils-callback-in-buffer
+                      buf vector-callback
+                      (llm-provider-embedding-extract-result provider data))))
+     :on-error (lambda (_ data)
+                 (llm-provider-utils-callback-in-buffer
+                  buf error-callback 'error
+                  (if (stringp data)
+                      data
+                    (or (llm-provider-embedding-extract-error
+                         provider data)
+                                   "Unknown error")))))))
+
+(cl-defmethod llm-chat ((provider llm-standard-chat-provider) prompt)
+  (llm-provider-request-prelude provider)
+  (let ((response (llm-request-plz-sync (llm-provider-chat-url provider)
+                                        :headers (llm-provider-headers 
provider)
+                                        :data (llm-provider-chat-request 
provider prompt nil))))
+    (if-let ((err-msg (llm-provider-chat-extract-error provider response)))
+        (error err-msg)
+      (llm-provider-utils-process-result provider prompt
+                                         (llm-provider-chat-extract-result
+                                          provider response)
+                                         (llm-provider-extract-function-calls
+                                          provider response)))))
+
+(cl-defmethod llm-chat-async ((provider llm-standard-chat-provider) prompt 
success-callback
+                              error-callback)
+  (llm-provider-request-prelude provider)
+  (let ((buf (current-buffer)))
+    (llm-request-plz-async
+     (llm-provider-chat-url provider)
+     :headers (llm-provider-headers provider)
+     :data (llm-provider-chat-request provider prompt nil)
+     :on-success (lambda (data)
+                   (if-let ((err-msg (llm-provider-chat-extract-error provider 
data)))
+                       (llm-provider-utils-callback-in-buffer
+                        buf error-callback 'error
+                        err-msg)
+                     (llm-provider-utils-callback-in-buffer
+                      buf success-callback
+                      (llm-provider-utils-process-result
+                       provider prompt
+                       (llm-provider-chat-extract-result provider data)
+                       (llm-provider-extract-function-calls provider data)))))
+     :on-error (lambda (_ data)
+                 (llm-provider-utils-callback-in-buffer
+                  buf error-callback 'error
+                  (if (stringp data)
+                      data
+                    (or (llm-provider-chat-extract-error
+                         provider data)
+                        "Unknown error")))))))
+
+(cl-defmethod llm-chat-streaming ((provider llm-standard-chat-provider) prompt 
partial-callback
+                                  response-callback error-callback)
+  (llm-provider-request-prelude provider)
+  (let ((buf (current-buffer))
+        (current-text "")
+        (fc nil))
+    (llm-request-plz-async
+     (llm-provider-chat-streaming-url provider)
+     :headers (llm-provider-headers provider)
+     :data (llm-provider-chat-request provider prompt t)
+     :media-type (llm-provider-streaming-media-handler
+                  provider
+                  (lambda (s)
+                    (when (> (length s) 0)
+                      (setq current-text
+                            (concat current-text s))
+                      (when partial-callback
+                        (llm-provider-utils-callback-in-buffer
+                         buf partial-callback current-text))))
+                  (lambda (fc-new) (push fc-new fc)))
+     :on-success
+     (lambda (data)
+       (llm-provider-utils-callback-in-buffer
+        buf response-callback
+        (llm-provider-utils-process-result
+         provider prompt
+         current-text
+         (llm-provider-collect-streaming-function-data
+                 provider (nreverse fc)))))
+     :on-error (lambda (_ data)
+                 (llm-provider-utils-callback-in-buffer
+                  buf error-callback 'error
+                  (if (stringp data)
+                      data
+                    (or (llm-provider-chat-extract-error
+                         provider data)
+                        "Unknown error")))))))
+
 (defun llm-provider-utils-get-system-prompt (prompt &optional example-prelude)
   "From PROMPT, turn the context and examples into a string.
 EXAMPLE-PRELUDE is a string to prepend to the examples."
@@ -59,15 +309,16 @@ If there is an assistance response, do nothing."
                             (eq (llm-chat-prompt-interaction-role interaction) 
'system))
                           (llm-chat-prompt-interactions prompt)))
           (system-content (llm-provider-utils-get-system-prompt prompt 
example-prelude)))
-      (if system-prompt
-          (setf (llm-chat-prompt-interaction-content system-prompt)
-                (concat (llm-chat-prompt-interaction-content system-prompt)
-                        "\n"
-                        system-content))
-        (push (make-llm-chat-prompt-interaction
-               :role 'system
-               :content system-content)
-              (llm-chat-prompt-interactions prompt))))))
+      (when (and system-content (> (length system-content) 0))
+       (if system-prompt
+            (setf (llm-chat-prompt-interaction-content system-prompt)
+                  (concat (llm-chat-prompt-interaction-content system-prompt)
+                          "\n"
+                          system-content))
+          (push (make-llm-chat-prompt-interaction
+                :role 'system
+                :content system-content)
+               (llm-chat-prompt-interactions prompt)))))))
 
 (defun llm-provider-utils-combine-to-user-prompt (prompt &optional 
example-prelude)
   "Add context and examples to a user prompt in PROMPT.
@@ -89,7 +340,7 @@ things.  Providers should probably issue a warning when 
using this."
     (setf (llm-chat-prompt-interactions prompt)
           (list (make-llm-chat-prompt-interaction
                  :role 'user
-                 :content 
+                 :content
                  (concat (or history-prelude "Previous interactions:") "\n\n"
                          (mapconcat (lambda (interaction)
                                       (format "%s: %s" (pcase 
(llm-chat-prompt-interaction-role interaction)
@@ -205,32 +456,37 @@ NAME is the function name.
 ARG is an alist of arguments to values."
   id name args)
 
-(cl-defgeneric llm-provider-utils-populate-function-calls (provider prompt 
calls)
-  "For PROVIDER, in PROMPT, record that function CALLS were received.
-This is the recording before the calls were executed.
-CALLS are a list of `llm-provider-utils-function-call'."
-  (ignore provider prompt calls)
-  (signal 'not-implemented nil))
+(defun llm-provider-utils-process-result (provider prompt text funcalls)
+  "Process the RESPONSE from the provider for PROMPT.
+This execute function calls if there are any, does any result
+appending to the prompt, and returns an appropriate response for
+the client.
+
+FUNCALLS is a list of function calls, if any.
+
+TEXT is the text output from the provider, if any.  There should
+be either FUNCALLS or TEXT."
+  (if-let ((funcalls funcalls))
+      ;; If we have function calls, execute them and return the results;
+      ;; this also takes care of updating the prompt.
+      (llm-provider-utils-execute-function-calls provider prompt funcalls)
+    (llm-provider-append-to-prompt provider prompt text)
+    text))
 
-(defun llm-provider-utils-populate-function-results (prompt func result)
+(defun llm-provider-utils-populate-function-results (provider prompt func 
result)
   "Append the RESULT of FUNC to PROMPT.
 FUNC is a `llm-provider-utils-function-call' struct."
-  (llm-provider-utils-append-to-prompt
-   prompt result (make-llm-chat-prompt-function-call-result
-                  :call-id (llm-provider-utils-function-call-id func)
-                  :function-name (llm-provider-utils-function-call-name func)
-                  :result result)))
+  (llm-provider-append-to-prompt
+   provider prompt result
+   (make-llm-chat-prompt-function-call-result
+    :call-id (llm-provider-utils-function-call-id func)
+    :function-name (llm-provider-utils-function-call-name func)
+    :result result)))
 
-(defun llm-provider-utils-process-result (provider prompt response)
-  "From RESPONSE, execute function call.
+(defun llm-provider-utils-execute-function-calls (provider prompt funcalls)
+  "Execute FUNCALLS, a list of `llm-provider-utils-function-calls'.
 
-RESPONSE is either a string or list of
-`llm-provider-utils-function-calls'.
-
-This should be called with any response that might have function
-calls. If the response is a string, nothing will happen, but in
-either case, the response suitable for returning to the client
-will be returned.
+A response suitable for returning to the client will be returned.
 
 PROVIDER is the provider that supplied the response.
 
@@ -240,38 +496,44 @@ function call, the result.
 
 This returns the response suitable for output to the client; a
 cons of functions called and their output."
-  (if (consp response)
-      (progn
-        ;; Then this must be a function call, return the cons of a the funcion
-        ;; called and the result.
-        (llm-provider-utils-populate-function-calls provider prompt response)
-        (cl-loop for func in response collect
-                        (let* ((name (llm-provider-utils-function-call-name 
func))
-                               (arguments 
(llm-provider-utils-function-call-args func))
-                               (function (seq-find
-                                          (lambda (f) (equal name 
(llm-function-call-name f)))
-                                          (llm-chat-prompt-functions prompt))))
-                          (cons name
-                                (let* ((args (cl-loop for arg in 
(llm-function-call-args function)
-                                                      collect (cdr (seq-find 
(lambda (a)
-                                                                               
(eq (intern
-                                                                               
     (llm-function-arg-name arg))
-                                                                               
    (car a)))
-                                                                             
arguments))))
-                                       (result (apply 
(llm-function-call-function function) args)))
-                                  (llm-provider-utils-populate-function-results
-                                   prompt func result)
-                                  (llm--log
-                                   'api-funcall
-                                   :provider provider
-                                   :msg (format "%s --> %s"
-                                                (format "%S"
-                                                        (cons 
(llm-function-call-name function)
-                                                              args))
-                                                (format "%s" result)))
-                                  result)))))
-    (llm-provider-utils-append-to-prompt prompt response)
-    response))
+  (llm-provider-populate-function-calls provider prompt funcalls)
+  (cl-loop for func in funcalls collect
+           (let* ((name (llm-provider-utils-function-call-name func))
+                  (arguments (llm-provider-utils-function-call-args func))
+                  (function (seq-find
+                             (lambda (f) (equal name (llm-function-call-name 
f)))
+                             (llm-chat-prompt-functions prompt))))
+             (cons name
+                   (let* ((args (cl-loop for arg in (llm-function-call-args 
function)
+                                         collect (cdr (seq-find (lambda (a)
+                                                                  (eq (intern
+                                                                       
(llm-function-arg-name arg))
+                                                                      (car a)))
+                                                                arguments))))
+                          (result (apply (llm-function-call-function function) 
args)))
+                     (llm-provider-utils-populate-function-results
+                     provider prompt func result)
+                     (llm--log
+                      'api-funcall
+                      :provider provider
+                      :msg (format "%s --> %s"
+                                   (format "%S"
+                                           (cons (llm-function-call-name 
function)
+                                                 args))
+                                   (format "%s" result)))
+                     result)))))
+
+
+;; This is a useful method for getting out of the request buffer when it's time
+;; to make callbacks.
+(defun llm-provider-utils-callback-in-buffer (buf f &rest args)
+  "Run F with ARSG in the context of BUF.
+But if BUF has been killed, use a temporary buffer instead.
+If F is nil, nothing is done."
+  (when f
+    (if (buffer-live-p buf)
+        (with-current-buffer buf (apply f args))
+      (with-temp-buffer (apply f args)))))
 
 (provide 'llm-provider-utils)
 ;;; llm-provider-utils.el ends here
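
To show how the refactored generics fit together, here is a minimal sketch of a hypothetical chat-only provider.  The struct name, endpoint, and JSON field names are invented for illustration, and error extraction is left at the defaults:

(cl-defstruct (my-echo-provider (:include llm-standard-chat-provider))
  key)

(cl-defmethod llm-provider-chat-url ((_ my-echo-provider))
  "https://chat.example.invalid/v1/complete")

(cl-defmethod llm-provider-headers ((provider my-echo-provider))
  `(("Authorization" . ,(format "Bearer %s" (my-echo-provider-key provider)))))

(cl-defmethod llm-provider-chat-request ((_ my-echo-provider) prompt _)
  ;; Flatten the prompt interactions into the (made-up) wire format.
  `(("messages" . ,(mapcar (lambda (i)
                             `(("role" . ,(symbol-name (llm-chat-prompt-interaction-role i)))
                               ("content" . ,(llm-chat-prompt-interaction-content i))))
                           (llm-chat-prompt-interactions prompt)))))

(cl-defmethod llm-provider-chat-extract-result ((_ my-echo-provider) response)
  (assoc-default 'text response))

With just these methods, the `llm-chat' and `llm-chat-async' implementations above supply the request plumbing, error handling, and buffer-safe callbacks.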
diff --git a/llm-request-plz.el b/llm-request-plz.el
index ea733f8a31..7224e6dced 100644
--- a/llm-request-plz.el
+++ b/llm-request-plz.el
@@ -159,106 +159,6 @@ only used by other methods in this file."
               (llm-request-plz--handle-error error on-error)))
     :timeout (or timeout llm-request-plz-timeout)))
 
-(cl-defun llm-request-plz-json-array (url &key headers data on-error on-success
-                                          on-element timeout)
-  "Make a request to URL.
-
-HEADERS will be added in the Authorization header, in addition to
-standard json header. This is optional.
-
-DATA will be jsonified and sent as the request body.
-This is required.
-
-ON-SUCCESS will be called with the response body as a json
-object. This is optional in the case that ON-SUCCESS-DATA is set,
-and required otherwise.
-
-ON-ELEMENT will be called with each new element in the enclosing
-JSON array that is being streamed.
-
-ON-ERROR will be called with the error code and a response-body.
-This is required.
-"
-  (llm-request-plz-async url
-                         :headers headers
-                         :data data
-                         :on-error on-error
-                         :on-success on-success
-                         :timeout timeout
-                         :media-type
-                         (cons 'application/json
-                               (plz-media-type:application/json-array
-                                :handler on-element))))
-
-(cl-defun llm-request-plz-ndjson (url &key headers data on-error on-success
-                                      on-object timeout)
-  "Make a request to URL.
-
-HEADERS will be added in the Authorization header, in addition to
-standard json header. This is optional.
-
-DATA will be jsonified and sent as the request body.
-This is required.
-
-ON-SUCCESS will be called with the response body as a json
-object. This is optional in the case that ON-SUCCESS-DATA is set,
-and required otherwise.
-
-ON-OBJECT will be called with each new object received.
-
-ON-ERROR will be called with the error code and a response-body.
-This is required.
-"
-  (llm-request-plz-async url
-                         :headers headers
-                         :data data
-                         :on-error on-error
-                         :on-success on-success
-                         :timeout timeout
-                         :media-type
-                         (cons 'application/x-ndjson
-                               (plz-media-type:application/x-ndjson
-                                :handler on-object))))
-
-(cl-defun llm-request-plz-event-stream (url &key headers data on-error 
on-success
-                                            event-stream-handlers timeout)
-  "Make a request to URL.
-Nothing will be returned.
-
-HEADERS will be added in the Authorization header, in addition to
-standard json header. This is optional.
-
-DATA will be jsonified and sent as the request body.
-This is required.
-
-ON-SUCCESS will be called with the response body as a json
-object. This is optional in the case that ON-SUCCESS-DATA is set,
-and required otherwise.
-
-EVENT-STREAM-HANDLERS are an alist of event names to functions
-that handle the event's corresponding data, which will be called
-with the new event data as a string.
-
-ON-ERROR will be called with the error code and a response-body.
-This is required.
-"
-  (llm-request-plz-async url
-                         :headers headers
-                         :data data
-                         :on-error on-error
-                         :on-success on-success
-                         :timeout timeout
-                         :media-type
-                         (cons 'text/event-stream
-                                (plz-event-source:text/event-stream
-                                 ;; Convert so that each event handler gets 
the body, not the
-                                 ;; `plz-response' itself.
-                                 :events (mapcar
-                                          (lambda (cons)
-                                            (cons (car cons)
-                                                  (lambda (_ resp) (funcall 
(cdr cons) (plz-event-source-event-data resp)))))
-                                          event-stream-handlers)))))
-
 ;; This is a useful method for getting out of the request buffer when it's time
 ;; to make callbacks.
 (defun llm-request-plz-callback-in-buffer (buf f &rest args)
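
Since the dedicated event-stream, JSON-array, and NDJSON wrappers are gone, callers now pass the plz media type to `llm-request-plz-async' directly.  A rough sketch, with an invented endpoint and key:

(llm-request-plz-async
 "https://api.example.invalid/v1/stream"
 :headers '(("Authorization" . "Bearer hypothetical-key"))
 :data '(("stream" . t))
 :media-type (cons 'text/event-stream
                   (plz-event-source:text/event-stream
                    :events `((message
                               . ,(lambda (_ event)
                                    (message "chunk: %s"
                                             (plz-event-source-event-data event)))))))
 :on-success (lambda (_) (message "stream finished"))
 :on-error (lambda (code data) (message "error %s: %S" code data)))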
diff --git a/llm-request.el b/llm-request.el
index 6329e89664..449680d460 100644
--- a/llm-request.el
+++ b/llm-request.el
@@ -135,18 +135,24 @@ response body, and expect the response content. This is an
 optional argument, and mostly useful for streaming.  If not set,
 the buffer is turned into JSON and passed to ON-SUCCESS."
   (let ((url-request-method "POST")
-        ;; This is necessary for streaming, otherwise we get gzip'd data that 
is
-        ;; unparseable until the end. The responses should be small enough that
-        ;; this should not be any big loss.
-        (url-mime-encoding-string "identity")
         (url-request-extra-headers
          (append headers '(("Content-Type" . "application/json"))))
-        (url-request-data (encode-coding-string (json-encode data) 'utf-8)))
+        (url-request-data (encode-coding-string (json-encode data) 'utf-8))
+        (old-mime-encoding url-mime-encoding-string))
+    ;; This is necessary for streaming, otherwise we get gzip'd data that is
+    ;; unparseable until the end. The responses should be small enough that 
this
+    ;; should not be any big loss.  We can't use let-binding here, since the 
use
+    ;; of this variable happens asynchronously, so not enclosed by the
+    ;; let-binding.
+    (setq url-mime-encoding-string "identity")
     (let ((buffer
            (url-retrieve
             url
             ;; For some reason the closure you'd expect did not work here.
             (lambda (_ on-success on-error)
+              ;; Restore the old mime encoding.  This may cause race conditions
+              ;; if we try to stream two things at around the same time.
+              (setq url-mime-encoding-string old-mime-encoding)
               ;; No matter what, we need to stop listening for changes.
               (remove-hook 'after-change-functions 
#'llm-request--handle-new-content t)
               (condition-case error
@@ -168,16 +174,5 @@ the buffer is turned into JSON and passed to ON-SUCCESS."
                     nil t)))
       buffer)))
 
-;; This is a useful method for getting out of the request buffer when it's time
-;; to make callbacks.
-(defun llm-request-callback-in-buffer (buf f &rest args)
-  "Run F with ARSG in the context of BUF.
-But if BUF has been killed, use a temporary buffer instead.
-If F is nil, nothing is done."
-  (when f
-    (if (buffer-live-p buf)
-        (with-current-buffer buf (apply f args))
-      (with-temp-buffer (apply f args)))))
-
 (provide 'llm-request)
 ;;; llm-request.el ends here
diff --git a/llm-tester.el b/llm-tester.el
index 01f1ec1474..2688818148 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -145,7 +145,7 @@
        (llm-tester-log "SUCCESS: Provider %s provided a streamed response in 
%d parts:\n%s" (type-of provider) counter streamed)
        (when (and (member 'streaming (llm-capabilities provider))
                   (not (string= streamed text)))
-           (llm-tester-log "ERROR: Provider %s returned a streamed response 
that was not equal to the final response.  Streamed text %s" (type-of provider) 
streamed))
+           (llm-tester-log "ERROR: Provider %s returned a streamed response that was not equal to the final response.  Streamed text:\n%s\nFinal response:\n%s" (type-of provider) streamed text))
        (when (and (member 'streaming (llm-capabilities provider)) (= 0 
counter))
            (llm-tester-log "WARNING: Provider %s returned no partial updates!" 
(type-of provider))))
      (lambda (type message)
@@ -153,6 +153,27 @@
          (llm-tester-log "ERROR: Provider %s returned a response not in the 
original buffer" (type-of provider)))
        (llm-tester-log "ERROR: Provider %s returned an error of type %s with 
message %s" (type-of provider) type message)))))
 
+(defun llm-tester-verify-prompt (prompt)
+  "Test PROMPT to make sure there are no obvious problems"
+  (mapc (lambda (i)
+             (when (equal (llm-chat-prompt-interaction-content i) "")
+               (llm-tester-log "ERROR: prompt had an empty interaction")))
+           (llm-chat-prompt-interactions prompt))
+  (when (> (length (seq-filter
+                           (lambda (i)
+                             (eq
+                              (llm-chat-prompt-interaction-role i) 'system))
+                           (llm-chat-prompt-interactions prompt)))
+              1)
+    (llm-tester-log "ERROR: prompt had more than one system interaction"))
+  ;; Test that we don't have two of the same role in a row
+  (let ((last nil))
+    (mapc (lambda (i)
+            (when (eq (llm-chat-prompt-interaction-role i) last)
+              (llm-tester-log "ERROR: prompt had two interactions in a row 
with the same role"))
+            (setq last (llm-chat-prompt-interaction-role i)))
+          (llm-chat-prompt-interactions prompt))))
+
 (defun llm-tester-chat-conversation-sync (provider)
   "Test that PROVIDER can handle a conversation."
   (llm-tester-log "Testing provider %s for conversation" (type-of provider))
@@ -160,10 +181,13 @@
                  "I'm currently testing conversational abilities.  Please 
respond to each message with the ordinal number of your response, so just '1' 
for the first response, '2' for the second, and so on.  It's important that I 
can verify that you are working with the full conversation history, so please 
let me know if you seem to be missing anything."))
         (outputs nil))
     (push (llm-chat provider prompt) outputs)
+    (llm-tester-verify-prompt prompt)
     (llm-chat-prompt-append-response prompt "This is the second message.")
     (push (llm-chat provider prompt) outputs)
+    (llm-tester-verify-prompt prompt)
     (llm-chat-prompt-append-response prompt "This is the third message.")
     (push (llm-chat provider prompt) outputs)
+    (llm-tester-verify-prompt prompt)
     (llm-tester-log "SUCCESS: Provider %s provided a conversation with 
responses %s" (type-of provider)
              (nreverse outputs))))
 
@@ -178,15 +202,19 @@
                     (lambda (response)
                       (push response outputs)
                       (llm-chat-prompt-append-response prompt "This is the 
second message.")
+                     (llm-tester-verify-prompt prompt)
                       (llm-chat-async provider prompt
                                       (lambda (response)
                                         (unless (eq buf (current-buffer))
                                           (llm-tester-log "ERROR: Provider %s 
returned a response not in the original buffer" (type-of provider)))
                                         (push response outputs)
                                         (llm-chat-prompt-append-response 
prompt "This is the third message.")
+                                       (llm-tester-verify-prompt prompt)
                                         (llm-chat-async provider prompt
                                                         (lambda (response)
                                                           (push response 
outputs)
+                                                         
(llm-tester-verify-prompt prompt)
+
                                                           (llm-tester-log 
"SUCCESS: Provider %s provided a conversation with responses %s" (type-of 
provider) (nreverse outputs)))
                                                         (lambda (type message)
                                                           (llm-tester-log 
"ERROR: Provider %s returned an error of type %s with message %s" (type-of 
provider) type message))))
@@ -210,16 +238,19 @@
        (lambda ()
          (goto-char (point-max)) (insert "\n")
          (llm-chat-prompt-append-response prompt "This is the second message.")
+        (llm-tester-verify-prompt prompt)
          (llm-chat-streaming-to-point
           provider prompt
           buf (with-current-buffer buf (point-max))
           (lambda ()
             (goto-char (point-max)) (insert "\n")
             (llm-chat-prompt-append-response prompt "This is the third 
message.")
+           (llm-tester-verify-prompt prompt)
             (llm-chat-streaming-to-point
              provider prompt buf (with-current-buffer buf (point-max))
              (lambda ()
                (llm-tester-log "SUCCESS: Provider %s provided a conversation 
with responses %s" (type-of provider) (buffer-string))
+              (llm-tester-verify-prompt prompt)
                (kill-buffer buf))))))))))
 
 (defun llm-tester-create-test-function-prompt ()
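
As a sketch of what the new verifier catches, here is a prompt with two user turns in a row, built with helpers already used in this file (the message text is arbitrary):

(let ((prompt (llm-make-simple-chat-prompt "First message")))
  ;; Appending a second user turn without an assistant reply in between
  ;; leaves two consecutive interactions with the same role.
  (llm-chat-prompt-append-response prompt "Second message")
  (llm-tester-verify-prompt prompt))
;; Logs: "ERROR: prompt had two interactions in a row with the same role"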
diff --git a/llm-vertex.el b/llm-vertex.el
index 3ee7083f0e..63828c6983 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -1,6 +1,6 @@
 ;;; llm-vertex.el --- LLM implementation of Google Cloud Vertex AI -*- 
lexical-binding: t; package-lint-main-file: "llm.el"; -*-
 
-;; Copyright (c) 2023  Free Software Foundation, Inc.
+;; Copyright (c) 2023  Free Software Foundation, Inc.
 
 ;; Author: Andrew Hyatt <ahy...@gmail.com>
 ;; Homepage: https://github.com/ahyatt/llm
@@ -64,7 +64,11 @@ for more specialized uses."
   :type 'string
   :group 'llm-vertex)
 
-(cl-defstruct llm-vertex
+(cl-defstruct (llm-google (:include llm-standard-full-provider))
+  "A base class for functionality that is common to both Vertex and
+Gemini.")
+
+(cl-defstruct (llm-vertex (:include llm-google))
   "A struct representing a Vertex AI client.
 
 KEY is the temporary API key for the Vertex AI. It is required to
@@ -81,7 +85,9 @@ KEY-GENTIME keeps track of when the key was generated, 
because the key must be r
   (chat-model llm-vertex-default-chat-model)
   key-gentime)
 
-(defun llm-vertex-refresh-key (provider)
+;; API reference: 
https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/send-chat-prompts-gemini#gemini-chat-samples-drest
+
+(cl-defmethod llm-provider-request-prelude ((provider llm-vertex))
   "Refresh the key in the vertex PROVIDER, if needed."
   (unless (and (llm-vertex-key provider)
                (> (* 60 60)
@@ -94,64 +100,38 @@ KEY-GENTIME keeps track of when the key was generated, 
because the key must be r
       (setf (llm-vertex-key provider) (encode-coding-string result 'utf-8)))
     (setf (llm-vertex-key-gentime provider) (current-time))))
 
-(cl-defmethod llm-nonfree-message-info ((provider llm-vertex))
-  (ignore provider)
-  (cons "Google Cloud Vertex" 
"https://policies.google.com/terms/generative-ai";))
+(cl-defmethod llm-nonfree-message-info ((_ llm-vertex))
+  "https://policies.google.com/terms/generative-ai";)
 
-(defun llm-vertex--embedding-url (provider)
-  "From the PROVIDER, return the URL to use for embeddings"
+(cl-defmethod llm-provider-embedding-url ((provider llm-vertex))
   (format 
"https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict";
-                             llm-vertex-gcloud-region
-                             (llm-vertex-project provider)
-                             llm-vertex-gcloud-region
-                             (or (llm-vertex-embedding-model provider) 
"textembedding-gecko")))
-
-(defun llm-vertex--embedding-extract-response (response)
-  "Return the embedding contained in RESPONSE."
-  (cdr (assoc 'values (cdr (assoc 'embeddings (aref (cdr (assoc 'predictions 
response)) 0))))))
-
-(defun llm-vertex--error-message (err-response)
-  "Return a user-visible error message from ERR-RESPONSE."
-  (let ((err (assoc-default 'error err-response)))
+          llm-vertex-gcloud-region
+          (llm-vertex-project provider)
+          llm-vertex-gcloud-region
+          (or (llm-vertex-embedding-model provider) "textembedding-gecko")))
+
+(cl-defmethod llm-provider-embedding-extract-result ((_ llm-vertex) response)
+  (assoc-default 'values (assoc-default 'embeddings (aref (assoc-default 
'predictions response) 0))))
+
+(cl-defmethod llm-provider-embedding-extract-error ((provider llm-google) 
err-response)
+  (llm-provider-chat-extract-error provider err-response))
+
+(cl-defmethod llm-provider-chat-extract-error ((_ llm-google) err-response)
+  (when-let ((err (assoc-default 'error err-response)))
     (format "Problem calling GCloud Vertex AI: status: %s message: %s"
             (assoc-default 'code err)
             (assoc-default 'message err))))
 
-(defun llm-vertex--handle-response (response extractor)
-  "If RESPONSE is an errorp, throw it, else call EXTRACTOR."
-  (if (assoc 'error response)
-      (error (llm-vertex--error-message response))
-    (funcall extractor response)))
-
-(cl-defmethod llm-embedding-async ((provider llm-vertex) string 
vector-callback error-callback)
-  (llm-vertex-refresh-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async
-     (llm-vertex--embedding-url provider)
-     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key 
provider))))
-     :data `(("instances" . [(("content" . ,string))]))
-     :on-success (lambda (data)
-                   (llm-request-callback-in-buffer
-                    buf vector-callback 
(llm-vertex--embedding-extract-response data)))
-     :on-error (lambda (_ data)
-                 (llm-request-callback-in-buffer
-                  buf error-callback
-                  'error (llm-vertex--error-message data))))))
-
-(cl-defmethod llm-embedding ((provider llm-vertex) string)
-  (llm-vertex-refresh-key provider)
-  (llm-vertex--handle-response
-   (llm-request-plz-sync (llm-vertex--embedding-url provider)
-                         :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
-                         :data `(("instances" . [(("content" . ,string))])))
-   #'llm-vertex--embedding-extract-response))
-
-(defun llm-vertex--get-chat-response (response)
-  "Return the actual response from the RESPONSE struct returned.
-This handles different kinds of models."
+(cl-defmethod llm-provider-embedding-request ((_ llm-vertex) string)
+  `(("instances" . [(("content" . ,string))])))
+
+(cl-defmethod llm-provider-headers ((provider llm-vertex))
+  `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider)))))
+
+(cl-defmethod llm-provider-chat-extract-result ((provider llm-google) response)
   (pcase (type-of response)
     ('vector (when (> (length response) 0)
-               (let ((parts (mapcar #'llm-vertex--get-chat-response response)))
+               (let ((parts (mapcar (lambda (part)
+                                      (llm-provider-chat-extract-result provider part))
+                                    response)))
                  (if (stringp (car parts))
                      (mapconcat #'identity parts "")
                    (car parts)))))
@@ -160,20 +140,29 @@ This handles different kinds of models."
                              'parts
                              (assoc-default 'content
                                             (aref (assoc-default 'candidates 
response) 0)))))
-                 (if parts
-                      (or (assoc-default 'text (aref parts 0))
-                         ;; Change function calling from almost Open AI's
-                         ;; standard format to exactly the format.
-                         (mapcar (lambda (call)
-                                   `(function . ,(mapcar (lambda (c) (if (eq 
(car c) 'args) (cons 'arguments (cdr c)) c))
-                                                         (cdar call))))
-                                 parts))
-                   ""))
+                 (when parts
+                   (assoc-default 'text (aref parts 0))))
              "NOTE: No response was sent back by the LLM, the prompt may have 
violated safety checks."))))
 
-(defun llm-vertex--chat-request (prompt)
-  "Return an alist with chat input for the streaming API.
-PROMPT contains the input to the call to the chat API."
+(cl-defmethod llm-provider-extract-function-calls ((provider llm-google) 
response)
+  (if (vectorp response)
+      (llm-provider-extract-function-calls provider (aref response 0))
+    (mapcar (lambda (call)
+              (make-llm-provider-utils-function-call
+               :name (assoc-default 'name call)
+               :args (assoc-default 'args call)))
+            (mapcan (lambda (maybe-call)
+                      (when-let ((fc (assoc-default 'functionCall maybe-call)))
+                       (list fc)))
+                    (assoc-default
+                     'parts (assoc-default
+                             'content
+                             (aref (assoc-default 'candidates response) 
0)))))))
+
+(cl-defmethod llm-provider-extract-streamed-function-calls ((provider 
llm-google) response)
+  (llm-provider-extract-function-calls provider (json-read-from-string 
response)))
+
+(cl-defmethod llm-provider-chat-request ((_ llm-google) prompt _)
   (llm-provider-utils-combine-to-user-prompt prompt llm-vertex-example-prelude)
   (append
    `((contents
@@ -228,17 +217,7 @@ nothing to add, in which case it is nil."
     (when params-alist
       `((generation_config . ,params-alist)))))
 
-(defun llm-vertex--normalize-function-calls (response)
-  "If RESPONSE has function calls, transform them to our common format."
-  (if (consp response)
-      (mapcar (lambda (f)
-                (make-llm-provider-utils-function-call
-                 :name (assoc-default 'name (cdr f))
-                 :args (assoc-default 'arguments (cdr f))))
-              response)
-    response))
-
-(cl-defmethod llm-provider-utils-populate-function-calls ((_ llm-vertex) 
prompt calls)
+(cl-defmethod llm-provider-populate-function-calls ((_ llm-vertex) prompt 
calls)
   (llm-provider-utils-append-to-prompt
    prompt
    ;; For Vertex there is just going to be one call
@@ -249,32 +228,22 @@ nothing to add, in which case it is nil."
                  (args . ,(llm-provider-utils-function-call-args fc))))))
            calls)))
 
-(defun llm-vertex--process-and-return (provider prompt response &optional 
error-callback)
-  "Process RESPONSE from the PROVIDER.
-
-This returns the response to be given to the client.
-
-Any functions will be executed.
-
-The response will be added to PROMPT.
-
-Provider is the llm provider, for logging purposes.
+(cl-defmethod llm-provider-streaming-media-handler ((provider llm-google)
+                                                    msg-receiver fc-receiver)
+  (cons 'application/json
+        (plz-media-type:application/json-array
+         :handler
+         (lambda (element)
+           (if-let ((response (llm-provider-chat-extract-result provider 
element)))
+              (funcall msg-receiver response)
+            (when-let ((fc (llm-provider-extract-function-calls provider 
element)))
+                 (funcall fc-receiver fc)))))))
 
-ERROR-CALLBACK is called when an error is detected."
-  (if (and (consp response)
-           (assoc-default 'error response))
-      (progn
-        (when error-callback
-          (funcall error-callback 'error (llm-vertex--error-message response)))
-        response))
-  (let ((return-val
-         (llm-provider-utils-process-result
-          provider prompt
-          (llm-vertex--normalize-function-calls response))))
-    return-val))
+(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-google) 
data)
+  (car data))
 
 (defun llm-vertex--chat-url (provider &optional streaming)
-"Return the correct url to use for PROVIDER.
+  "Return the correct url to use for PROVIDER.
 If STREAMING is non-nil, use the URL for the streaming API."
   (format 
"https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s";
           llm-vertex-gcloud-region
@@ -283,77 +252,15 @@ If STREAMING is non-nil, use the URL for the streaming 
API."
           (llm-vertex-chat-model provider)
           (if streaming "streamGenerateContent" "generateContent")))
 
-;; API reference: 
https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/send-chat-prompts-gemini#gemini-chat-samples-drest
-(cl-defmethod llm-chat ((provider llm-vertex) prompt)
-  ;; Gemini just has a streaming response, but we can just call it 
synchronously.
-  (llm-vertex-refresh-key provider)
-  (llm-vertex--process-and-return
-   provider prompt
-   (llm-vertex--get-chat-response
-    (llm-request-plz-sync (llm-vertex--chat-url provider)
-                          :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
-                          :data (llm-vertex--chat-request prompt)))))
-
-(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback 
error-callback)
-  (llm-vertex-refresh-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async
-     (llm-vertex--chat-url provider)
-     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key 
provider))))
-     :data (llm-vertex--chat-request prompt)
-     :on-success (lambda (data)
-                   (llm-request-callback-in-buffer
-                    buf response-callback
-                    (llm-vertex--process-and-return
-                     provider prompt (llm-vertex--get-chat-response data))))
-     :on-error (lambda (_ data)
-                 (llm-request-callback-in-buffer buf error-callback 'error
-                                                 (llm-vertex--error-message 
data))))))
-
-(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt 
partial-callback response-callback error-callback)
-  (llm-vertex-refresh-key provider)
-  (let ((buf (current-buffer))
-        (streamed-text "")
-        (function-call nil))
-    (llm-request-plz-json-array
-     (llm-vertex--chat-url provider t)
-     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key 
provider))))
-     :data (llm-vertex--chat-request prompt)
-     :on-element (lambda (element)
-                   (if (alist-get 'error element)
-                       (llm-request-callback-in-buffer buf error-callback 
'error
-                                                       
(llm-vertex--error-message element))
-                     (when-let ((response (llm-vertex--get-chat-response 
element)))
-                       (if (stringp response)
-                           (when (> (length response) 0)
-                             (setq streamed-text (concat streamed-text 
response))
-                             (llm-request-callback-in-buffer buf 
partial-callback streamed-text))
-                         (setq function-call response)))))
-     :on-success (lambda (data)
-                   (llm-request-callback-in-buffer
-                    buf response-callback
-                    (llm-vertex--process-and-return
-                     provider prompt (or function-call
-                                         (if (> (length streamed-text) 0)
-                                             streamed-text
-                                           (llm-vertex--get-chat-response 
data))))))
-     :on-error (lambda (_ data)
-                 (llm-request-callback-in-buffer buf error-callback 'error
-                                                 (llm-vertex--error-message 
data))))))
+(cl-defmethod llm-provider-chat-url ((provider llm-vertex))
+  (llm-vertex--chat-url provider))
+
+(cl-defmethod llm-provider-chat-streaming-url ((provider llm-vertex))
+  (llm-vertex--chat-url provider t))
 
 ;; Token counts
 ;; https://cloud.google.com/vertex-ai/docs/generative-ai/get-token-count
 
-(defun llm-vertex--count-token-url (provider)
-  "Return the URL to use for the Vertex API.
-PROVIDER is the llm provider.
-MODEL "
-  (format 
"https://%s-aiplatform.googleapis.com/v1beta1/projects/%s/locations/%s/publishers/google/models/%s:countTokens";
-          llm-vertex-gcloud-region
-          (llm-vertex-project provider)
-          llm-vertex-gcloud-region
-          (llm-vertex-chat-model provider)))
-
 (defun llm-vertex--to-count-token-request (request)
   "Return a version of REQUEST that is suitable for counting tokens."
   (seq-filter (lambda (c) (and (not (equal (car c) "parameters"))
@@ -363,15 +270,29 @@ MODEL "
   "Extract the token count from the response."
   (assoc-default 'totalTokens response))
 
-(cl-defmethod llm-count-tokens ((provider llm-vertex) string)
-  (llm-vertex-refresh-key provider)
-  (llm-vertex--handle-response
-   (llm-request-sync (llm-vertex--count-token-url provider)
-                     :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
-                     :data (llm-vertex--to-count-token-request
-                            (llm-vertex--chat-request
-                             (llm-make-simple-chat-prompt string))))
-   #'llm-vertex--count-tokens-extract-response))
+(cl-defgeneric llm-google-count-tokens-url (provider)
+  "The URL for PROVIDER to count tokens.")
+
+(cl-defmethod llm-google-count-tokens-url ((provider llm-vertex))
+  (format 
"https://%s-aiplatform.googleapis.com/v1beta1/projects/%s/locations/%s/publishers/google/models/%s:countTokens";
+          llm-vertex-gcloud-region
+          (llm-vertex-project provider)
+          llm-vertex-gcloud-region
+          (llm-vertex-chat-model provider)))
+
+(cl-defmethod llm-count-tokens ((provider llm-google) string)
+  (llm-provider-request-prelude provider)
+  (let ((response (llm-request-sync 
+                   (llm-google-count-tokens-url provider)
+                   :headers (llm-provider-headers provider)
+                   :data (llm-vertex--to-count-token-request
+                          (llm-provider-chat-request
+                           provider
+                           (llm-make-simple-chat-prompt string)
+                           nil)))))
+    (when-let ((err (llm-provider-chat-extract-error provider response)))
+      (error err))
+    (llm-vertex--count-tokens-extract-response response)))
 
 (cl-defmethod llm-name ((_ llm-vertex))
   "Gemini")
diff --git a/llm.el b/llm.el
index 380b3d6b69..e3d79cf725 100644
--- a/llm.el
+++ b/llm.el
@@ -5,7 +5,7 @@
 ;; Author: Andrew Hyatt <ahy...@gmail.com>
 ;; Homepage: https://github.com/ahyatt/llm
 ;; Package-Requires: ((emacs "28.1"))
-;; Package-Version: 0.12.1
+;; Package-Version: 0.12.3
 ;; SPDX-License-Identifier: GPL-3.0-or-later
 ;;
 ;; This program is free software; you can redistribute it and/or
@@ -207,8 +207,7 @@ ROLE default to `user', which should almost always be what 
is needed."
 
 (cl-defgeneric llm-nonfree-message-info (provider)
   "If PROVIDER is non-free, return info for a warning.
-This should be a cons of the name of the LLM, and the URL of the
-terms of service.
+This should be the string URL of the terms of service.
 
 If the LLM is free and has no restrictions on use, this should
 return nil.  Since this function already returns nil, there is no
@@ -237,7 +236,7 @@ conversation so far."
 (cl-defmethod llm-chat :before (provider _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
-    (llm--warn-on-nonfree (car info) (cdr info))))
+    (llm--warn-on-nonfree (llm-name provider) info)))
 
 (cl-defmethod llm-chat :around (provider prompt)
   "Log the input to llm-chat."
@@ -342,7 +341,7 @@ be passed to `llm-cancel-request'."
 (cl-defmethod llm-chat-streaming :before (provider _ _ _ _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
-    (llm--warn-on-nonfree (car info) (cdr info))))
+    (llm--warn-on-nonfree (llm-name provider) info)))
 
 (cl-defmethod llm-chat-streaming :around (provider prompt partial-callback 
response-callback error-callback)
   "Log the input to llm-chat-async."
@@ -402,7 +401,7 @@ be passed to `llm-cancel-request'."
 (cl-defmethod llm-chat-async :before (provider _ _ _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
-    (llm--warn-on-nonfree (car info) (cdr info))))
+    (llm--warn-on-nonfree (llm-name provider) info)))
 
 (cl-defgeneric llm-capabilities (provider)
   "Return a list of the capabilities of PROVIDER.
@@ -443,7 +442,7 @@ value that should be a reasonable lower bound."
 (cl-defmethod llm-embedding :before (provider _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
-    (llm--warn-on-nonfree (car info) (cdr info))))
+    (llm--warn-on-nonfree (llm-name provider) info)))
 
 (cl-defgeneric llm-embedding-async (provider string vector-callback 
error-callback)
   "Calculate a vector embedding of STRING from PROVIDER.
@@ -463,7 +462,7 @@ be passed to `llm-cancel-request'."
 (cl-defmethod llm-embedding-async :before (provider _ _ _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
-    (llm--warn-on-nonfree (car info) (cdr info))))
+    (llm--warn-on-nonfree (llm-name provider) info)))
 
 (cl-defgeneric llm-count-tokens (provider string)
   "Return the number of tokens in STRING from PROVIDER.

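To illustrate the changed `llm-nonfree-message-info' contract, here is a sketch for a hypothetical non-free provider (the struct name and URL are invented):

(cl-defstruct (my-hosted-provider (:include llm-standard-full-provider)))

(cl-defmethod llm-nonfree-message-info ((_ my-hosted-provider))
  "https://example.invalid/terms-of-service")

(cl-defmethod llm-name ((_ my-hosted-provider))
  "Example Hosted LLM")

;; The :before methods above then warn with
;; (llm--warn-on-nonfree (llm-name provider) info), pairing the display
;; name with the terms-of-service URL.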