branch: elpa/gptel
commit d9ce451f21812576b2a906382214cc81fce3fc45
Author: Karthik Chikmagalur <karthikchikmaga...@gmail.com>
Commit: Karthik Chikmagalur <karthikchikmaga...@gmail.com>

    gptel-anthropic: Add prompt caching (#355)
    
    * gptel.el (gptel-cache): Add user option to control prompt
    caching by the LLM.  Client-configurable caching is only available
    with the Anthropic API right now, so this option has no effect on
    the behavior of other backends.
    
    * gptel-anthropic.el: (gptel--request-data, gptel--parse-list,
    gptel--parse-buffer, gptel--anthropic-models): Cache
    prompts (messages), system message and/or tool definitions
    according to `gptel-cache'.
    
    * README.org: Mention `gptel-cache'.
---
 README.org         |  1 +
 gptel-anthropic.el | 46 +++++++++++++++++++++++++++++++++++++---------
 gptel.el           | 29 +++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/README.org b/README.org
index 2986056bf4..0928457a62 100644
--- a/README.org
+++ b/README.org
@@ -1316,6 +1316,7 @@ Other Emacs clients for LLMs prescribe the format of the 
interaction (a comint s
 | =gptel-directives=    | Alist of system directives, can switch on the fly.   
   |
 | =gptel-max-tokens=    | Maximum token count (in query + response).           
   |
 | =gptel-temperature=   | Randomness in response text, 0 to 2.                 
   |
+| =gptel-cache=         | Cache prompts, system message or tools (Anthropic only) |
 | =gptel-use-context=   | How/whether to include additional context            
   |
 | =gptel-use-tools=     | Disable, allow or force LLM tool-use                 
   |
 | =gptel-tools=         | List of tools to include with requests               
   |
diff --git a/gptel-anthropic.el b/gptel-anthropic.el
index bdaebf73a0..95ef4feb03 100644
--- a/gptel-anthropic.el
+++ b/gptel-anthropic.el
@@ -203,15 +203,25 @@ Mutate state INFO with response metadata."
            :max_tokens ,(or gptel-max-tokens 1024)
            :messages [,@prompts])))
     (when gptel--system-message
-      (plist-put prompts-plist :system gptel--system-message))
+      (if (and (or (eq gptel-cache t) (memq 'system gptel-cache))
+               (gptel--model-capable-p 'cache))
+          ;; gptel--system-message is guaranteed to be a string
+          (plist-put prompts-plist :system
+                     `[(:type "text" :text ,gptel--system-message
+                        :cache_control (:type "ephemeral"))])
+        (plist-put prompts-plist :system gptel--system-message)))
     (when gptel-temperature
       (plist-put prompts-plist :temperature gptel-temperature))
     (when gptel-use-tools
       (when (eq gptel-use-tools 'force)
         (plist-put prompts-plist :tool_choice '(:type "any")))
       (when gptel-tools
-        (plist-put prompts-plist :tools
-                   (gptel--parse-tools backend gptel-tools))))
+        (let ((tools-array (gptel--parse-tools backend gptel-tools)))
+          (plist-put prompts-plist :tools tools-array)
+          (when (and (or (eq gptel-cache t) (memq 'tool gptel-cache))
+                     (gptel--model-capable-p 'cache))
+            (nconc (aref tools-array (1- (length tools-array)))
+                   '(:cache_control (:type "ephemeral")))))))
     ;; Merge request params with model and backend params.
     (gptel--merge-plists
      prompts-plist
@@ -294,12 +304,20 @@ TOOL-USE is a list of plists containing tool names, 
arguments and call results."
         (message "Unexpected tool_call_id format: %s" tool-id)
         tool-id)))
 
-(cl-defmethod gptel--parse-list ((_backend gptel-anthropic) prompt-list)
+(cl-defmethod gptel--parse-list ((backend gptel-anthropic) prompt-list)
   (cl-loop for text in prompt-list
            for role = t then (not role)
-           if text collect
-           (list :role (if role "user" "assistant")
-                 :content `[(:type "text" :text ,text)])))
+           if text
+           collect (list :role (if role "user" "assistant")
+                         :content `[(:type "text" :text ,text)])
+           into prompts
+           finally do
+           ;; cache messages if required: add cache_control to the last message
+           (if (and (or (eq gptel-cache t) (memq 'message gptel-cache))
+                    (gptel--model-capable-p 'cache))
+               (nconc (aref (plist-get (car (last prompts)) :content) 0)
+                      '(:cache_control (:type "ephemeral"))))
+           finally return prompts))
 
 (cl-defmethod gptel--parse-buffer ((backend gptel-anthropic) &optional 
max-entries)
   (let ((prompts) (prev-pt (point))
@@ -360,6 +378,16 @@ TOOL-USE is a list of plists containing tool names, 
arguments and call results."
         ;; XXX fails if content is empty.  The correct error behavior is left 
to
         ;; a future discussion.
         (push (list :role "user" :content content) prompts)))
+    ;; Cache messages if required: add cache_control to the last message
+    (if (and (or (eq gptel-cache t) (memq 'message gptel-cache))
+             (gptel--model-capable-p 'cache))
+        (let ((last-message (plist-get (car (last prompts)) :content)))
+          (if (stringp last-message)
+              (plist-put
+               (car (last prompts)) :content
+               `[(:type "text" :text ,last-message :cache_control (:type 
"ephemeral"))])
+            (nconc (aref (plist-get (car (last prompts)) :content) 0)
+                   '(:cache_control (:type "ephemeral"))))))
     prompts))
 
 (defun gptel--anthropic-parse-multipart (parts)
@@ -467,7 +495,7 @@ files in the context."
      :cutoff-date "2024-04")
     (claude-3-5-haiku-20241022
      :description "Intelligence at blazing speeds"
-     :capabilities (tool-use)
+     :capabilities (tool-use cache)
      :context-window 200
      :input-cost 1.00
      :output-cost 5.00
@@ -490,7 +518,7 @@ files in the context."
      :cutoff-date "2023-08")
     (claude-3-haiku-20240307
      :description "Fast and most compact model for near-instant responsiveness"
-     :capabilities (tool-use)
+     :capabilities (tool-use cache)
      :context-window 200
      :input-cost 0.25
      :output-cost 1.25
diff --git a/gptel.el b/gptel.el
index 798bcb02c3..1fddf22c90 100644
--- a/gptel.el
+++ b/gptel.el
@@ -479,6 +479,35 @@ To set the temperature for a chat session interactively 
call
   :safe #'always
   :type 'number)
 
+(defcustom gptel-cache nil
+  "Whether the LLM should cache request content.
+
+Some LLM backends can cache content sent to it by gptel, so that
+only the newly included part of the text needs to be processed on
+subsequent conversation turns.  This results in faster and
+significantly cheaper processing.
+
+NOTE: Manual or client-configurable caching is currently
+supported only by the Anthropic API and thus the
+`gptel-anthropic' backend.  This variable has no effect on the
+behavior of other backends.
+
+This variable controls which parts of the query will be cached,
+and can be the symbols t or nil to cache everything or nothing
+respectively.  It can also be a list of symbols:
+
+- message: Cache conversation messages
+- system: Cache the system message
+- tool: Cache tool definitions
+
+Examples:
+
+Setting it to (message system) will cache the system message and
+the conversation text.
+
+Setting it to (message system tool) will cache everything and is
+the same as t.")
+
 (defvar gptel--known-backends)
 
 (defconst gptel--openai-models

Reply via email to