branch: elpa/gptel commit d9ce451f21812576b2a906382214cc81fce3fc45 Author: Karthik Chikmagalur <karthikchikmaga...@gmail.com> Commit: Karthik Chikmagalur <karthikchikmaga...@gmail.com>
gptel-anthropic: Add prompt caching (#355) * gptel.el (gptel-cache): Add user option to control prompt caching by the LLM. Client-configurable caching is only available with the Anthropic API right now, so this option has no effect on the behavior of other backends. * gptel-anthropic.el: (gptel--request-data, gptel--parse-list, gptel--parse-buffer, gptel--anthropic-models): Cache prompts (messages), system message and/or tool definitions according to `gptel-cache'. * README.org: Mention `gptel-cache'. --- README.org | 1 + gptel-anthropic.el | 46 +++++++++++++++++++++++++++++++++++++--------- gptel.el | 29 +++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/README.org b/README.org index 2986056bf4..0928457a62 100644 --- a/README.org +++ b/README.org @@ -1316,6 +1316,7 @@ Other Emacs clients for LLMs prescribe the format of the interaction (a comint s | =gptel-directives= | Alist of system directives, can switch on the fly. | | =gptel-max-tokens= | Maximum token count (in query + response). | | =gptel-temperature= | Randomness in response text, 0 to 2. | +| =gptel-cache= | Cache prompts, system message or tools (Anthropic only) | | =gptel-use-context= | How/whether to include additional context | | =gptel-use-tools= | Disable, allow or force LLM tool-use | | =gptel-tools= | List of tools to include with requests | diff --git a/gptel-anthropic.el b/gptel-anthropic.el index bdaebf73a0..95ef4feb03 100644 --- a/gptel-anthropic.el +++ b/gptel-anthropic.el @@ -203,15 +203,25 @@ Mutate state INFO with response metadata." 
:max_tokens ,(or gptel-max-tokens 1024) :messages [,@prompts]))) (when gptel--system-message - (plist-put prompts-plist :system gptel--system-message)) + (if (and (or (eq gptel-cache t) (memq 'system gptel-cache)) + (gptel--model-capable-p 'cache)) + ;; gptel--system-message is guaranteed to be a string + (plist-put prompts-plist :system + `[(:type "text" :text ,gptel--system-message + :cache_control (:type "ephemeral"))]) + (plist-put prompts-plist :system gptel--system-message))) (when gptel-temperature (plist-put prompts-plist :temperature gptel-temperature)) (when gptel-use-tools (when (eq gptel-use-tools 'force) (plist-put prompts-plist :tool_choice '(:type "any"))) (when gptel-tools - (plist-put prompts-plist :tools - (gptel--parse-tools backend gptel-tools)))) + (let ((tools-array (gptel--parse-tools backend gptel-tools))) + (plist-put prompts-plist :tools tools-array) + (when (and (or (eq gptel-cache t) (memq 'tool gptel-cache)) + (gptel--model-capable-p 'cache)) + (nconc (aref tools-array (1- (length tools-array))) + '(:cache_control (:type "ephemeral"))))))) ;; Merge request params with model and backend params. (gptel--merge-plists prompts-plist @@ -294,12 +304,20 @@ TOOL-USE is a list of plists containing tool names, arguments and call results." 
(message "Unexpected tool_call_id format: %s" tool-id) tool-id))) -(cl-defmethod gptel--parse-list ((_backend gptel-anthropic) prompt-list) +(cl-defmethod gptel--parse-list ((backend gptel-anthropic) prompt-list) (cl-loop for text in prompt-list for role = t then (not role) - if text collect - (list :role (if role "user" "assistant") - :content `[(:type "text" :text ,text)]))) + if text + collect (list :role (if role "user" "assistant") + :content `[(:type "text" :text ,text)]) + into prompts + finally do + ;; cache messages if required: add cache_control to the last message + (if (and (or (eq gptel-cache t) (memq 'message gptel-cache)) + (gptel--model-capable-p 'cache)) + (nconc (aref (plist-get (car (last prompts)) :content) 0) + '(:cache_control (:type "ephemeral")))) + finally return prompts)) (cl-defmethod gptel--parse-buffer ((backend gptel-anthropic) &optional max-entries) (let ((prompts) (prev-pt (point)) @@ -360,6 +378,16 @@ TOOL-USE is a list of plists containing tool names, arguments and call results." ;; XXX fails if content is empty. The correct error behavior is left to ;; a future discussion. (push (list :role "user" :content content) prompts))) + ;; Cache messages if required: add cache_control to the last message + (if (and (or (eq gptel-cache t) (memq 'message gptel-cache)) + (gptel--model-capable-p 'cache)) + (let ((last-message (plist-get (car (last prompts)) :content))) + (if (stringp last-message) + (plist-put + (car (last prompts)) :content + `[(:type "text" :text ,last-message :cache_control (:type "ephemeral"))]) + (nconc (aref (plist-get (car (last prompts)) :content) 0) + '(:cache_control (:type "ephemeral")))))) prompts)) (defun gptel--anthropic-parse-multipart (parts) @@ -467,7 +495,7 @@ files in the context." 
:cutoff-date "2024-04") (claude-3-5-haiku-20241022 :description "Intelligence at blazing speeds" - :capabilities (tool-use) + :capabilities (tool-use cache) :context-window 200 :input-cost 1.00 :output-cost 5.00 @@ -490,7 +518,7 @@ files in the context." :cutoff-date "2023-08") (claude-3-haiku-20240307 :description "Fast and most compact model for near-instant responsiveness" - :capabilities (tool-use) + :capabilities (tool-use cache) :context-window 200 :input-cost 0.25 :output-cost 1.25 diff --git a/gptel.el b/gptel.el index 798bcb02c3..1fddf22c90 100644 --- a/gptel.el +++ b/gptel.el @@ -479,6 +479,35 @@ To set the temperature for a chat session interactively call :safe #'always :type 'number) +(defcustom gptel-cache nil + "Whether the LLM should cache request content. + +Some LLM backends can cache content sent to it by gptel, so that +only the newly included part of the text needs to be processed on +subsequent conversation turns. This results in faster and +significantly cheaper processing. + +NOTE: Manual or client-configurable caching is currently +supported only by the Anthropic API and thus the +`gptel-anthropic' backend. This variable has no effect on the +behavior of other backends. + +This variable controls which parts of the query will be cached, +and can be the symbols t or nil to cache everything or nothing +respectively. It can also be a list of symbols: + +- message: Cache conversation messages +- system: Cache the system message +- tool: Cache tool definitions + +Examples: + +Setting it to (message system) will cache the system message and +the conversation text. + +Setting it to (message system tool) will cache everything and is +the same as t.") + (defvar gptel--known-backends) (defconst gptel--openai-models