branch: externals/llm commit 0f25d08218bdd3ea808d6d15328482327a551471 Merge: efe218ac13 6054da6b58 Author: Andrew Hyatt <ahy...@gmail.com> Commit: Andrew Hyatt <ahy...@gmail.com>
Merge branch 'plz' --- llm-claude.el | 42 +-- llm-gemini.el | 2 +- llm-ollama.el | 93 ++----- llm-openai.el | 132 ++++------ llm-provider-utils.el | 108 +++++--- llm-request-plz.el | 169 ++++++++++++ llm-vertex.el | 31 +-- llm.el | 7 +- plz-event-source.el | 477 +++++++++++++++++++++++++++++++++ plz-media-type.el | 712 ++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 1529 insertions(+), 244 deletions(-) diff --git a/llm-claude.el b/llm-claude.el index 816be8d194..7ad49fb410 100644 --- a/llm-claude.el +++ b/llm-claude.el @@ -28,6 +28,7 @@ (require 'llm) (require 'llm-request) (require 'llm-provider-utils) +(require 'plz-event-source) (require 'rx) ;; Models defined at https://docs.anthropic.com/claude/docs/models-overview @@ -98,27 +99,26 @@ (assoc-default 'text content) (format "Unsupported non-text response: %s" content)))) -;; see https://docs.anthropic.com/claude/reference/messages-streaming -(cl-defmethod llm-provider-extract-partial-response ((_ llm-claude) response) - "Return the partial response from text RESPONSE." - (let ((regex (rx (seq "\"text\":" (0+ whitespace) - (group-n 1 ?\" (0+ anychar) ?\") (0+ whitespace) ?} (0+ whitespace) ?})))) - (with-temp-buffer - (insert response) - ;; We use the quick and dirty solution of just looking for any line that - ;; has a "text" field. - (let ((matched-lines)) - (goto-char (point-min)) - (while (re-search-forward "\"text\":" nil t) - (push (buffer-substring-no-properties - (line-beginning-position) - (line-end-position)) - matched-lines)) - (mapconcat (lambda (line) - (if (string-match regex line) - (read (match-string 1 line)) - (warn "Could not parse streaming response: %s" line))) - (nreverse matched-lines) ""))))) +(cl-defmethod llm-provider-streaming-media-handler ((_ llm-claude) + msg-receiver _ err-receiver) + (cons 'text/event-stream + (plz-event-source:text/event-stream + :events `((message_start . ignore) + (content_block_start . ignore) + (ping . ignore) + (message_stop . ignore) + (content_block_stop . ignore) + (error . ,(lambda (event) + (funcall err-receiver (plz-event-source-event-data event)))) + (content_block_delta + . + ,(lambda (event) + (let* ((data (plz-event-source-event-data event)) + (json (json-parse-string data :object-type 'alist)) + (delta (assoc-default 'delta json)) + (type (assoc-default 'type delta))) + (when (equal type "text_delta") + (funcall msg-receiver (assoc-default 'text delta)))))))))) (cl-defmethod llm-provider-headers ((provider llm-claude)) `(("x-api-key" . ,(llm-claude-key provider)) diff --git a/llm-gemini.el b/llm-gemini.el index 0e1a0451bd..cd1ec171ff 100644 --- a/llm-gemini.el +++ b/llm-gemini.el @@ -28,7 +28,7 @@ (require 'cl-lib) (require 'llm) -(require 'llm-request) +(require 'llm-request-plz) (require 'llm-vertex) (require 'llm-provider-utils) (require 'json) diff --git a/llm-ollama.el b/llm-ollama.el index 2e86bfdb22..a7c2dae7fa 100644 --- a/llm-ollama.el +++ b/llm-ollama.el @@ -27,9 +27,10 @@ (require 'cl-lib) (require 'llm) -(require 'llm-request) +(require 'llm-request-plz) (require 'llm-provider-utils) (require 'json) +(require 'plz-media-type) (defgroup llm-ollama nil "LLM implementation for Ollama." @@ -91,7 +92,11 @@ PROVIDER is the llm-ollama provider." "Return the embedding from the server RESPONSE." (assoc-default 'embedding response)) -(cl-defmethod llm-provider-chat-request ((provider llm-ollama) prompt _) +(cl-defmethod llm-provider-chat-extract-result ((_ llm-ollama) response) + "Return the chat response from the server RESPONSE" + (assoc-default 'content (assoc-default 'message response))) + +(cl-defmethod llm-provider-chat-request ((provider llm-ollama) prompt streaming) (let (request-alist messages options) (setq messages (mapcar (lambda (interaction) @@ -104,6 +109,7 @@ PROVIDER is the llm-ollama provider." messages)) (push `("messages" . ,messages) request-alist) (push `("model" . ,(llm-ollama-chat-model provider)) request-alist) + (push `("stream" . ,(if streaming t :json-false)) request-alist) (when (llm-chat-prompt-temperature prompt) (push `("temperature" . ,(llm-chat-prompt-temperature prompt)) options)) (when (llm-chat-prompt-max-tokens prompt) @@ -111,81 +117,14 @@ PROVIDER is the llm-ollama provider." (when options (push `("options" . ,options) request-alist)) request-alist)) -(cl-defmethod llm-provider-embedding-extract-error ((_ llm-ollama) err-response) - (assoc-default 'error err-response)) - -(cl-defmethod llm-provider-chat-extract-error ((provider llm-ollama) err-response) - (llm-provider-embedding-extract-error provider err-response)) - -(defvar-local llm-ollama-current-response "" - "The response so far from the server.") - -(defvar-local llm-ollama-last-response 0 - "The last response number we've read.") - -(cl-defmethod llm-provider-extract-partial-response ((_ llm-ollama) response) - "Return the text in the partial chat response from RESPONSE." - ;; To begin with, we should still be in the buffer with the actual response. - (let ((current-response llm-ollama-current-response) - (last-response llm-ollama-last-response)) - (with-temp-buffer - (insert response) - ;; Responses in ollama are always one per line. - (let* ((end-pos (save-excursion (goto-char (point-max)) - (when (search-backward-regexp - (rx (seq "done\":false}" line-end)) - nil t) - (line-end-position))))) - (when end-pos - (let ((all-lines (seq-filter - (lambda (line) (string-match-p (rx (seq string-start ?{)) line)) - (split-string (buffer-substring-no-properties 1 end-pos) "\n" t)))) - (setq - current-response - (concat current-response - (mapconcat - (lambda (line) (assoc-default 'content (assoc-default 'message (json-read-from-string line)))) - ;; Take from response output last-response to the end. This - ;; counts only valid responses, so we need to throw out all - ;; other lines that aren't valid JSON. - (seq-subseq all-lines last-response) ""))) - (setq last-response (length all-lines)))))) - ;; If there is no new content, don't manipulate anything. - (when (> (length current-response) (length llm-ollama-current-response)) - (setq llm-ollama-last-response last-response) - (setq llm-ollama-current-response current-response)) - current-response)) - -(defun llm-ollama--get-final-response (response) - "Return the final post-streaming json output from RESPONSE." - (with-temp-buffer - (insert response) - ;; Find the last json object in the buffer. - (goto-char (point-max)) - (search-backward "{" nil t) - (json-read))) - -;; Ollama chat is a streaming API, so we need to handle it differently tha normal. - -(cl-defmethod llm-chat ((provider llm-ollama) prompt) - ;; We expect to be in a new buffer with the response, which we use to store - ;; local variables. The temp buffer won't have the response, but that's fine, - ;; we really just need it for the local variables. - (with-temp-buffer - (let ((output (llm-request-sync-raw-output - (llm-provider-chat-url provider) - :data (llm-provider-chat-request provider prompt t) - ;; ollama is run on a user's machine, and it can take a while. - :timeout llm-ollama-chat-timeout))) - (setf (llm-chat-prompt-interactions prompt) - (append (llm-chat-prompt-interactions prompt) - (list (make-llm-chat-prompt-interaction - :role 'assistant - :content (assoc-default 'context (llm-ollama--get-final-response output)))))) - (llm-provider-extract-partial-response provider output)))) - -(cl-defmethod llm-chat-async ((provider llm-ollama) prompt response-callback error-callback) - (llm-chat-streaming provider prompt #'ignore response-callback error-callback)) +(cl-defmethod llm-provider-streaming-media-handler ((_ llm-ollama) msg-receiver _ _) + (cons 'application/x-ndjson + (plz-media-type:application/x-ndjson + :handler (lambda (data) + (when-let ((response (assoc-default + 'content + (assoc-default 'message data)))) + (funcall msg-receiver response)))))) (cl-defmethod llm-name ((provider llm-ollama)) (llm-ollama-chat-model provider)) diff --git a/llm-openai.el b/llm-openai.el index 6020acba1a..ba9b007b39 100644 --- a/llm-openai.el +++ b/llm-openai.el @@ -27,9 +27,10 @@ (require 'cl-lib) (require 'llm) -(require 'llm-request) +(require 'llm-request-plz) (require 'llm-provider-utils) (require 'json) +(require 'plz-event-source) (defgroup llm-openai nil "LLM implementation for Open AI." @@ -75,6 +76,9 @@ MODEL is the embedding model to use, or nil to use the default.." "Return the embedding from the server RESPONSE." (assoc-default 'embedding (aref (assoc-default 'data response) 0))) +(cl-defgeneric llm-openai--check-key (provider) + "Check that the key is set for the Open AI provider.") + (cl-defmethod llm-openai--check-key ((provider llm-openai)) (unless (llm-openai-key provider) (error "To call Open AI API, add a key to the `llm-openai' provider."))) @@ -192,94 +196,52 @@ STREAMING if non-nil, turn on response streaming." (llm-provider-utils-function-call-args call)))))) calls))) -(defvar-local llm-openai-current-response "" - "The response so far from the server.") - -(defvar-local llm-openai-last-response 0 - "The number of the last streaming response we read. -The responses from OpenAI are not numbered, but we just number -them from 1 to however many are sent.") - -(defun llm-openai--get-unparsed-json (response) - "Return the unparsed JSON from RESPONSE. -The response is a list of all the responses, regardless of -whether they have been parsed before or not." - (with-temp-buffer - (insert response) - (let* ((complete-rx (rx (seq line-start "data: "))) - (end-pos (save-excursion (goto-char (point-max)) - (when (search-backward-regexp - complete-rx - nil t) - (line-end-position))))) - (when end-pos - (mapcar (lambda (line) (replace-regexp-in-string "data: " "" line)) - (seq-filter - (lambda (line) - (and (string-match-p complete-rx line) - (not (string-match-p (rx (seq line-start "data: [DONE]")) - line)))) - (split-string (buffer-substring-no-properties 1 end-pos) "\n"))))))) - -(cl-defmethod llm-provider-extract-partial-response ((_ llm-openai) response) - "Return the text in the partial chat response from RESPONSE." - ;; To begin with, we should still be in the buffer with the actual response. - (let ((current-response llm-openai-current-response) - (last-response llm-openai-last-response)) - (let* ((all-lines (llm-openai--get-unparsed-json response)) - (processed-lines - (mapcar (lambda (json) - (assoc-default 'content - (assoc-default - 'delta - (aref (assoc-default - 'choices - (json-read-from-string json)) - 0)))) - (seq-subseq all-lines last-response)))) - (when (stringp (car processed-lines)) - ;; The data is a string - a normal response, which we just - ;; append to current-response (assuming it's also a string, - ;; which it should be). - (setq current-response - (concat current-response (string-join processed-lines "")))) - (setq last-response (length all-lines))) - (when (>= (length current-response) (length llm-openai-current-response)) - (setq llm-openai-current-response current-response) - (setq llm-openai-last-response last-response)) - (when (> (length llm-openai-current-response) 0) - llm-openai-current-response))) - -(cl-defmethod llm-provider-extract-streamed-function-calls ((_ llm-openai) response) - (let* ((pieces (mapcar (lambda (json) - (assoc-default 'tool_calls - (assoc-default - 'delta - (aref (assoc-default - 'choices - (json-read-from-string json)) - 0)))) - (llm-openai--get-unparsed-json response))) - (cvec (make-vector (length (car pieces)) (make-llm-provider-utils-function-call)))) - (cl-loop for piece in pieces do - (cl-loop for call in (append piece nil) do - (let* ((index (assoc-default 'index call)) - (id (assoc-default 'id call)) - (function (assoc-default 'function call)) - (name (assoc-default 'name function)) - (arguments (assoc-default 'arguments function))) - (when id - (setf (llm-provider-utils-function-call-id (aref cvec index)) id)) - (when name - (setf (llm-provider-utils-function-call-name (aref cvec index)) name)) - (setf (llm-provider-utils-function-call-args (aref cvec index)) - (concat (llm-provider-utils-function-call-args (aref cvec index)) - arguments))))) +(defun llm-openai--get-partial-chat-response (response) + "Return the text in the partial chat response from RESPONSE. +RESPONSE can be nil if the response is complete." + (when response + (let* ((choices (assoc-default 'choices response)) + (delta (when (> (length choices) 0) + (assoc-default 'delta (aref choices 0)))) + (content-or-call (or (assoc-default 'content delta) + (assoc-default 'tool_calls delta)))) + content-or-call))) + +(cl-defmethod llm-provider-streaming-media-handler ((_ llm-openai) msg-receiver fc-receiver _) + (cons 'text/event-stream + (plz-event-source:text/event-stream + :events `((message + . + ,(lambda (event) + (let ((data (plz-event-source-event-data event))) + (unless (equal data "[DONE]") + (when-let ((response (llm-openai--get-partial-chat-response + (json-read-from-string data)))) + (funcall (if (stringp response) msg-receiver fc-receiver) response)))))))))) + +(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-openai) data) + (let ((cvec (make-vector (length (car data)) nil))) + (dotimes (i (length (car data))) + (setf (aref cvec i) (make-llm-provider-utils-function-call))) + (cl-loop for part in data do + (cl-loop for call in (append part nil) do + (let* ((index (assoc-default 'index call)) + (id (assoc-default 'id call)) + (function (assoc-default 'function call)) + (name (assoc-default 'name function)) + (arguments (assoc-default 'arguments function))) + (when id + (setf (llm-provider-utils-function-call-id (aref cvec index)) id)) + (when name + (setf (llm-provider-utils-function-call-name (aref cvec index)) name)) + (setf (llm-provider-utils-function-call-args (aref cvec index)) + (concat (llm-provider-utils-function-call-args (aref cvec index)) + arguments))))) (cl-loop for call in (append cvec nil) do (setf (llm-provider-utils-function-call-args call) (json-read-from-string (llm-provider-utils-function-call-args call))) finally return (when (> (length cvec) 0) - (append cvec nil))))) + (append cvec nil))))) (cl-defmethod llm-name ((_ llm-openai)) "Open AI") diff --git a/llm-provider-utils.el b/llm-provider-utils.el index 825121adca..5b752f50c2 100644 --- a/llm-provider-utils.el +++ b/llm-provider-utils.el @@ -22,7 +22,7 @@ ;;; Code: (require 'llm) -(require 'llm-request) +(require 'llm-request-plz) (require 'seq) (cl-defstruct llm-standard-provider @@ -119,9 +119,21 @@ FUNC-RESULTS is a list of function results, if any.") "By default, the standard provider appends to the prompt." (llm-provider-utils-append-to-prompt prompt result func-results)) -(cl-defgeneric llm-provider-extract-partial-response (provider response) - "Extract the result string from partial RESPONSE for the PROVIDER. -This should return the entire string so far.") +(cl-defgeneric llm-provider-streaming-media-handler (provider msg-receiver fc-receiver err-receiver) + "Define how to handle streaming media for the PROVIDER. + +This should return a cons of the media type and an instance that +handle objects of that type. + +The handlers defined can call MSG-RECEIVER when they receive part +of a text message for the client (a chat response). If they +receive a function call, they should call FC-RECEIVER with the +function call. If they receive an error, they should call +ERR-RECEIVER with the error message.") + +(cl-defmethod llm-provider-streaming-media-handler ((_ llm-standard-chat-provider) _ _ _) + "By default, the standard provider has no streaming media handler." + nil) ;; Methods for chat function calling @@ -140,21 +152,26 @@ function calls, return a list of This is the recording before the calls were executed. CALLS are a list of `llm-provider-utils-function-call'.") -(cl-defgeneric llm-provider-extract-streamed-function-calls (provider response) - "Extract the result string from partial RESPONSE for the PROVIDER.") +(cl-defgeneric llm-provider-collect-streaming-function-data (provider data) + "Transform a list of streaming function call DATA responses. -(cl-defmethod llm-provider-extract-streamed-function-calls ((_ llm-standard-chat-provider) _) - "By default, there are no function calls." +The DATA responses are a list of whatever is sent to the function +call handler in `llm-provider-streaming-media-handler'. This should +return a list of `llm-chat-function-call' structs.") + +(cl-defmethod llm-provider-collect-streaming-function-data ((provider llm-standard-chat-provider) data) + "By default, there is no streaming function calling." nil) ;; Standard provider implementations of llm functionality (cl-defmethod llm-embedding ((provider llm-standard-full-provider) string) (llm-provider-request-prelude provider) - (let ((response (llm-request-sync (llm-provider-embedding-url provider) - :timeout (llm-provider-chat-timeout provider) - :headers (llm-provider-headers provider) - :data (llm-provider-embedding-request provider string)))) + (let ((response (llm-request-plz-sync + (llm-provider-embedding-url provider) + :timeout (llm-provider-chat-timeout provider) + :headers (llm-provider-headers provider) + :data (llm-provider-embedding-request provider string)))) (if-let ((err-msg (llm-provider-embedding-extract-error provider response))) (error err-msg) (llm-provider-embedding-extract-result provider response)))) @@ -162,7 +179,7 @@ CALLS are a list of `llm-provider-utils-function-call'.") (cl-defmethod llm-embedding-async ((provider llm-standard-full-provider) string vector-callback error-callback) (llm-provider-request-prelude provider) (let ((buf (current-buffer))) - (llm-request-async + (llm-request-plz-async (llm-provider-embedding-url provider) :headers (llm-provider-headers provider) :data (llm-provider-embedding-request provider string) @@ -173,8 +190,7 @@ CALLS are a list of `llm-provider-utils-function-call'.") err-msg) (llm-provider-utils-callback-in-buffer buf vector-callback - (llm-provider-embedding-extract-result provider data))) - (kill-current-buffer)) + (llm-provider-embedding-extract-result provider data)))) :on-error (lambda (_ data) (llm-provider-utils-callback-in-buffer buf error-callback 'error @@ -182,14 +198,13 @@ CALLS are a list of `llm-provider-utils-function-call'.") data (or (llm-provider-embedding-extract-error provider data) - "Unknown error"))) - (kill-current-buffer))))) + "Unknown error"))))))) (cl-defmethod llm-chat ((provider llm-standard-chat-provider) prompt) (llm-provider-request-prelude provider) - (let ((response (llm-request-sync (llm-provider-chat-url provider) - :headers (llm-provider-headers provider) - :data (llm-provider-chat-request provider prompt nil)))) + (let ((response (llm-request-plz-sync (llm-provider-chat-url provider) + :headers (llm-provider-headers provider) + :data (llm-provider-chat-request provider prompt nil)))) (if-let ((err-msg (llm-provider-chat-extract-error provider response))) (error err-msg) (llm-provider-utils-process-result provider prompt @@ -202,7 +217,7 @@ CALLS are a list of `llm-provider-utils-function-call'.") error-callback) (llm-provider-request-prelude provider) (let ((buf (current-buffer))) - (llm-request-async + (llm-request-plz-async (llm-provider-chat-url provider) :headers (llm-provider-headers provider) :data (llm-provider-chat-request provider prompt nil) @@ -216,8 +231,7 @@ CALLS are a list of `llm-provider-utils-function-call'.") (llm-provider-utils-process-result provider prompt (llm-provider-chat-extract-result provider data) - (llm-provider-extract-function-calls provider data)))) - (kill-current-buffer)) + (llm-provider-extract-function-calls provider data))))) :on-error (lambda (_ data) (llm-provider-utils-callback-in-buffer buf error-callback 'error @@ -225,35 +239,41 @@ CALLS are a list of `llm-provider-utils-function-call'.") data (or (llm-provider-chat-extract-error provider data) - "Unknown error"))) - (kill-current-buffer))))) + "Unknown error"))))))) (cl-defmethod llm-chat-streaming ((provider llm-standard-chat-provider) prompt partial-callback response-callback error-callback) (llm-provider-request-prelude provider) - (let ((buf (current-buffer))) - (llm-request-async + (let ((buf (current-buffer)) + (current-text "") + (fc nil)) + (llm-request-plz-async (llm-provider-chat-streaming-url provider) :headers (llm-provider-headers provider) :data (llm-provider-chat-request provider prompt t) - :on-partial - (lambda (data) - ;; We won't have a result if this is a streaming function call, - ;; so we don't call on-partial in that case. - (when-let ((result (llm-provider-extract-partial-response provider data))) - ;; Let's not be so strict, a partial response empty string - ;; should be equivalent to nil. - (when (> (length result) 0) - (llm-provider-utils-callback-in-buffer buf partial-callback result)))) - :on-success-raw + :media-type (llm-provider-streaming-media-handler + provider + (lambda (s) + (when (> (length s) 0) + (setq current-text + (concat current-text s)) + (when partial-callback + (llm-provider-utils-callback-in-buffer + buf partial-callback current-text)))) + (lambda (fc-new) (push fc-new fc)) + (lambda (err) + (llm-provider-utils-callback-in-buffer + buf error-callback 'error + err))) + :on-success (lambda (data) (llm-provider-utils-callback-in-buffer buf response-callback (llm-provider-utils-process-result provider prompt - (llm-provider-extract-partial-response provider data) - (llm-provider-extract-streamed-function-calls provider data))) - (kill-current-buffer)) + current-text + (llm-provider-collect-streaming-function-data + provider (nreverse fc))))) :on-error (lambda (_ data) (llm-provider-utils-callback-in-buffer buf error-callback 'error @@ -261,8 +281,7 @@ CALLS are a list of `llm-provider-utils-function-call'.") data (or (llm-provider-chat-extract-error provider data) - "Unknown error"))) - (kill-current-buffer))))) + "Unknown error"))))))) (defun llm-provider-utils-get-system-prompt (prompt &optional example-prelude) "From PROMPT, turn the context and examples into a string. @@ -459,7 +478,10 @@ be either FUNCALLS or TEXT." ;; If we have function calls, execute them and return the results, and ;; it talso takes care of updating the prompt. (llm-provider-utils-execute-function-calls provider prompt funcalls) - (llm-provider-append-to-prompt provider prompt text) + ;; We probably shouldn't be called if text is nil, but if we do, + ;; we shouldn't add something invalid to the prompt. + (when text + (llm-provider-append-to-prompt provider prompt text)) text)) (defun llm-provider-utils-populate-function-results (provider prompt func result) diff --git a/llm-request-plz.el b/llm-request-plz.el new file mode 100644 index 0000000000..feae69f049 --- /dev/null +++ b/llm-request-plz.el @@ -0,0 +1,169 @@ +;;; llm-request-plz.el --- Curl request handling code -*- lexical-binding: t; package-lint-main-file: "llm.el"; -*- + +;; Copyright (c) 2023 Free Software Foundation, Inc. + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 3 of the +;; License, or (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: +;; This file provides basic functions for providers who need to request data. It +;; assumes the server is using json. + +;;; Code: +(require 'cl-macs) +(require 'json) +(require 'plz-event-source) +(require 'plz-media-type) +(require 'rx) +(require 'url-http) + +(defcustom llm-request-plz-timeout nil + "The number of seconds to wait for a response from a HTTP server. + +When set to nil, don't timeout while receiving a response. +Request timings are depending on the request. Requests that need +more output may take more time, and there is other processing +besides just token generation that can take a while. Sometimes +the LLM can get stuck, and you don't want it to take too long. +This should be balanced to be good enough for hard requests but +not very long so that we can end stuck requests." + :type 'integer + :group 'llm) + +(defcustom llm-request-plz-connect-timeout 10 + "The number of seconds to wait for a connection to a HTTP server." + :type 'integer + :group 'llm) + +(defun llm-request-success (status) + "Return non-nil if STATUS is a successful HTTP status code." + (<= 200 status 299)) + +(cl-defun llm-request-plz-sync-raw-output (url &key headers data timeout) + "Make a request to URL. The raw text response will be returned. + +HEADERS will be added in the Authorization header, in addition to +standard json header. This is optional. + +DATA will be jsonified and sent as the request body. +This is required. + +TIMEOUT is the number of seconds to wait for a response." + (condition-case error + (let ((resp (plz-media-type-request + 'post url + :as `(media-types ,plz-media-types) + :body (when data + (encode-coding-string (json-encode data) 'utf-8)) + :connect-timeout llm-request-plz-connect-timeout + :headers (append headers '(("Content-Type" . "application/json"))) + :timeout (or timeout llm-request-plz-timeout)))) + (if (llm-request-success (plz-response-status resp)) + (plz-response-body resp) + (signal 'plz-http-error resp))) + (plz-error + (seq-let [error-sym message data] error + (cond + ((eq 'plz-http-error error-sym) + (let ((response (plz-error-response data))) + (error "LLM request failed with code %d: %s (additional information: %s)" + (plz-response-status response) + (nth 2 (assq (plz-response-status response) url-http-codes)) + (plz-response-body response)))) + ((and (eq 'plz-curl-error error-sym) + (eq 28 (car (plz-error-curl-error data)))) + (error "LLM request timed out")) + (t (signal error-sym (list message data)))))))) + +(cl-defun llm-request-plz-sync (url &key headers data timeout) + "Make a request to URL. The parsed response will be returned. + +HEADERS will be added in the Authorization header, in addition to +standard json header. This is optional. + +DATA will be jsonified and sent as the request body. +This is required. + +TIMEOUT is the number of seconds to wait for a response." + (llm-request-plz-sync-raw-output url + :headers headers + :data data + :timeout timeout)) + +(defun llm-request-plz--handle-error (error on-error) + "Handle the ERROR with the ON-ERROR callback." + (cond ((plz-error-curl-error error) + (let ((curl-error (plz-error-curl-error error))) + (funcall on-error 'error + (format "curl error code %d: %s" + (car curl-error) + (cdr curl-error))))) + ((plz-error-response error) + (when-let ((response (plz-error-response error)) + (status (plz-response-status response)) + (body (plz-response-body response))) + (funcall on-error 'error body))) + (t (user-error "Unexpected error: %s" error)))) + +(cl-defun llm-request-plz-async (url &key headers data on-success media-type + on-error timeout) + "Make a request to URL. +Nothing will be returned. + +HEADERS will be added in the Authorization header, in addition to +standard json header. This is optional. + +DATA will be jsonified and sent as the request body. +This is required. + +ON-SUCCESS will be called with the response body as a json +object. This is optional in the case that ON-SUCCESS-DATA is set, +and required otherwise. + +ON-ERROR will be called with the error code and a response-body. +This is required. + +MEDIA-TYPE is an optional argument that adds or overrides a media +type, useful for streaming formats. It is expected that this is +only used by other methods in this file." + (plz-media-type-request + 'post url + :as `(media-types ,(if media-type + (cons media-type plz-media-types) + plz-media-types)) + :body (when data + (encode-coding-string (json-encode data) 'utf-8)) + :connect-timeout llm-request-plz-connect-timeout + :headers (append headers + '(("Content-Type" . "application/json"))) + :then (lambda (response) + (when on-success + (funcall on-success (plz-response-body response)))) + :else (lambda (error) + (when on-error + (llm-request-plz--handle-error error on-error))) + :timeout (or timeout llm-request-plz-timeout))) + +;; This is a useful method for getting out of the request buffer when it's time +;; to make callbacks. +(defun llm-request-plz-callback-in-buffer (buf f &rest args) + "Run F with ARSG in the context of BUF. +But if BUF has been killed, use a temporary buffer instead. +If F is nil, nothing is done." + (when f + (if (buffer-live-p buf) + (with-current-buffer buf (apply f args)) + (with-temp-buffer (apply f args))))) + +(provide 'llm-request-plz) +;;; llm-request-plz.el ends here diff --git a/llm-vertex.el b/llm-vertex.el index 745609f2a1..4a4c3f6be8 100644 --- a/llm-vertex.el +++ b/llm-vertex.el @@ -25,7 +25,7 @@ (require 'cl-lib) (require 'llm) -(require 'llm-request) +(require 'llm-request-plz) (require 'llm-provider-utils) (require 'json) @@ -174,19 +174,6 @@ KEY-GENTIME keeps track of when the key was generated, because the key must be r (cl-defmethod llm-provider-extract-streamed-function-calls ((provider llm-google) response) (llm-provider-extract-function-calls provider (json-read-from-string response))) -(cl-defmethod llm-provider-extract-partial-response ((_ llm-google) response) - "Return the partial response from as much of RESPONSE as we can parse." - (with-temp-buffer - (insert response) - (let ((result "")) - ;; We just will parse every line that is "text": "..." and concatenate them. - (save-excursion - (goto-char (point-min)) - (while (re-search-forward (rx (seq (literal "\"text\": ") - (group-n 1 ?\" (* any) ?\") line-end)) nil t) - (setq result (concat result (json-read-from-string (match-string 1)))))) - result))) - (cl-defmethod llm-provider-chat-request ((_ llm-google) prompt _) (llm-provider-utils-combine-to-user-prompt prompt llm-vertex-example-prelude) (append @@ -253,8 +240,22 @@ nothing to add, in which case it is nil." (args . ,(llm-provider-utils-function-call-args fc)))))) calls))) +(cl-defmethod llm-provider-streaming-media-handler ((provider llm-google) + msg-receiver fc-receiver _) + (cons 'application/json + (plz-media-type:application/json-array + :handler + (lambda (element) + (if-let ((response (llm-provider-chat-extract-result provider element))) + (funcall msg-receiver response) + (when-let ((fc (llm-provider-extract-function-calls provider element))) + (funcall fc-receiver fc))))))) + +(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-google) data) + (car data)) + (defun llm-vertex--chat-url (provider &optional streaming) -"Return the correct url to use for PROVIDER. + "Return the correct url to use for PROVIDER. If STREAMING is non-nil, use the URL for the streaming API." (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s" llm-vertex-gcloud-region diff --git a/llm.el b/llm.el index 14b5cfd715..5053c41c64 100644 --- a/llm.el +++ b/llm.el @@ -4,8 +4,8 @@ ;; Author: Andrew Hyatt <ahy...@gmail.com> ;; Homepage: https://github.com/ahyatt/llm -;; Package-Requires: ((emacs "28.1")) -;; Package-Version: 0.14.2 +;; Package-Requires: ((emacs "28.1") (plz "0.8")) +;; Package-Version: 0.14.3 ;; SPDX-License-Identifier: GPL-3.0-or-later ;; ;; This program is free software; you can redistribute it and/or @@ -519,6 +519,9 @@ methods." (kill-buffer-query-functions nil)) (kill-buffer buf))) +(cl-defmethod llm-cancel-request ((proc process)) + (delete-process proc)) + (cl-defgeneric llm-name (_) "Return the name of the model in PROVIDER. This is expected to be suitable for short labels. For example, if diff --git a/plz-event-source.el b/plz-event-source.el new file mode 100644 index 0000000000..f54ed85a5c --- /dev/null +++ b/plz-event-source.el @@ -0,0 +1,477 @@ +;;; plz-event-source.el --- Server Sent Event Source -*- lexical-binding: t; -*- + +;; Copyright (C) 2019-2023 Free Software Foundation, Inc. + +;; Author: r0man <ro...@burningswell.com> +;; Maintainer: r0man <ro...@burningswell.com> +;; URL: https://github.com/r0man/plz-event-source.el +;; Version: 0.1-pre +;; Package-Requires: ((emacs "26.3")) +;; Keywords: comm, network, http + +;; This file is part of GNU Emacs. + +;; It is temporarily vendored within the llm library. Please DO NOT +;; depend on it! It is subject to change. Once we think this package +;; is stable, we will release it to GNU ELPA. If no serious issues +;; are found, we plan to do this in Q4 of 2024. + +;;; License: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This library provides a parser and an event source implementation +;; for the Server Sent Event (SSE) protocol. + +;; See: https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events + +;;; Code: + +(require 'cl-lib) +(require 'eieio) +(require 'plz) +(require 'plz-media-type) +(require 'rx) + +;; Event + +(defclass plz-event-source-event () + ((data + :accessor plz-event-source-event-data + :initarg :data + :initform nil + :documentation "The event data.") + (last-event-id + :accessor plz-event-source-event-last-event-id + :initarg :last-event-id + :initform nil + :documentation "The last event id." + :type (or null string)) + (origin + :accessor plz-event-source-event-origin + :initarg :origin + :initform nil + :documentation "The event origin." + :type (or null string)) + (type + :accessor plz-event-source-event-type + :initarg :type + :initform 'message + :documentation "The event type." + :type symbol)) + "The server sent event class.") + +;; Parser + +(defclass plz-event-source-parser () + ((buffer + :documentation "The name of the buffer to read events from." + :initarg :buffer + :type string) + (events + :initarg :events + :initform nil + :documentation "The queue of events to dispatch." + :type (list-of plz-event-source-event)) + (data-buffer + :initarg :data-buffer + :initform "" + :documentation "Data buffer." + :type string) + (event-type-buffer + :initarg :event-type-buffer + :initform "" + :documentation "Event type buffer." + :type string) + (last-event-id + :initarg :last-event-id + :initform "" + :documentation "Last event id." + :type string) + (last-event-id-buffer + :initarg :last-event-id-buffer + :initform "" + :documentation "Last event id buffer." + :type string) + (position + :initarg :position + :initform 0 + :type integer + :documentation "The position in the buffer." + :type integer)) + "The server sent event stream parser.") + +(defconst plz-event-source-parser--end-of-line-regexp + (rx (or "\r\n" "\n" "\r")) + "Regular expression matching the end of a line.") + +(defconst plz-event-source-parser--line-regexp + (rx (* not-newline) (or "\r\n" "\n" "\r")) + "Regular expression matching a line of the event source stream.") + +(defun plz-event-source-parser--parse-bom (line) + "Parse the Byte Order Mark (BOM) from LINE." + (if (string-prefix-p "\uFEFF" line) + (substring line 1) + line)) + +(defun plz-event-source-parser--looking-at-line-p () + "Return non-nil if the current line matches the event source line regexp." + (looking-at plz-event-source-parser--line-regexp)) + +(defun plz-event-source-parser--parse-line () + "Return non-nil if the current line matches the event source line regexp." + (when (looking-at plz-event-source-parser--line-regexp) + (string-trim-right (delete-and-extract-region (match-beginning 0) (match-end 0)) + plz-event-source-parser--end-of-line-regexp))) + +(defun plz-event-source-parser--dispatch-event (parser) + "Dispatch an event from PARSER to registered listeners." + (with-slots (data-buffer event-type-buffer events last-event-id last-event-id-buffer) parser + (setf last-event-id last-event-id-buffer) + (if (string-empty-p data-buffer) + (setf data-buffer "" + event-type-buffer "") + (progn + (setf data-buffer (string-trim-right data-buffer "\n")) + (let ((event (plz-event-source-event + :data data-buffer + :last-event-id (unless (string-blank-p last-event-id) + last-event-id) + :origin (buffer-name) + :type (if (string-blank-p event-type-buffer) + 'message + (intern event-type-buffer))))) + (setf data-buffer "" + event-type-buffer "") + (setf events (cons event events)) + event))))) + +(defun plz-event-source-parser--process-event (parser field value) + "Process the FIELD and VALUE from PARSER as a event." + (ignore field) + (with-slots (event-type-buffer) parser + (setf event-type-buffer value))) + +(defun plz-event-source-parser--process-data (parser field value) + "Process the FIELD and VALUE from PARSER as data." + (ignore field) + (with-slots (data-buffer) parser + (setf data-buffer (concat data-buffer value "\n")))) + +(defun plz-event-source-parser--process-id (parser field value) + "Process the FIELD and VALUE from PARSER as event id." + (ignore field) + (unless (string-match "\u0000" value) + (with-slots (last-event-id-buffer) parser + (setf last-event-id-buffer value)))) + +(defun plz-event-source--process-field (parser field value) + "Process the FIELD and VALUE from PARSER." + (cond ((equal "event" field) + (plz-event-source-parser--process-event parser field value)) + ((equal "data" field) + (plz-event-source-parser--process-data parser field value)) + ((equal "id" field) + (plz-event-source-parser--process-id parser field value)))) + +(defun plz-event-source-parser--process-line (parser line) + "Parse a LINE of the event stream PARSER and dispatch events." + (cond ((string-prefix-p ":" line)) + ((string-blank-p line) + (plz-event-source-parser--dispatch-event parser)) + ((string-match ":" line) + (let ((field (substring line 0 (match-beginning 0))) + (value (substring line (match-end 0)))) + (plz-event-source--process-field parser field + (if (string-prefix-p " " value) + (substring value 1) + value)))) + (t (plz-event-source--process-field parser line "")))) + +(defun plz-event-source-parser--insert (parser string) + "Insert STRING into the buffer of the event PARSER." + (with-slots (buffer events position) parser + (with-current-buffer (get-buffer buffer) + (insert string) + (while (plz-event-source-parser-parse-line parser)) + events))) + +(defun plz-event-source-parser--end-of-headers () + "Return the end of headers position in the current buffer." + (save-excursion + (goto-char (point-min)) + (re-search-forward plz-http-end-of-headers-regexp nil t) + (point))) + +(defun plz-event-source-parser-parse-line (parser) + "Parse a line from the event stream in the PARSER buffer." + (with-slots (buffer position) parser + (with-current-buffer buffer + (save-excursion + (goto-char position) + (when-let (line (plz-event-source-parser--parse-line)) + (setf position (point)) + (plz-event-source-parser--process-line parser line) + line))))) + +(defun plz-event-source-parser-parse (parser) + "Parse the event stream in the the PARSER buffer." + (with-slots (buffer handlers) parser + (with-current-buffer (get-buffer buffer) + (goto-char (point-min)) + (while (not (eobp)) + (when-let (line (plz-event-source-parser--parse-line)) + (plz-event-source-parser--process-line parser line)))))) + +;; Event Source + +(defclass plz-event-source () + ((errors + :initarg :errors + :documentation "The errors of the event source.") + (handlers + :initarg :handlers + :initform nil + :documentation "Registered event handlers.") + (last-event-id + :initarg :last-event-id + :initform "" + :documentation "Last event id.") + (options + :initarg :options + :documentation "The url of the event source." + :type list) + (ready-state + :documentation "The ready state of the event source." + :initarg :ready-state + :initform 'closed + :type (member closed connecting open)) + (url + :initarg :url + :documentation "The url of the event source." + :type (or null string))) + "The server sent event source class.") + +(cl-defgeneric plz-event-source-open (source) + "Open the event SOURCE.") + +(cl-defgeneric plz-event-source-close (source) + "Close the event SOURCE.") + +(cl-defgeneric plz-event-source--insert (source data) + "Insert DATA into the event SOURCE buffer, parse and dispatch events.") + +(defun plz-event-source-add-listener (source type listener) + "Add an event LISTENER for event TYPE to the event SOURCE." + (with-slots (handlers) source + (setf handlers (append handlers (list (cons type listener)))) + source)) + +(defun plz-event-source-remove-listener (source type listener) + "Remove an event LISTENER for event TYPE from the event SOURCE." + (with-slots (handlers) source + (setf handlers (cl-remove-if (lambda (pair) + (and (eq (car pair) type) + (eq (cdr pair) listener))) + handlers)) + source)) + +(defun plz-event-source--dispatch-event (source event) + "Dispatch the EVENT to the listeners of event SOURCE." + (with-slots (handlers) source + (dolist (pair handlers) + (when (equal (car pair) (oref event type)) + (let ((timer (timer-create))) + (timer-set-time timer (current-time)) + (timer-set-function timer + (lambda (handler event) + (with-temp-buffer + (funcall handler event))) + (list (cdr pair) event)) + (timer-activate timer)))))) + +(defun plz-event-source--dispatch-events (source events) + "Dispatch the EVENTS to the listeners of event SOURCE." + (dolist (event (reverse events)) + (plz-event-source--dispatch-event source event))) + +(defun plz-event-source--response-in-buffer-p () + "Return non-nil the if point is looking at a HTTP response." + (save-excursion + (goto-char (point-min)) + (re-search-forward plz-http-end-of-headers-regexp nil t))) + +;; Buffer event source + +(defclass plz-event-source-buffer (plz-event-source) + ((buffer + :initarg :buffer + :documentation "The event source buffer." + :type string) + (parser + :initarg :parser + :documentation "The event source parser." + :type (or null plz-event-source-parser))) + "A server sent event source using curl for HTTP.") + +(cl-defmethod plz-event-source--insert ((source plz-event-source-buffer) data) + "Insert DATA into the event SOURCE buffer, parse and dispatch events." + (with-slots (parser) source + (plz-event-source-parser--insert parser data) + (with-slots (events) parser + (plz-event-source--dispatch-events source events) + (setf events nil)))) + +(defun plz-event-source--buffer-start-position () + "Return the start position of the current buffer." + (save-excursion + (goto-char (point-min)) + (re-search-forward plz-http-end-of-headers-regexp nil t) + (point))) + +(cl-defmethod plz-event-source-open ((source plz-event-source-buffer)) + "Open a connection to the URL of the event SOURCE." + (with-slots (buffer errors options ready-state parser) source + (with-current-buffer (get-buffer-create buffer) + (let ((event (plz-event-source-event :type 'open))) + (setf ready-state 'connecting) + (setf parser (plz-event-source-parser + :buffer buffer + :position (plz-event-source--buffer-start-position))) + (setf ready-state 'open) + (plz-event-source--dispatch-event source event) + source)))) + +(cl-defmethod plz-event-source-close ((source plz-event-source-buffer)) + "Close the connection of the event SOURCE." + (with-slots (buffer ready-state) source + (let ((event (plz-event-source-event :type 'close))) + (setf ready-state 'closed) + (plz-event-source--dispatch-event source event) + source))) + +(defclass plz-event-source-http (plz-event-source) + ((process + :initarg :process + :documentation "The process of the event source." + :type (or null process)) + (response + :initarg :response + :documentation "The plz HTTP response." + :type (or null plz-response))) + "A server sent event source using curl for HTTP.") + +(defun plz-event-source--media-types (source) + "Return the media types of the event SOURCE." + (with-slots (handlers) source + (let ((media-type (plz-event-source:text/event-stream :events handlers))) + (cons (cons 'text/event-stream media-type) plz-media-types)))) + +(cl-defmethod plz-event-source-open ((source plz-event-source-http)) + "Open a connection to the URL of the event SOURCE." + (with-slots (errors options process ready-state response url) source + (setf ready-state 'connecting) + (setf response nil) + (setf process (plz-media-type-request + (or (alist-get 'method options) 'get) url + :as `(media-types ,(plz-event-source--media-types source)) + :body (alist-get 'body options) + :headers (alist-get 'headers options) + :then (lambda (object) + (setf response object)) + :else (lambda (object) + (setf errors (push object errors)) + (setf response (plz-error-response object))) + :finally (lambda () + (setf ready-state 'closed)))) + source)) + +(cl-defmethod plz-event-source-close ((source plz-event-source-http)) + "Close the connection of the event SOURCE." + (with-slots (process ready-state) source + (delete-process process) + (setf ready-state 'closed))) + +;; Content Type: text/event-stream + +(defclass plz-event-source:text/event-stream (plz-media-type:application/octet-stream) + ((coding-system :initform 'utf-8) + (type :initform 'text) + (subtype :initform 'event-stream) + (events :documentation "Association list from event type to handler." + :initarg :events + :initform nil + :type list)) + "Media type class that handles the processing of HTTP responses +in the server sent events format. The HTTP response is processed +in a streaming way. The :events slot of the class can be set to +an association list from event type symbol to a handler function. +Whenever a new event is parsed and emitted the handler for the +corresponding event type will be called with two arguments, an +instance of the underlying event source class and an event. The +body slot of the plz-response structure passed to the THEN and +ELSE callbacks will always be set to nil.") + +(defvar-local plz-event-source--current nil + "The event source of the current buffer.") + +(cl-defmethod plz-media-type-else ((_ plz-event-source:text/event-stream) error) + "Transform the ERROR into a format suitable for MEDIA-TYPE." + (let* ((source plz-event-source--current) + (event (plz-event-source-event :type 'error :data error))) + (plz-event-source-close source) + (plz-event-source--dispatch-event source event) + error)) + +(cl-defmethod plz-media-type-process ((media-type plz-event-source:text/event-stream) process chunk) + "Process the CHUNK according to MEDIA-TYPE using PROCESS." + (unless plz-event-source--current + (let* ((response (make-plz-response + :status (plz-response-status chunk) + :headers (plz-response-headers chunk))) + (source (plz-event-source-open + (plz-event-source-buffer + :buffer (buffer-name (process-buffer process)) + :handlers (seq-map + (lambda (pair) + (let ((type (car pair)) + (handler (cdr pair))) + (cond + ((equal 'open type) + (cons type (lambda (event) + (setf (oref event data) response) + (funcall handler event)))) + ((equal 'close type) + (cons type (lambda (event) + (setf (oref event data) response) + (funcall handler event)))) + (t pair)))) + (oref media-type events)))))) + (setq-local plz-event-source--current source))) + (let ((body (plz-media-type-decode-coding-string media-type (plz-response-body chunk)))) + (plz-event-source--insert plz-event-source--current body) + (set-marker (process-mark process) (point)))) + +(cl-defmethod plz-media-type-then ((media-type plz-event-source:text/event-stream) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (plz-event-source-close plz-event-source--current) + (cl-call-next-method media-type response) + (setf (plz-response-body response) nil) + response) + +(provide 'plz-event-source) +;;; plz-event-source.el ends here diff --git a/plz-media-type.el b/plz-media-type.el new file mode 100644 index 0000000000..0421cb48cf --- /dev/null +++ b/plz-media-type.el @@ -0,0 +1,712 @@ +;;; plz-media-type.el --- plz content types -*- lexical-binding: t; -*- + +;; Copyright (C) 2019-2023 Free Software Foundation, Inc. + +;; Author: r0man <ro...@burningswell.com> +;; Maintainer: r0man <ro...@burningswell.com> +;; URL: https://github.com/r0man/plz-media-type.el +;; Version: 0.1-pre +;; Package-Requires: ((emacs "26.3")) +;; Keywords: comm, network, http + +;; This file is part of GNU Emacs. + +;; It is temporarily vendored within the llm library. Please DO NOT +;; depend on it! It is subject to change. Once we think this package +;; is stable, we will release it to GNU ELPA. If no serious issues +;; are found, we plan to do this in Q4 of 2024. + +;;; License: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This library provides enhanced handling of MIME types for HTTP +;; requests within Emacs. It utilizes the 'plz' library for +;; networking calls, extending it to process responses based on the +;; Content-Type header. This library defines various classes and +;; methods for parsing and processing standard MIME types, including +;; JSON, XML, HTML, and binary data. It allows for extensible +;; processing of additional types through subclassing. + +;;; Code: + +;;;; Requirements + +(require 'cl-lib) +(require 'eieio) +(require 'plz) + +(defclass plz-media-type () + ((coding-system + :documentation "The coding system to use for the media type." + :initarg :coding-system + :initform nil + :type (or null symbol)) + (type + :documentation "The media type." + :initarg :type + :type symbol) + (subtype + :documentation "The media subtype." + :initarg :subtype + :type symbol) + (parameters + :documentation "The parameters of the media type." + :initarg :parameters + :initform nil + :type list)) + "A class that hold information about the type, subtype and +parameters of a media type. It is meant to be sub-classed to +handle the processing of different media types and supports the +processing of streaming and non-streaming HTTP responses. The +response will be decoded with the coding-system of the charset +parameter in the content type header, or the coding-sytem of the +media type. If the coding system of a media type is nil, the +response will not be decoded.") + +(cl-defgeneric plz-media-type-else (media-type error) + "Transform and handle the ERROR according to MEDIA-TYPE.") + +(cl-defgeneric plz-media-type-then (media-type response) + "Transform and handle the RESPONSE according to MEDIA-TYPE.") + +(cl-defgeneric plz-media-type-process (media-type process chunk) + "Process the CHUNK according to MEDIA-TYPE using PROCESS.") + +(cl-defmethod plz-media-type-else ((_ (eql nil)) error) + "Transform and handle the ERROR according to MEDIA-TYPE." + error) + +(defun plz-media-type-charset (media-type) + "Return the character set of the MEDIA-TYPE." + (with-slots (parameters) media-type + (alist-get "charset" parameters nil nil #'equal))) + +(defun plz-media-type-coding-system (media-type) + "Return the coding system of the MEDIA-TYPE." + (if-let (charset (plz-media-type-charset media-type)) + (coding-system-from-name charset) + (oref media-type coding-system))) + +(defun plz-media-type-decode-coding-string (media-type string) + "Decode STRING which is encoded in the coding system of MEDIA-TYPE." + (if-let (coding-system (plz-media-type-coding-system media-type)) + (decode-coding-string string coding-system) + string)) + +(defun plz-media-type-name (media-type) + "Return the name of the MEDIA-TYPE as a string." + (with-slots (type subtype) media-type + (format "%s/%s" type subtype))) + +(defun plz-media-type-symbol (media-type) + "Return the name of the MEDIA-TYPE as a symbol." + (intern (plz-media-type-name media-type))) + +(defun plz-media-type-of-response (media-types response) + "Lookup the content type of RESPONSE in MEDIA-TYPES." + (let ((media-type (plz-media-type--content-type response))) + (clone (plz-media-type--find media-types media-type) + :parameters (oref media-type parameters)))) + +(defun plz-media-type--parse (header) + "Parse the Content-Type HEADER and return a `plz-media-type' instance." + (unless (or (null header) (string-blank-p header)) + (let* ((components (split-string header ";")) + (mime-type (string-trim (car components))) + (parameters-list (cdr components)) + (parameters-alist '())) + (dolist (param parameters-list parameters-alist) + (let* ((key-value (split-string param "=")) + (key (string-trim (car key-value))) + (value (string-trim (cadr key-value) "\""))) + (setq parameters-alist (cons (cons key value) parameters-alist)))) + (let ((parts (split-string mime-type "/"))) + (plz-media-type + :type (intern (car parts)) + :subtype (intern (cadr parts)) + :parameters (nreverse parameters-alist)))))) + +(defun plz-media-type--content-type (response) + "Return the content type header of RESPONSE, or nil if it's not set." + (let ((headers (plz-response-headers response))) + (when-let (header (cdr (assoc 'content-type headers))) + (plz-media-type--parse header)))) + +(defun plz-media-type--find (media-types media-type) + "Lookup the MEDIA-TYPE in MEDIA-TYPES." + (or (alist-get (plz-media-type-symbol media-type) media-types) + (alist-get t media-types) + (plz-media-type:application/octet-stream))) + +(defvar-local plz-media-type--current nil + "The media type of the process buffer.") + +(defvar-local plz-media-type--position nil + "The position in the process buffer.") + +(defvar-local plz-media-type--response nil + "The response of the process buffer.") + +(defun plz-media-type--schedule (handler messages) + "Schedule MESSAGES to be processed with the HANDLER on a timer." + (cl-loop with time = (current-time) + for msg = (pop messages) while msg + do (let ((timer (timer-create))) + (timer-set-time timer time) + (timer-set-function timer + (lambda (handler msg) + (with-temp-buffer (funcall handler msg))) + (list handler msg)) + (timer-activate timer)))) + +(defun plz-media-type--parse-headers () + "Parse the HTTP response headers in the current buffer." + (forward-line 1) + (let ((limit (save-excursion + (re-search-forward plz-http-end-of-headers-regexp nil) + (point)))) + (cl-loop while (re-search-forward (rx bol (group (1+ (not (in ":")))) ":" (1+ blank) + (group (1+ (not (in "\r\n"))))) + limit t) + collect (cons (intern (downcase (match-string 1))) (match-string 2))))) + +(cl-defun plz-media-type--parse-response () + "Parse the response in the current buffer." + (when (re-search-forward plz-http-end-of-headers-regexp nil t) + (let ((end-of-headers (point))) + (goto-char (point-min)) + (when (looking-at plz-http-response-status-line-regexp) + (prog1 (make-plz-response + :version (string-to-number (match-string 1)) + :status (string-to-number (match-string 2)) + :headers (plz-media-type--parse-headers) + :body (buffer-substring end-of-headers (point-max))) + (goto-char end-of-headers)))))) + +(defun plz-media-type-process-filter (process media-types string) + "The process filter that handles different content types. + +PROCESS is the process. + +MEDIA-TYPES is an association list from media type to an +instance of a content type class. + +STRING which is output just received from the process." + (when (buffer-live-p (process-buffer process)) + (with-current-buffer (process-buffer process) + (let ((moving (= (point) (process-mark process)))) + (if-let (media-type plz-media-type--current) + (let ((response plz-media-type--response)) + (setf (plz-response-body response) string) + (plz-media-type-process media-type process response)) + (progn + (save-excursion + (goto-char (process-mark process)) + (insert string) + (set-marker (process-mark process) (point))) + (goto-char (point-min)) + (when-let (chunk (plz-media-type--parse-response)) + (delete-region (point) (point-max)) + (let ((media-type (plz-media-type-of-response media-types chunk))) + (setq-local plz-media-type--current media-type) + (setq-local plz-media-type--response + (make-plz-response + :headers (plz-response-headers chunk) + :status (plz-response-status chunk) + :version (plz-response-version chunk))) + (when-let (body (plz-response-body chunk)) + (when (> (length body) 0) + (setf (plz-response-body chunk) body) + (set-marker (process-mark process) (point)) + (plz-media-type-process media-type process chunk))))))) + (when moving + (goto-char (process-mark process))))))) + +;; Content Type: application/octet-stream + +(defclass plz-media-type:application/octet-stream (plz-media-type) + ((type :initform 'application) + (subtype :initform 'octet-stream)) + "Media type class that handles the processing of octet stream +HTTP responses. The media type sets the body slot of the +plz-response structure to the unmodified value of the HTTP response +body. It is used as the default media type processor.") + +(cl-defmethod plz-media-type-else + ((media-type plz-media-type:application/octet-stream) error) + "Transform the ERROR into a format suitable for MEDIA-TYPE." + (when-let (response (plz-error-response error)) + (setf (plz-error-response error) (plz-media-type-then media-type response))) + error) + +(cl-defmethod plz-media-type-then + ((media-type plz-media-type:application/octet-stream) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (ignore media-type) + (setf (plz-response-body response) (buffer-string)) + response) + +(cl-defmethod plz-media-type-process + ((media-type plz-media-type:application/octet-stream) process chunk) + "Process the CHUNK according to MEDIA-TYPE using PROCESS." + (ignore media-type) + (save-excursion + (goto-char (process-mark process)) + (insert (plz-media-type-decode-coding-string media-type (plz-response-body chunk))) + (set-marker (process-mark process) (point)))) + +;; Content Type: application/json + +(defclass plz-media-type:application/json (plz-media-type:application/octet-stream) + ((coding-system :initform 'utf-8) + (subtype :initform 'json) + (array-type + :documentation "Specifies which Lisp type is used to represent arrays. It can be +`array' (the default) or `list'." + :initarg :array-type + :initform 'array + :type symbol) + (false-object + :documentation "Specifies which object to use to represent a JSON false value. It +defaults to `:json-false'." + :initarg :false-object + :initform :json-false) + (null-object + :documentation "Specifies which object to use to represent a JSON null value. It +defaults to `nil`." + :initarg :null-object + :initform nil) + (object-type + :documentation "Specifies which Lisp type is used to represent objects. It can +be `hash-table', `alist' (the default) or `plist'." + :initarg :object-type + :initform 'alist + :type symbol)) + "Media type class that handles the processing of HTTP responses +in the JSON format. The HTTP response is processed in a +non-streaming way. After the response has been received, the +body of the plz-response structure is set to the result of parsing +the HTTP response body with the `json-parse-buffer' function. +The arguments to the `json-parse-buffer' can be customized by +making an instance of this class and setting its slots +accordingly.") + +(defun plz-media-type--parse-json-object (media-type) + "Parse the JSON object in the current buffer according to MEDIA-TYPE." + (with-slots (array-type false-object null-object object-type) media-type + (json-parse-buffer :array-type array-type + :false-object false-object + :null-object null-object + :object-type object-type)) ) + +(cl-defmethod plz-media-type-then + ((media-type plz-media-type:application/json) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (setf (plz-response-body response) (plz-media-type--parse-json-object media-type)) + response) + +;; Content Type: application/json (array of objects) + +(defclass plz-media-type:application/json-array (plz-media-type:application/json) + ((handler + :documentation "Function that will be called for each object in the JSON array." + :initarg :handler + :type (or function symbol))) + "Media type class that handles the processing of HTTP responses +in a JSON format that assumes that the object at the top level is +an array. The HTTP response is processed in a streaming way. +Each object in the top level array will be parsed with the +`json-parse-buffer' function. The function in the :handler slot +will be called each time a new object arrives. The body slot of +the plz-response structure passed to the THEN and ELSE callbacks +will always be set to nil.") + +(defun plz-media-type:application/json-array--parse-next (media-type) + "Parse a single line of the newline delimited JSON MEDIA-TYPE." + (let ((begin (point))) + (cond ((looking-at "\\[") + (forward-char 1) + (cons :array-start (buffer-substring begin (point)))) + ((looking-at ",") + (forward-char 1) + (cons :comma (buffer-substring begin (point)))) + ((looking-at "\n") + (forward-char 1) + (cons :line-feed (buffer-substring begin (point)))) + ((looking-at "\r") + (forward-char 1) + (cons :carriage-return (buffer-substring begin (point)))) + ((looking-at "\\]") + (forward-char 1) + (cons :array-end (buffer-substring begin (point)))) + ((not (eobp)) + (condition-case nil + (cons :array-element (plz-media-type--parse-json-object media-type)) + (json-error)))))) + +(defun plz-media-type:application/json-array--consume-next (media-type) + "Parse a single line of the newline delimited JSON MEDIA-TYPE." + (let ((begin (point))) + (prog1 (plz-media-type:application/json-array--parse-next media-type) + (delete-region begin (point)) + (setq-local plz-media-type--position (point))))) + +(defun plz-media-type:application/json-array--parse-stream (media-type) + "Parse all lines of the newline delimited JSON MEDIA-TYPE in the PROCESS buffer." + (let ((objects)) + (unless plz-media-type--position + (setq-local plz-media-type--position (point))) + (goto-char plz-media-type--position) + (when-let (result (plz-media-type:application/json-array--consume-next media-type)) + (while result + (when (equal :array-element (car result)) + (push (cdr result) objects)) + (setq result (plz-media-type:application/json-array--consume-next media-type)))) + objects)) + +(cl-defmethod plz-media-type-process + ((media-type plz-media-type:application/json-array) process chunk) + "Process the CHUNK according to MEDIA-TYPE using PROCESS." + (cl-call-next-method media-type process chunk) + (with-slots (handler) media-type + (let ((objects (plz-media-type:application/json-array--parse-stream media-type))) + (set-marker (process-mark process) (point-max)) + (plz-media-type--schedule handler objects)))) + +(cl-defmethod plz-media-type-then + ((media-type plz-media-type:application/json-array) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (ignore media-type) + (setf (plz-response-body response) nil) + response) + +;; Content Type: application/x-ndjson + +(defclass plz-media-type:application/x-ndjson (plz-media-type:application/json) + ((subtype :initform 'x-ndjson) + (handler + :documentation "Function that will be called for each line that contains a JSON object." + :initarg :handler + :initform nil + :type (or function null symbol))) + "Media type class that handles the processing of HTTP responses +in a JSON format that assumes that the object at the top level is +an array. The HTTP response is processed in a streaming way. +Each object in the top level array will be parsed with the +`json-parse-buffer' function. The function in the :handler slot +will be called each time a new object arrives. The body slot of +the plz-response structure passed to the THEN and ELSE callbacks +will always be set to nil.") + +(defconst plz-media-type:application/x-ndjson--line-regexp + (rx (* not-newline) (or "\r\n" "\n" "\r")) + "Regular expression matching a JSON Object line.") + +(defun plz-media-type:application/x-ndjson--parse-line (media-type) + "Parse a single line of the newline delimited JSON MEDIA-TYPE." + (when (looking-at plz-media-type:application/x-ndjson--line-regexp) + (prog1 (plz-media-type--parse-json-object media-type) + (delete-region (match-beginning 0) (match-end 0))))) + +(defun plz-media-type:application/x-ndjson--parse-stream (media-type) + "Parse all lines of the newline delimited JSON MEDIA-TYPE in the PROCESS buffer." + (with-slots (handler) media-type + (let (objects) + (unless plz-media-type--position + (setq-local plz-media-type--position (point))) + (goto-char plz-media-type--position) + (when-let (object (plz-media-type:application/x-ndjson--parse-line media-type)) + (while object + (setq-local plz-media-type--position (point)) + (push object objects) + (setq object (plz-media-type:application/x-ndjson--parse-line media-type)))) + objects))) + +(cl-defmethod plz-media-type-process + ((media-type plz-media-type:application/x-ndjson) process chunk) + "Process the CHUNK according to MEDIA-TYPE using PROCESS." + (cl-call-next-method media-type process chunk) + (with-slots (handler) media-type + (let ((objects (plz-media-type:application/x-ndjson--parse-stream media-type))) + (plz-media-type--schedule handler objects)))) + +(cl-defmethod plz-media-type-then + ((media-type plz-media-type:application/x-ndjson) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (ignore media-type) + (setf (plz-response-body response) nil) + response) + +;; Content Type: application/xml + +(defclass plz-media-type:application/xml (plz-media-type:application/octet-stream) + ((coding-system :initform 'utf-8) + (subtype :initform 'xml)) + "Media type class that handles the processing of HTTP responses +in the XML format. The HTTP response is processed in a +non-streaming way. After the response has been received, the +body of the plz-response structure is set to the result of parsing +the HTTP response body with the `libxml-parse-html-region' +function.") + +(cl-defmethod plz-media-type-then + ((media-type plz-media-type:application/xml) response) + "Transform the RESPONSE into a format suitable for MEDIA-TYPE." + (with-slots (array-type false-object null-object object-type) media-type + (setf (plz-response-body response) + (libxml-parse-html-region (point-min) (point-max) nil)) + response)) + +;; Content Type: text/html + +(defclass plz-media-type:text/html (plz-media-type:application/xml) + ((type :initform 'text) + (subtype :initform 'html)) + "Media type class that handles the processing of HTTP responses +in the HTML format. The HTTP response is processed in a +non-streaming way. After the response has been received, the +body of the plz-response structure is set to the result of parsing +the HTTP response body with the `libxml-parse-html-region' +function.") + +(defclass plz-media-type:text/xml (plz-media-type:application/xml) + ((coding-system :initform 'us-ascii) + (type :initform 'text) + (subtype :initform 'xml)) + "Media type class that handles the processing of HTTP responses +in the HTML format. The HTTP response is processed in a +non-streaming way. After the response has been received, the +body of the plz-response structure is set to the result of +parsing the HTTP response body with the +`libxml-parse-html-region' function.") + +(defvar plz-media-types + `((application/json . ,(plz-media-type:application/json)) + (application/octet-stream . ,(plz-media-type:application/octet-stream)) + (application/xml . ,(plz-media-type:application/xml)) + (text/html . ,(plz-media-type:text/html)) + (text/xml . ,(plz-media-type:text/xml)) + (t . ,(plz-media-type:application/octet-stream))) + "Association list from media type to content type.") + +(defun plz-media-type--handle-sync-http-error (error media-types) + "Handle the synchronous HTTP ERROR using MEDIA-TYPES." + (let* ((msg (cadr error)) + (plzerror (caddr error))) + (signal (car error) + (cond + ((plz-error-response plzerror) + (let ((response (plz-error-response plzerror))) + (if-let (media-type (plz-media-type-of-response media-types response)) + (list msg (with-temp-buffer + (when-let (body (plz-response-body response)) + (insert body) + (goto-char (point-min))) + (plz-media-type-else media-type plzerror))) + (cdr error)))))))) + +(defun plz-media-type--handle-sync-error (error media-types) + "Handle the synchronous ERROR using MEDIA-TYPES." + (cond + ((eq 'plz-http-error (car error)) + (plz-media-type--handle-sync-http-error error media-types)) + (t (signal (car error) (cdr error))))) + +(defun plz-media-type--handle-sync-response (buffer) + "Handle a successful synchronous response in BUFFER." + (unwind-protect + (with-current-buffer buffer + (plz-media-type-then plz-media-type--current plz-media-type--response)) + (when (buffer-live-p buffer) + (kill-buffer buffer)))) + +(cl-defun plz-media-type-request + (method + url + &rest rest &key headers body else finally noquery + (as 'string) + (body-type 'text) + (connect-timeout plz-connect-timeout) + (decode t decode-s) + (then 'sync) + (timeout plz-timeout)) + "Request METHOD from URL with curl. + +This function works in a similar way as the `plz' function, with +the additional functionality of handling streaming and +non-streaming media types with the :as (media-types MEDIA-TYPES) +option. Setting a process :filter by the user is not supported. +Instead this function will always install its own process filter +that will process the response until the HTTP headers arrived. +Once the headers arrived it will hand over control to a media +type based on the content type header of the response. The media +type is responsible for processing the HTTP body. + +Return the curl process object or, for a synchronous request, the +selected result. + +HEADERS may be an alist of extra headers to send with the +request. + +BODY may be a string, a buffer, or a list like `(file FILENAME)' +to upload a file from disk. + +BODY-TYPE may be `text' to send BODY as text, or `binary' to send +it as binary. + +AS selects the kind of result to pass to the callback function +THEN, or the kind of result to return for synchronous requests. +It may be: + +- `buffer' to pass the response buffer, which will be narrowed to + the response body and decoded according to DECODE. + +- `binary' to pass the response body as an un-decoded string. + +- `string' to pass the response body as a decoded string. + +- `response' to pass a `plz-response' structure. + +- `file' to pass a temporary filename to which the response body + has been saved without decoding. + +- `(file FILENAME)' to pass FILENAME after having saved the + response body to it without decoding. FILENAME must be a + non-existent file; if it exists, it will not be overwritten, + and an error will be signaled. + +- `(media-types MEDIA-TYPES)' to handle the processing of the + response based on the Content-Type header. MEDIA-TYPES is an + association list from a content type symbol to an instance of a + `plz-media-type' class. The `plz-media-types' variable is + bound to an association list and can be used to handle some + commonly used formats such as JSON, HTML, XML. This list can + be used as a basis and is meant to be extended by users. If no + media type was found for a content type, it will be handled by + the default octet stream media type. When this option is used, + the THEN callback will always receive a plz-response structure as + argument, and the ELSE callback always a plz-error structure. The + plz-response structure will always have the status and header + slots set. The body slot depends on the media type + implementation. In the case for JSON, HTML, XML it will + contain the decoded response body. When receiving JSON for + example, it will be an Emacs Lisp association list. For + streaming responses like text/event-stream it will be set to + nil, and the events of the server sent events specification + will be dispatched to the handlers registered with the media + type instance. + +- A function, which is called in the response buffer with it + narrowed to the response body (suitable for, e.g. `json-read'). + +If DECODE is non-nil, the response body is decoded automatically. +For binary content, it should be nil. When AS is `binary', +DECODE is automatically set to nil. + +THEN is a callback function, whose sole argument is selected +above with AS; if the request fails and no ELSE function is +given (see below), the argument will be a `plz-error' structure +describing the error. Or THEN may be `sync' to make a +synchronous request, in which case the result is returned +directly from this function. + +ELSE is an optional callback function called when the request +fails (i.e. if curl fails, or if the HTTP response has a non-2xx +status code). It is called with one argument, a `plz-error' +structure. If ELSE is nil, a `plz-curl-error' or +`plz-http-error' is signaled when the request fails, with a +`plz-error' structure as the error data. For synchronous +requests, this argument is ignored. + +NOTE: In v0.8 of `plz', only one error will be signaled: +`plz-error'. The existing errors, `plz-curl-error' and +`plz-http-error', inherit from `plz-error' to allow applications +to update their code while using v0.7 (i.e. any `condition-case' +forms should now handle only `plz-error', not the other two). + +FINALLY is an optional function called without argument after +THEN or ELSE, as appropriate. For synchronous requests, this +argument is ignored. + +CONNECT-TIMEOUT and TIMEOUT are a number of seconds that limit +how long it takes to connect to a host and to receive a response +from a host, respectively. + +NOQUERY is passed to `make-process', which see. + +When the HTTP response is streamed, the buffering in the curl +output stream is turned off and the PROCESS-FILTER may be called +multiple times, depending on the size of the HTTP body. It is +the user's responsibility to understand and process each chunk, +and to construct the finalized response if necessary. There are +no guarantees regarding the chunk, such as being line-based or +not. +\(To silence checkdoc, we mention the internal argument REST.)" + ;; FIXME(v0.8): Remove the note about error changes from the docstring. + ;; FIXME(v0.8): Update error signals in docstring. + (declare (indent defun)) + (if-let (media-types (pcase as + (`(media-types ,media-types) + media-types))) + (let ((buffer)) + (condition-case error + (let* ((plz-curl-default-args (cons "--no-buffer" plz-curl-default-args)) + (result (plz method url + :as 'buffer + :body body + :body-type body-type + :connect-timeout connect-timeout + :decode decode + :else (lambda (error) + (setq buffer (current-buffer)) + (when (or (functionp else) (symbolp else)) + (funcall else (plz-media-type-else + plz-media-type--current + error)))) + :finally (lambda () + (unwind-protect + (when (functionp finally) + (funcall finally)) + (when (buffer-live-p buffer) + (kill-buffer buffer)))) + :headers headers + :noquery noquery + :filter (lambda (process chunk) + (plz-media-type-process-filter process media-types chunk)) + :timeout timeout + :then (if (symbolp then) + then + (lambda (_) + (setq buffer (current-buffer)) + (when (or (functionp then) (symbolp then)) + (funcall then (plz-media-type-then + plz-media-type--current + plz-media-type--response)))))))) + (cond ((bufferp result) + (plz-media-type--handle-sync-response result)) + ((processp result) + result) + (t (user-error "Unexpected response: %s" result)))) + ;; TODO: How to kill the buffer for sync requests that raise an error? + (plz-error (plz-media-type--handle-sync-error error media-types)))) + (apply #'plz (append (list method url) rest)))) + +;;;; Footer + +(provide 'plz-media-type) + +;;; plz-media-type.el ends here