branch: externals/llm
commit 4058691d3f9cb8324c63212260728cc7b7cc1699
Author: Hraban <[email protected]>
Commit: GitHub <[email protected]>
fix: OpenAI API keys passed as multibyte strings (#44)
Emacs has two types of strings: multibyte and unibyte. The request
library is essentially a giant ‘concat’ call, which converts the entire
result to multibyte if any single component is multibyte, including the
headers. Even if you encode the body, that effect is spoiled by a single
multibyte header string. This happens regardless of whether the header
actually contains any multibyte characters: while an Emacs string literal
containing only simple (ASCII) characters will be unibyte, an API key
fetched from an external source will often be multibyte, e.g. the result
of ‘shell-command-to-string’.
Example:
(dolist (x (list
"x"
(shell-command-to-string "printf x")
(encode-coding-string (shell-command-to-string "printf x")
'utf-8)))
(let ((s (concat x (encode-coding-string "é" 'utf-8))))
(message
"%S: %s(%s) %s, %s"
s
(multibyte-string-p s)
(multibyte-string-p x)
(string-bytes s)
(length s))))
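Output (note the second case: the multibyte prefix makes the whole result
multibyte, so the already-encoded "é" occupies 5 bytes in total instead of 3):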
"x\303\251": nil(nil) 3, 3
"x\303\251": t(t) 5, 3
"x\303\251": nil(nil) 3, 3
And:
(multibyte-string-p "foo") ; NIL
(multibyte-string-p "fôo") ; T
---
llm-openai.el | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/llm-openai.el b/llm-openai.el
index 40f71be9e6..6020acba1a 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -91,8 +91,13 @@ MODEL is the embedding model to use, or nil to use the
default.."
"Return the headers to use for a request from PROVIDER.")
(cl-defmethod llm-openai--headers ((provider llm-openai))
- (when (llm-openai-key provider)
- `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))))
+ (when-let ((key (llm-openai-key provider)))
+ ;; Encode the API key to ensure it is unibyte. The request library gets
+ ;; confused by multibyte headers, which turn the entire body multibyte if
+ ;; there’s a non-ascii character, regardless of encoding. And API keys are
+ ;; likely to be obtained from external sources like
shell-command-to-string,
+ ;; which always returns multibyte.
+ `(("Authorization" . ,(format "Bearer %s" (encode-coding-string key
'utf-8))))))
(cl-defmethod llm-provider-headers ((provider llm-openai))
(llm-openai--headers provider))