branch: externals/llm
commit abcb5b1a27bbc1888d9908db8bcaf3a2a1e83de4
Author: Andrew Hyatt <[email protected]>
Commit: GitHub <[email protected]>
Add Kimi k2.5, glm-5 models, modernize default context length (#247)
---
NEWS.org | 3 +++
llm-models.el | 12 +++++++++++-
llm-ollama.el | 6 +++++-
llm-provider-utils.el | 6 ++++--
llm-test.el | 27 +++++++++------------------
5 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index 30890af8c2..548f651d11 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -4,6 +4,9 @@
- Add Claude Opus 4.6
- Fix bug running two async calls in parallel
- Set Gemini default to 3.0 pro
+- Added Kimi k2.5 and GLM-5
+- Increased the default context length for unknown models to be more up to date
+- Allow Ollama authed keys to be functions
* Version 0.28.5
- Improved the tool calling docs
- Fix for running tools in the original buffer with streaming
diff --git a/llm-models.el b/llm-models.el
index a53a1284c4..0575bb5f27 100644
--- a/llm-models.el
+++ b/llm-models.el
@@ -387,7 +387,17 @@ REGEX is a regular expression that can be used to identify
the model, uniquely (
:name "gpt-oss" :symbol 'gpt-oss
:capabilities '(generation free-software reasoning tool-use) ; Apache license
:context-length 128000
- :regex "gpt-oss")))
+ :regex "gpt-oss")
+ (make-llm-model
+ :name "Kimi K2.5" :symbol 'kimi-k2.5
+ :capabilities '(generation free-software reasoning tool-use) ; Modified MIT license
+ :context-length 256000
+ :regex "kimi-k2\\.5")
+ (make-llm-model
+ :name "glm-5" :symbol 'glm-5
+ :capabilities '(generation free-software reasoning tool-use) ; Apache license
+ :context-length 200000
+ :regex "glm-5")))
(defun llm-models-by-symbol (symbol)
"Return the model with SYMBOL."
diff --git a/llm-ollama.el b/llm-ollama.el
index 7d1ad7f551..3ee516bd07 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -68,7 +68,11 @@ EMBEDDING-MODEL is the model to use for embeddings. It is required."
key)
(cl-defmethod llm-provider-headers ((provider llm-ollama-authed))
- `(("Authorization" . ,(format "Bearer %s" (encode-coding-string (llm-ollama-authed-key provider) 'utf-8)))))
+ `(("Authorization" . ,(format "Bearer %s" (encode-coding-string
+ (if (functionp (llm-ollama-authed-key provider))
+ (funcall (llm-ollama-authed-key provider))
+ (llm-ollama-authed-key provider))
+ 'utf-8)))))
;; Ollama's models may or may not be free, we have no way of knowing. There's no
;; way to tell, and no ToS to point out here.
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index 98ff407a1d..99e34cb186 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -572,12 +572,14 @@ conversation history will follow."
(defun llm-provider-utils-model-token-limit (model &optional default)
"Return the token limit for MODEL.
-If MODEL cannot be found, warn and return DEFAULT, which by default is 4096."
+If MODEL cannot be found, warn and return DEFAULT, which by default is
+128000."
(let ((matched-model (llm-models-match model)))
(if matched-model
(llm-model-context-length matched-model)
(warn "No model predefined for model %s, using restrictive defaults"
model)
- (or default 4096))))
+ ;; 128000 chosen to be a lower bound on reasonably up to date models.
+ (or default 128000))))
(defun llm-provider-utils--encolon (s)
"Turn S into a symbol preceded by a colon."
diff --git a/llm-test.el b/llm-test.el
index 2e163396c2..ae56c366cf 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -298,17 +298,8 @@
(ert-info ((format "Testing %s" model))
(should (equal limit (token-limit-for model))))))
;; From https://platform.openai.com/docs/models/gpt-3-5
- (should-have-token-limit "gpt-3.5-turbo" 16385)
- (should-have-token-limit "gpt-3.5-turbo-instruct" 4096)
- (should-have-token-limit "unknown" 4096)
- ;; From https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
- (should-have-token-limit "gpt-4" 8192)
- (should-have-token-limit "gpt-4-0613" 8192)
- ;; I couldn't find documentation on this, but the token limit is actually
- ;; 30k instead of 128k for most customers.
- (should-have-token-limit "gpt-4o" 30000)
- (should-have-token-limit "gpt-4o-mini" 30000)
- (should-have-token-limit "unknown" 4096)))
+ (should-have-token-limit "gpt-5.2" 400000)
+ (should-have-token-limit "unknown" 128000)))
(ert-deftest llm-test-capabilities-openai-compatible ()
(should-not (member 'tool-use (llm-capabilities (make-llm-openai-compatible
:chat-model "llama-3"))))
@@ -318,21 +309,21 @@
(ert-deftest llm-test-chat-token-limit-gemini ()
(should (= 1048576 (llm-chat-token-limit (make-llm-gemini))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-1.5-flash"))))
- (should (= 4096 (llm-chat-token-limit
- (make-llm-vertex :chat-model "unknown")))))
+ (make-llm-gemini :chat-model "gemini-3-flash"))))
+ (should (= 128000 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "unknown")))))
(ert-deftest llm-test-capabilities-gemini ()
(should-not (member 'tool-use (llm-capabilities (make-llm-gemini :chat-model
"llama-3"))))
- (should (member 'tool-use (llm-capabilities (make-llm-gemini :chat-model "gemini-1.5-flash")))))
+ (should (member 'tool-use (llm-capabilities (make-llm-gemini :chat-model "gemini-3-pro")))))
(ert-deftest llm-test-chat-token-limit-vertex ()
;; The default is Gemini 2.5 Pro, which has a token limit of 1048576.
(should (= 1048576 (llm-chat-token-limit (make-llm-vertex))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-2.5-flash"))))
- (should (= 4096 (llm-chat-token-limit
- (make-llm-vertex :chat-model "unknown")))))
+ (make-llm-gemini :chat-model "gemini-3-flash"))))
+ (should (= 128000 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-ollama ()
;; The code is straightforward, so no need to test all the models.