branch: elpa/gptel
commit dac4bc04417531dee49b33e8c2ff30816217292e
Author: Karthik Chikmagalur <[email protected]>
Commit: Karthik Chikmagalur <[email protected]>
gptel-request: Add markdown link validation and annotation
Add link annotation functions for gptel-mode in Markdown
buffers. This brings gptel's Markdown buffer support on par with
Org, but neither is plugged into gptel-mode yet.
* gptel-request.el (gptel-markdown--link-regex): Regex covering
recognized Markdown link syntax, extracted from a function into a
constant.
(gptel-markdown-validate-link): New function (defsubst) to enforce
a single path (narrow waist) for all Markdown link checks in gptel
buffers.
(gptel--parse-media-links): Use `gptel-markdown-validate-link'.
(gptel--link-standalone-p): Change calling convention to accept a
Markdown link instead of link bounds. Using the bounds is indeed
a better way to do it, but it doesn't fit the API of
`gptel-markdown-validate-link'.
* gptel.el (gptel-markdown--annotate-links): New function to
annotate links that will be sent. Not yet hooked up to jit-lock.
---
gptel-request.el | 137 ++++++++++++++++++++++++++++++++++++++-----------------
gptel.el | 30 ++++++++++++
2 files changed, 126 insertions(+), 41 deletions(-)
diff --git a/gptel-request.el b/gptel-request.el
index 4295aaa7f6c..8a239927751 100644
--- a/gptel-request.el
+++ b/gptel-request.el
@@ -51,6 +51,7 @@
(require 'text-property-search)
(require 'cl-generic)
(require 'map)
+(require 'mailcap) ;FIXME Avoid this somehow
(declare-function json-read "json" ())
(defvar json-object-type)
@@ -740,6 +741,28 @@ buffer-locally, or let-bind it around calls to gptel
queries, or via
gptel presets."
:type '(repeat string))
+(defcustom gptel-markdown-validate-link #'gptel--link-standalone-p
+ "Validate links to be sent as context with gptel queries.
+
+When `gptel-track-media' is enabled, this option determines if a
+supported link will be followed and its source included with gptel
+queries from Markdown buffers. Currently only links to files are
+supported (along with web URLs if the model supports them).
+
+It should be a function that accepts a Markdown link and return non-nil
+if the link should be followed. See `markdown-link-at-pos' for the
+structure of a Markdown link object.
+
+By default, links are considered valid if they are placed on a line by
+themselves, separated from surrounding text. This is to ensure that
+links to be sent are intentionally placed. You can set it to the
+function `always' to try to send all links."
+ :type '(choice
+ (const :tag "Standalone links" gptel--link-standalone-p)
+ (const :tag "All links" always)
+ (function :tag "Function"))
+ :group 'gptel)
+
(defvar gptel--request-alist nil
"Alist of active gptel requests.
Each entry has the form (PROCESS . (FSM ABORT-CLOSURE))
@@ -785,6 +808,16 @@ See `gptel-backend'."
file path to a cons cell of the form (t . binaryp), where binaryp is
non-nil if the file is binary-encoded.")
+;; The following is derived from:
+;;
+;; (concat "\\(?:" markdown-regex-link-inline "\\|" markdown-regex-angle-uri
"\\)")
+;;
+;; Since we want this known at compile time, when markdown-mode is not
+;; guaranteed to be available, we have to hardcode it.
+(defconst gptel-markdown--link-regex
+
"\\(?:\\(?1:!\\)?\\(?2:\\[\\)\\(?3:\\^?\\(?:\\\\\\]\\|[^]]\\)*\\|\\)\\(?4:\\]\\)\\(?5:(\\)\\s-*\\(?6:[^)]*?\\)\\(?:\\s-+\\(?7:\"[^\"]*\"\\)\\)?\\s-*\\(?8:)\\)\\|\\(<\\)\\([a-z][a-z0-9.+-]\\{1,31\\}:[^]
\n
<>,;()]+\\)\\(>\\)\\)"
+ "Link regex for gptel-mode in Markdown mode.")
+
;;; Utility functions
@@ -952,18 +985,19 @@ Return nil if string collapses to empty string."
(unless (string-empty-p trimmed)
trimmed)))
-(defsubst gptel--link-standalone-p (beg end)
- "Return non-nil if positions BEG and END are isolated.
+(defun gptel--link-standalone-p (link)
+ "Return non-nil if Markdown LINK is isolated.
-This means the extent from BEG to END is the only non-whitespace
-content on this line."
- (save-excursion
- (and (= beg (progn (goto-char beg) (beginning-of-line)
- (skip-chars-forward "\t ")
- (point)))
- (= end (progn (goto-char end) (end-of-line)
- (skip-chars-backward "\t ")
- (point))))))
+This means the extent from the link beginning to end is the only
+non-whitespace content on its line."
+ (let ((beg (car link)) (end (cadr link)))
+ (save-excursion
+ (and (= beg (progn (goto-char beg) (beginning-of-line)
+ (skip-chars-forward "\t ")
+ (point)))
+ (= end (progn (goto-char end) (end-of-line)
+ (skip-chars-backward "\t ")
+ (point)))))))
(defsubst gptel--curl-path ()
"Curl executable to use."
@@ -2199,11 +2233,37 @@ or
(list `(:text ,(buffer-substring-no-properties
beg end))))
-(defvar markdown-regex-link-inline)
-(defvar markdown-regex-angle-uri)
(declare-function markdown-link-at-pos "markdown-mode")
(declare-function mailcap-file-name-to-mime-type "mailcap")
+(defsubst gptel-markdown--validate-link (link)
+ "Validate a Markdown LINK as sendable under the current gptel settings.
+
+Return a form (validp link-type path . REST), where REST is a list
+explaining why sending the link is not supported by gptel. Only the
+first nil value in REST is guaranteed to be correct."
+ (let ((mime))
+ (if-let* ((path (nth 3 link))
+ (prefix (or (string-search "://" path) 0))
+ (type (if (= prefix 0) "file" (substring path 0 prefix)))
+ (path (if (= prefix 0) path (substring path (+ prefix 3))))
+ (filep (member type `("file" ,@(and (gptel--model-capable-p 'url)
+ '("http" "https" "ftp")))))
+ (placementp (funcall gptel-markdown-validate-link link))
+ (readablep (or (member type '("http" "https" "ftp"))
+ (file-remote-p path)
+ (file-readable-p path)))
+ (supportedp
+ (or (not (cdr (with-memoization
+ (alist-get (expand-file-name path)
+ gptel--link-type-cache
+ nil nil #'string=)
+ (cons t (gptel--file-binary-p path)))))
+ (gptel--model-mime-capable-p
+ (setq mime (mailcap-file-name-to-mime-type path))))))
+ (list t type path filep placementp readablep supportedp mime)
+ (list nil type path filep placementp readablep supportedp mime))))
+
(cl-defmethod gptel--parse-media-links ((_mode (eql 'markdown-mode)) beg end)
"Parse text and actionable links between BEG and END.
@@ -2212,39 +2272,34 @@ Return a list of the form
(:media \"/path/to/media.png\" :mime \"image/png\")
(:text \"More text\"))
for inclusion into the user prompt for the gptel request."
- (require 'mailcap) ;FIXME Avoid this somehow
- (let ((parts) (from-pt) (mime))
+ (let ((parts) (from-pt))
(save-excursion
(setq from-pt (goto-char beg))
- (while (re-search-forward
- (concat "\\(?:" markdown-regex-link-inline "\\|"
- markdown-regex-angle-uri "\\)")
- end t)
- (setq mime nil)
- (when-let* ((link-at-pt (markdown-link-at-pos (point)))
- ((gptel--link-standalone-p
- (car link-at-pt) (cadr link-at-pt)))
- (path (nth 3 link-at-pt))
- (path (string-remove-prefix "file://" path)))
- (cond
- ((seq-some (lambda (p) (string-prefix-p p path))
- '("https:" "http:" "ftp:"))
- ;; Collect text up to this image, and collect this image url
- (when (gptel--model-capable-p 'url) ; FIXME This is not a good
place
- ; to check for url capability!
+ (while (re-search-forward gptel-markdown--link-regex end t)
+ (let* ((link-at-pt (markdown-link-at-pos (point)))
+ (link-status (gptel-markdown--validate-link link-at-pt)))
+ (cl-destructuring-bind
+ (valid type path filep placementp readablep supportedp mime)
+ link-status
+ (cond
+ ((and valid (member type '("http" "https" "ftp")))
+ ;; Collect text up to this image, and collect this image url
(let ((text (buffer-substring-no-properties from-pt (car
link-at-pt))))
(unless (string-blank-p text) (push (list :text text) parts))
(push (list :url path :mime mime) parts)
- (setq from-pt (cadr link-at-pt)))))
- ((file-readable-p path)
- (if (or (not (gptel--file-binary-p path))
- (and (setq mime (mailcap-file-name-to-mime-type path))
- (gptel--model-mime-capable-p mime)))
- ;; Collect text up to this image, and collect this image
- (let ((text (buffer-substring-no-properties from-pt (car
link-at-pt))))
- (unless (string-blank-p text) (push (list :text text) parts))
- (push (if mime (list :media path :mime mime) (list :textfile
path)) parts)
- (setq from-pt (cadr link-at-pt)))
+ (setq from-pt (cadr link-at-pt))))
+ (valid ; Collect text up to this link, and collect this link
data
+ (let ((text (buffer-substring-no-properties from-pt (car
link-at-pt))))
+ (unless (string-blank-p text) (push (list :text text) parts))
+ (push (if mime (list :media path :mime mime) (list :textfile
path)) parts)
+ (setq from-pt (cadr link-at-pt))))
+ ((not filep)
+ (message "Link source not followed for unsupported link type
\"%s\"." type))
+ ((not placementp)
+ (message "Ignoring non-standalone link \"%s\"." path))
+ ((not readablep)
+ (message "Ignoring inaccessible file \"%s\"." path))
+ ((not supportedp)
(message "Ignoring unsupported binary file \"%s\"." path)))))))
(unless (= from-pt end)
(push (list :text (buffer-substring-no-properties from-pt end)) parts))
diff --git a/gptel.el b/gptel.el
index d9d55a67755..1bd296eb9c4 100644
--- a/gptel.el
+++ b/gptel.el
@@ -647,6 +647,36 @@ which see for BEG, END and PRE."
(add-text-properties
beg end `(gptel ,val front-sticky (gptel))))))
+(defun gptel-markdown--annotate-links (beg end)
+ "Annotate Markdown links whose sources are eligible to be sent with
`gptel-send.'
+
+Search between BEG and END."
+ (when gptel-track-media
+ (save-excursion
+ (goto-char beg) (forward-line -1)
+ (let ((link-ovs (cl-loop for o in (overlays-in (point) end)
+ if (overlay-get o 'gptel-track-media)
+ collect o into os finally return os)))
+ (while (re-search-forward gptel-markdown--link-regex end t)
+ (unless (gptel--in-response-p (1- (point)))
+ (let* ((link (markdown-link-at-pos (point)))
+ (from (car link)) (to (cadr link))
+ (link-status (gptel-markdown--validate-link link))
+ (ov (cl-loop for o in (overlays-in from to)
+ if (overlay-get o 'gptel-track-media)
+ return o)))
+ (if ov ; Ensure overlay over each link
+ (progn (move-overlay ov from to)
+ (setq link-ovs (delq ov link-ovs)))
+ (setq ov (make-overlay from to nil t))
+ (overlay-put ov 'gptel-track-media t)
+ (overlay-put ov 'evaporate t)
+ (overlay-put ov 'priority -80))
+ ;; Check if link will be sent, and annotate accordingly
+ (gptel--annotate-link ov link-status))))
+ (and link-ovs (mapc #'delete-overlay link-ovs))))
+ `(jit-lock-bounds ,beg . ,end)))
+
;;;###autoload
(define-minor-mode gptel-mode
"Minor mode for interacting with LLMs."