branch: elpa/gptel
commit dac4bc04417531dee49b33e8c2ff30816217292e
Author: Karthik Chikmagalur <[email protected]>
Commit: Karthik Chikmagalur <[email protected]>

    gptel-request: Add markdown link validation and annotation
    
    Add link annotation functions for gptel-mode in Markdown
    buffers.  This brings gptel's Markdown buffer support on par with
    Org, but neither is plugged into gptel-mode yet.
    
    * gptel-request.el (gptel-markdown--link-regex): Regex covering
    recognized Markdown link syntax, extracted from a function into a
    constant.
    
    (gptel-markdown-validate-link): New function (defsubst) to enforce
    a single path (narrow waist) for all Markdown link checks in gptel
    buffers.
    
    (gptel--parse-media-links): Use `gptel-markdown-validate-link'.
    
    (gptel--link-standalone-p): Change calling convention to accept a
    Markdown link instead of link bounds.  Using the bounds is indeed
    a better way to do it, but it doesn't fit the API of
    `gptel-markdown-validate-link'.
    
    * gptel.el (gptel-markdown--annotate-links): New function to
    annotate links that will be sent.  Not yet hooked up to jit-lock.
---
 gptel-request.el | 137 ++++++++++++++++++++++++++++++++++++++-----------------
 gptel.el         |  30 ++++++++++++
 2 files changed, 126 insertions(+), 41 deletions(-)

diff --git a/gptel-request.el b/gptel-request.el
index 4295aaa7f6c..8a239927751 100644
--- a/gptel-request.el
+++ b/gptel-request.el
@@ -51,6 +51,7 @@
 (require 'text-property-search)
 (require 'cl-generic)
 (require 'map)
+(require 'mailcap)                    ;FIXME Avoid this somehow
 
 (declare-function json-read "json" ())
 (defvar json-object-type)
@@ -740,6 +741,28 @@ buffer-locally, or let-bind it around calls to gptel 
queries, or via
 gptel presets."
   :type '(repeat string))
 
+(defcustom gptel-markdown-validate-link #'gptel--link-standalone-p
+  "Validate links to be sent as context with gptel queries.
+
+When `gptel-track-media' is enabled, this option determines if a
+supported link will be followed and its source included with gptel
+queries from Markdown buffers.  Currently only links to files are
+supported (along with web URLs if the model supports them).
+
+It should be a function that accepts a Markdown link and return non-nil
+if the link should be followed.  See `markdown-link-at-pos' for the
+structure of a Markdown link object.
+
+By default, links are considered valid if they are placed on a line by
+themselves, separated from surrounding text.  This is to ensure that
+links to be sent are intentionally placed.  You can set it to the
+function `always' to try to send all links."
+  :type '(choice
+          (const :tag "Standalone links" gptel--link-standalone-p)
+          (const :tag "All links" always)
+          (function :tag "Function"))
+  :group 'gptel)
+
 (defvar gptel--request-alist nil
   "Alist of active gptel requests.
 Each entry has the form (PROCESS . (FSM ABORT-CLOSURE))
@@ -785,6 +808,16 @@ See `gptel-backend'."
 file path to a cons cell of the form (t . binaryp), where binaryp is
 non-nil if the file is binary-encoded.")
 
+;; The following is derived from:
+;;
+;; (concat "\\(?:" markdown-regex-link-inline "\\|" markdown-regex-angle-uri 
"\\)")
+;;
+;; Since we want this known at compile time, when markdown-mode is not
+;; guaranteed to be available, we have to hardcode it.
+(defconst gptel-markdown--link-regex
+  
"\\(?:\\(?1:!\\)?\\(?2:\\[\\)\\(?3:\\^?\\(?:\\\\\\]\\|[^]]\\)*\\|\\)\\(?4:\\]\\)\\(?5:(\\)\\s-*\\(?6:[^)]*?\\)\\(?:\\s-+\\(?7:\"[^\"]*\"\\)\\)?\\s-*\\(?8:)\\)\\|\\(<\\)\\([a-z][a-z0-9.+-]\\{1,31\\}:[^]
    \n
<>,;()]+\\)\\(>\\)\\)"
+  "Link regex for gptel-mode in Markdown mode.")
+
 
 ;;; Utility functions
 
@@ -952,18 +985,19 @@ Return nil if string collapses to empty string."
     (unless (string-empty-p trimmed)
       trimmed)))
 
-(defsubst gptel--link-standalone-p (beg end)
-  "Return non-nil if positions BEG and END are isolated.
+(defun gptel--link-standalone-p (link)
+  "Return non-nil if Markdown LINK is isolated.
 
-This means the extent from BEG to END is the only non-whitespace
-content on this line."
-  (save-excursion
-    (and (= beg (progn (goto-char beg) (beginning-of-line)
-                       (skip-chars-forward "\t ")
-                       (point)))
-         (= end (progn (goto-char end) (end-of-line)
-                       (skip-chars-backward "\t ")
-                       (point))))))
+This means the extent from the link beginning to end is the only
+non-whitespace content on its line."
+  (let ((beg (car link)) (end (cadr link)))
+    (save-excursion
+      (and (= beg (progn (goto-char beg) (beginning-of-line)
+                         (skip-chars-forward "\t ")
+                         (point)))
+           (= end (progn (goto-char end) (end-of-line)
+                         (skip-chars-backward "\t ")
+                         (point)))))))
 
 (defsubst gptel--curl-path ()
   "Curl executable to use."
@@ -2199,11 +2233,37 @@ or
   (list `(:text ,(buffer-substring-no-properties
                   beg end))))
 
-(defvar markdown-regex-link-inline)
-(defvar markdown-regex-angle-uri)
 (declare-function markdown-link-at-pos "markdown-mode")
 (declare-function mailcap-file-name-to-mime-type "mailcap")
 
+(defsubst gptel-markdown--validate-link (link)
+  "Validate a Markdown LINK as sendable under the current gptel settings.
+
+Return a form (validp link-type path . REST), where REST is a list
+explaining why sending the link is not supported by gptel.  Only the
+first nil value in REST is guaranteed to be correct."
+  (let ((mime))
+    (if-let* ((path (nth 3 link))
+              (prefix (or (string-search "://" path) 0))
+              (type (if (= prefix 0) "file" (substring path 0 prefix)))
+              (path (if (= prefix 0) path (substring path (+ prefix 3))))
+              (filep (member type `("file" ,@(and (gptel--model-capable-p 'url)
+                                                  '("http" "https" "ftp")))))
+              (placementp (funcall gptel-markdown-validate-link link))
+              (readablep (or (member type '("http" "https" "ftp"))
+                             (file-remote-p path)
+                             (file-readable-p path)))
+              (supportedp
+               (or (not (cdr (with-memoization
+                                 (alist-get (expand-file-name path)
+                                            gptel--link-type-cache
+                                            nil nil #'string=)
+                               (cons t (gptel--file-binary-p path)))))
+                   (gptel--model-mime-capable-p
+                    (setq mime (mailcap-file-name-to-mime-type path))))))
+        (list t type path filep placementp readablep supportedp mime)
+      (list nil type path filep placementp readablep supportedp mime))))
+
 (cl-defmethod gptel--parse-media-links ((_mode (eql 'markdown-mode)) beg end)
   "Parse text and actionable links between BEG and END.
 
@@ -2212,39 +2272,34 @@ Return a list of the form
   (:media \"/path/to/media.png\" :mime \"image/png\")
   (:text \"More text\"))
 for inclusion into the user prompt for the gptel request."
-  (require 'mailcap)                    ;FIXME Avoid this somehow
-  (let ((parts) (from-pt) (mime))
+  (let ((parts) (from-pt))
     (save-excursion
       (setq from-pt (goto-char beg))
-      (while (re-search-forward
-              (concat "\\(?:" markdown-regex-link-inline "\\|"
-                      markdown-regex-angle-uri "\\)")
-              end t)
-        (setq mime nil)
-        (when-let* ((link-at-pt (markdown-link-at-pos (point)))
-                    ((gptel--link-standalone-p
-                      (car link-at-pt) (cadr link-at-pt)))
-                    (path (nth 3 link-at-pt))
-                    (path (string-remove-prefix "file://" path)))
-          (cond
-           ((seq-some (lambda (p) (string-prefix-p p path))
-                      '("https:" "http:" "ftp:"))
-            ;; Collect text up to this image, and collect this image url
-            (when (gptel--model-capable-p 'url) ; FIXME This is not a good 
place
-                                        ; to check for url capability!
+      (while (re-search-forward gptel-markdown--link-regex end t)
+        (let* ((link-at-pt (markdown-link-at-pos (point)))
+               (link-status (gptel-markdown--validate-link link-at-pt)))
+          (cl-destructuring-bind
+              (valid type path filep placementp readablep supportedp mime)
+              link-status
+            (cond
+             ((and valid (member type '("http" "https" "ftp")))
+              ;; Collect text up to this image, and collect this image url
               (let ((text (buffer-substring-no-properties from-pt (car 
link-at-pt))))
                 (unless (string-blank-p text) (push (list :text text) parts))
                 (push (list :url path :mime mime) parts)
-                (setq from-pt (cadr link-at-pt)))))
-           ((file-readable-p path)
-            (if (or (not (gptel--file-binary-p path))
-                    (and (setq mime (mailcap-file-name-to-mime-type path))
-                         (gptel--model-mime-capable-p mime)))
-                ;; Collect text up to this image, and collect this image
-                (let ((text (buffer-substring-no-properties from-pt (car 
link-at-pt))))
-                  (unless (string-blank-p text) (push (list :text text) parts))
-                  (push (if mime (list :media path :mime mime) (list :textfile 
path)) parts)
-                  (setq from-pt (cadr link-at-pt)))
+                (setq from-pt (cadr link-at-pt))))
+             (valid   ; Collect text up to this link, and collect this link 
data
+              (let ((text (buffer-substring-no-properties from-pt (car 
link-at-pt))))
+                (unless (string-blank-p text) (push (list :text text) parts))
+                (push (if mime (list :media path :mime mime) (list :textfile 
path)) parts)
+                (setq from-pt (cadr link-at-pt))))
+             ((not filep)
+              (message "Link source not followed for unsupported link type 
\"%s\"." type))
+             ((not placementp)
+              (message "Ignoring non-standalone link \"%s\"." path))
+             ((not readablep)
+              (message "Ignoring inaccessible file \"%s\"." path))
+             ((not supportedp)
               (message "Ignoring unsupported binary file \"%s\"." path)))))))
     (unless (= from-pt end)
       (push (list :text (buffer-substring-no-properties from-pt end)) parts))
diff --git a/gptel.el b/gptel.el
index d9d55a67755..1bd296eb9c4 100644
--- a/gptel.el
+++ b/gptel.el
@@ -647,6 +647,36 @@ which see for BEG, END and PRE."
          (add-text-properties
           beg end `(gptel ,val front-sticky (gptel))))))
 
+(defun gptel-markdown--annotate-links (beg end)
+  "Annotate Markdown links whose sources are eligible to be sent with 
`gptel-send.'
+
+Search between BEG and END."
+  (when gptel-track-media
+    (save-excursion
+      (goto-char beg) (forward-line -1)
+      (let ((link-ovs (cl-loop for o in (overlays-in (point) end)
+                               if (overlay-get o 'gptel-track-media)
+                               collect o into os finally return os)))
+        (while (re-search-forward gptel-markdown--link-regex end t)
+          (unless (gptel--in-response-p (1- (point)))
+            (let* ((link (markdown-link-at-pos (point)))
+                   (from (car link)) (to (cadr link))
+                   (link-status (gptel-markdown--validate-link link))
+                   (ov (cl-loop for o in (overlays-in from to)
+                                if (overlay-get o 'gptel-track-media)
+                                return o)))
+              (if ov                    ; Ensure overlay over each link
+                  (progn (move-overlay ov from to)
+                         (setq link-ovs (delq ov link-ovs)))
+                (setq ov (make-overlay from to nil t))
+                (overlay-put ov 'gptel-track-media t)
+                (overlay-put ov 'evaporate t)
+                (overlay-put ov 'priority -80))
+              ;; Check if link will be sent, and annotate accordingly
+              (gptel--annotate-link ov link-status))))
+        (and link-ovs (mapc #'delete-overlay link-ovs))))
+    `(jit-lock-bounds ,beg . ,end)))
+
 ;;;###autoload
 (define-minor-mode gptel-mode
   "Minor mode for interacting with LLMs."

Reply via email to