branch: elpa/subed
commit 88a64355523a3bc65790afc7fe6e852183df6df4
Author: Sacha Chua <[email protected]>
Commit: Sacha Chua <[email protected]>

    subed-align, subed-word-data: word-level timestamps
    
    You can now use Montreal Forced Aligner or Aeneas
    to generate word timestamps using
    subed-align-mfa-set-word-data or
    subed-align-aeneas-set-word-data.
    
    * subed/subed-align.el (subed-align-mfa-conda-env):
    New variable. I should probably make this a defcustom someday.
    (subed-align-mfa-command): New variable.
    (subed-align-mfa-dictionary): New variable.
    (subed-align-aeneas-set-word-data):
    New command to use Aeneas for word timestamps.
    (subed-align-mfa-process-environment): New.
    (subed-align-mfa-set-word-data): New command.
    
    * subed/subed-word-data.el 
(subed-word-data-compare-normalized-string-distance):
    New function for approximate matching.
    (subed-word-data-compare-normalized-string-distance-threshold):
    Use approximate matching by default.
    (subed-word-data-subtitle-entries):
    Use renamed fuzz ms.
    (subed-word-data-fuzz-ms): Renamed.
    (subed-word-data-refresh-text-properties-for-subtitle):
    Use fuzz variable.
    (subed-word-data-refresh-text-properties-for-subtitle):
    Use fuzz variable.
    (subed-word-data-refresh-text-properties): Use
    fuzz variable.
---
 NEWS.org                 |   5 ++
 subed/subed-align.el     | 129 +++++++++++++++++++++++++++++++++++++++++++++++
 subed/subed-word-data.el |  29 ++++++++---
 subed/subed.el           |   2 +-
 4 files changed, 156 insertions(+), 9 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index 7f4f17a631..130b91de4f 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -2,6 +2,11 @@
 
 * subed news
 
+** Version 1.4.1 - 2026-02-16 - Sacha Chua
+
+- You can now use Montreal Forced Aligner or Aeneas to generate word 
timestamps using ~subed-align-mfa-set-word-data~ or 
~subed-align-aeneas-set-word-data~.
+- subed-word-data now uses approximate matches by default.
+
 ** Version 1.4.0 - 2026-02-10 - Sacha Chua
 
 - subed-word-data.el can now load word timing from TextGrid files produced by 
the Montreal Forced Aligner.
diff --git a/subed/subed-align.el b/subed/subed-align.el
index f5eb8c8d90..b916f8904e 100644
--- a/subed/subed-align.el
+++ b/subed/subed-align.el
@@ -41,6 +41,14 @@
 Ex: 
task_adjust_boundary_nonspeech_min=0.500|task_adjust_boundary_nonspeech_string=REMOVE
 will remove silence and other non-speech spans.")
 
+(defvar subed-align-mfa-conda-env "~/miniconda/env/aligner"
+  "Set this to the path to your Conda environment.")
+(defvar subed-align-mfa-command '("mfa" "align")
+  "Command to run the Montreal Forced Aligner.")
+(defvar subed-align-mfa-dictionary "french_mfa")
+(defvar subed-align-mfa-acoustic-model "french_mfa")
+
+
 ;;;###autoload
 (defun subed-align-region (audio-file beg end)
   "Align just the given section."
@@ -164,6 +172,127 @@ Return the new filename."
         (find-file new-file))
       new-file)))
 
+;; TODO Scope it to the region
+(defun subed-align-aeneas-set-word-data (audio-file)
+  "Align AUDIO-FILE with TEXT-FILE to get timestamps in FORMAT.
+Store the word data in `subed-word-data--cache' for use by subed-word-data.
+This uses the Aeneas forced aligner."
+  (interactive
+   (list
+    (or
+     (subed-media-file)
+     (subed-guess-media-file subed-audio-extensions)
+     (read-file-name "Audio file: "))))
+  (unless (derived-mode-p 'subed-mode)
+    (error "Must be in `subed-mode' buffer."))
+  (let ((temp-input (make-temp-file "subed-align-input" nil ".txt"))
+        (temp-output (make-temp-file "subed-align-output" nil ".txt"))
+        (json-object-type 'alist)
+        (json-array-type 'list)
+        data)
+    (write-region
+     (if (derived-mode-p 'subed-mode)
+         (mapconcat
+          (lambda (o) (elt o 3)) (subed-subtitle-list)
+          "\n\n")
+       (buffer-string))
+     nil temp-input)
+    (apply
+     #'call-process
+     (car subed-align-command)
+     nil
+     (get-buffer-create "*subed-aeneas*")
+     t
+     (append (cdr subed-align-command)
+             (list (expand-file-name audio-file)
+                   temp-input
+                   (format 
"task_language=%s|is_text_type=mplain|os_task_file_levels=3|os_task_file_format=json%s"
+                           subed-align-language
+                           (if subed-align-options (concat "|" 
subed-align-options) ""))
+                   temp-output)))
+    (setq data (mapcar
+                (lambda (o)
+                  (let-alist o
+                    `((start . ,(* 1000 (string-to-number .begin)))
+                      (end . ,(* 1000 (string-to-number .end)))
+                      (text . ,(string-join .lines " ")))))
+                (alist-get 'fragments
+                           (with-temp-buffer
+                             (insert-file-contents temp-output)
+                             (goto-char (point-min))
+                             (json-read)))))
+    (subed-word-data--load data)
+    (delete-file temp-input)
+    (delete-file temp-output)
+    data))
+
+(defun subed-align-mfa-process-environment ()
+  "Return process-environment with conda env activated."
+  (let* ((env-bin (expand-file-name "bin" subed-align-mfa-conda-env))
+         (env-lib (expand-file-name "lib" subed-align-mfa-conda-env))
+         (path-var (concat env-bin ":" (getenv "PATH")))
+         (ld-library-path (concat env-lib ":" (or (getenv "LD_LIBRARY_PATH") 
""))))
+    (list (concat "PATH=" path-var)
+          (concat "LD_LIBRARY_PATH=" ld-library-path)
+          (concat "CONDA_PREFIX=" (expand-file-name subed-align-mfa-conda-env))
+          (concat "CONDA_DEFAULT_ENV=aligner"))))
+
+;;;###autoload
+(defun subed-align-mfa-set-word-data (audio-file)
+  "Set the word data using Montreal Forced Aligner."
+  (interactive
+   (list
+    (or
+     (subed-media-file)
+     (subed-guess-media-file subed-audio-extensions)
+     (read-file-name "Audio file: "))))
+  ;; MFA expects audio and text
+  (let* ((temp-input (make-temp-file "subed-align-mfa-input" t))
+         (temp-output (make-temp-file "subed-align-mfa-output" t))
+         (input-wav (expand-file-name "input.wav" temp-input)))
+    ;; Set up input.wav
+    (if (string= (downcase (file-name-extension audio-file)) "wav")
+        (copy-file audio-file input-wav)
+      (call-process
+       subed-ffmpeg-executable
+       nil (get-buffer-create "*mfa*") nil
+       "-i"
+       audio-file
+       "-ar"
+       "16000"
+       input-wav))
+    ;; Set up input.txt
+    (write-region
+     (if (derived-mode-p 'subed-mode)
+         (mapconcat
+          (lambda (o) (elt o 3)) (subed-subtitle-list)
+          "\n\n")
+       (buffer-string))
+     nil (expand-file-name "input.txt" temp-input))
+    (let ((process-environment (append
+                                (and subed-align-mfa-conda-env 
(subed-align-mfa-process-environment))
+                                process-environment)))
+      (apply #'call-process
+             (if subed-align-mfa-conda-env
+                 (expand-file-name
+                  (car subed-align-mfa-command)
+                  (expand-file-name
+                   "bin"
+                   subed-align-mfa-conda-env))
+               (car subed-align-mfa-command))
+             nil (get-buffer-create "*mfa*") nil
+             (append
+              (cdr subed-align-mfa-command)
+              (list
+               temp-input
+               subed-align-mfa-dictionary
+               subed-align-mfa-acoustic-model
+               temp-output))))
+    (subed-word-data-load-from-file (expand-file-name "input.TextGrid"
+                                                      temp-output))
+    (delete-directory temp-input t)
+    (delete-directory temp-output t)))
+
 (defun subed-align-reinsert-comments (subtitles)
   "Reinsert the comments from SUBTITLES.
 Assume that the subtitles are still in the same sequence."
diff --git a/subed/subed-word-data.el b/subed/subed-word-data.el
index 46268e1371..29f07cfaee 100644
--- a/subed/subed-word-data.el
+++ b/subed/subed-word-data.el
@@ -257,7 +257,19 @@ For now, only JSON or SRV2 files are supported."
   (string= (subed-word-data-normalize-word word1)
            (subed-word-data-normalize-word word2)))
 
-(defvar subed-word-data-compare-function 
'subed-word-data-compare-normalized-string=
+(defvar subed-word-data-compare-normalized-string-distance-threshold 0.2
+  "Factor used for similarity comparison.")
+
+(defun subed-word-data-compare-normalized-string-distance (word1 word2)
+  "Compare two words and return t if they are similar enough after 
normalization.
+See `subed-word-data-compare-normalized-string-distance-threshold'."
+  (let ((w1 (subed-word-data-normalize-word word1))
+        (w2 (subed-word-data-normalize-word word2)))
+    (< (/ (string-distance w1 w2)
+          (* 1.0 (max (length w1) (length w2))))
+       subed-word-data-compare-normalized-string-distance-threshold)))
+
+(defvar subed-word-data-compare-function 
'subed-word-data-compare-normalized-string-distance
   "Function to use to compare.")
 
 (defun subed-word-data-compare (word1 word2)
@@ -306,10 +318,10 @@ Return non-nil if they are the same after normalization."
     (let ((time (assoc-default 'start (subed-word-data--look-up-word))))
       (when time (- time subed-subtitle-spacing))))))
 
-(defun subed-word-data-subtitle-entries (&optional fuzz-factor)
+(defun subed-word-data-subtitle-entries ()
   "Return the entries that start and end within the current subtitle."
-  (let ((start (- (subed-subtitle-msecs-start) (or fuzz-factor 
subed-subtitle-spacing)))
-        (stop (+ (subed-subtitle-msecs-stop) (or fuzz-factor 
subed-subtitle-spacing))))
+  (let ((start (- (subed-subtitle-msecs-start) subed-word-data-fuzz-ms))
+        (stop (+ (subed-subtitle-msecs-stop) subed-word-data-fuzz-ms)))
     (seq-filter
      (lambda (o)
        (and (<= (or (alist-get 'end o) most-positive-fixnum) stop)
@@ -319,7 +331,7 @@ Return non-nil if they are the same after normalization."
 
 (defvar subed-word-data-threshold 5
   "Number of words to consider for matching.")
-(defvar subed-word-data-fuzz-factor 200
+(defvar subed-word-data-fuzz-ms 500
   "Milliseconds to consider before or after a subtitle.")
 
 (defun subed-word-data-refresh-text-properties-for-subtitle ()
@@ -329,7 +341,7 @@ Return non-nil if they are the same after normalization."
                           '(subed-word-data-start subed-word-data-end 
font-lock-face))
   (let* ((text-start (progn (subed-jump-to-subtitle-text) (point)))
          pos
-         (word-data (reverse (subed-word-data-subtitle-entries 
subed-word-data-fuzz-factor)))
+         (word-data (reverse (subed-word-data-subtitle-entries)))
          candidate
          cand-count)
     (subed-jump-to-subtitle-end)
@@ -338,12 +350,13 @@ Return non-nil if they are the same after normalization."
       (setq pos (point))
       (backward-word)
       (let ((try-list word-data)
+            (current-word (buffer-substring (point) pos))
             candidate)
         (setq candidate (car try-list) cand-count 0)
         (setq try-list (cdr try-list))
         (while (and candidate
                     (< cand-count subed-word-data-threshold)
-                    (not (subed-word-data-compare (buffer-substring (point) 
pos)
+                    (not (subed-word-data-compare current-word
                                    (alist-get 'text candidate))))
           (setq candidate (car try-list) cand-count (1+ cand-count))
           (when (> cand-count subed-word-data-threshold)
@@ -415,7 +428,7 @@ This only works for VTTs."
       (while (not (eobp))
         (let* ((text-start (progn (subed-jump-to-subtitle-text) (point)))
                pos
-               (word-data (reverse (subed-word-data-subtitle-entries 
subed-word-data-fuzz-factor)))
+               (word-data (reverse (subed-word-data-subtitle-entries)))
                candidate)
           (subed-jump-to-subtitle-end)
           (while (> (point) text-start)
diff --git a/subed/subed.el b/subed/subed.el
index 087d32c59f..721bc6a2df 100644
--- a/subed/subed.el
+++ b/subed/subed.el
@@ -1,6 +1,6 @@
 ;;; subed.el --- A major mode for editing subtitles  -*- lexical-binding: t; 
-*-
 
-;; Version: 1.4.0
+;; Version: 1.4.1
 ;; Maintainer: Sacha Chua <[email protected]>
 ;; Author: Random User
 ;; Keywords: convenience, files, hypermedia, multimedia

Reply via email to