branch: elpa/subed
commit 664e7c00156e285805c445aa0c2bfcee6183c711
Author: Sacha Chua <[email protected]>
Commit: Sacha Chua <[email protected]>

    subed-word-data: parse file, add/remove word timestamps in region
    
    * subed/subed-word-data.el (subed-word-data-parse-file):
    New function.
    (subed-word-data-load-from-file): Use subed-word-data-parse-file.
    (subed-word-data-add-word-timestamps): Handle regions.
    (subed-word-data-remove-word-timestamps): Handle regions.
---
 subed/subed-word-data.el | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/subed/subed-word-data.el b/subed/subed-word-data.el
index 29f07cfaee..adb86abfb9 100644
--- a/subed/subed-word-data.el
+++ b/subed/subed-word-data.el
@@ -187,6 +187,18 @@ Supports WhisperX JSON, YouTube VTT, and Youtube SRV2 files."
     (subed-word-data-refresh-text-properties)
     data))
 
+(defun subed-word-data-parse-file (file &optional offset-ms)
+  "Parse FILE for word data.
+Return a list of ((start . ?), (end . ?) (text . ?)).
+If OFFSET-MS is provided, add it to the times."
+  (let ((data (pcase (file-name-extension file)
+                ("json" (subed-word-data--extract-words-from-whisperx-json file))
+                ("srv2" (subed-word-data--extract-words-from-srv2 (xml-parse-file file)))
+                ("vtt" (subed-word-data--extract-words-from-youtube-vtt file))
+                ("TextGrid" (subed-word-data--extract-words-from-textgrid file)))))
+    (when offset-ms (setq data (subed-word-data-adjust-times data offset-ms)))
+    data))
+
 ;;;###autoload
 (defun subed-word-data-load-from-file (file &optional offset)
   "Load word-level timing from FILE.
@@ -203,13 +215,7 @@ Supports WhisperX JSON, YouTube VTT, Youtube SRV2, and TextGrid files."
                                             f))))
                      (when current-prefix-arg
                        (read-string "Start offset: "))))
-  (let ((data (pcase (file-name-extension file)
-                ("json" (subed-word-data--extract-words-from-whisperx-json file))
-                ("srv2" (subed-word-data--extract-words-from-srv2 (xml-parse-file file)))
-                ("vtt" (subed-word-data--extract-words-from-youtube-vtt file))
-                ("TextGrid" (subed-word-data--extract-words-from-textgrid file)))))
-    (when offset (setq data (subed-word-data-adjust-times data offset)))
-    (subed-word-data--load data)))
+  (subed-word-data--load (subed-word-data-parse-file file offset)))
 
 (defun subed-word-data-load-from-string (string)
   "Load word-level timing from STRING.
@@ -367,14 +373,17 @@ Return non-nil if they are the same after normalization."
           (subed-word-data--add-word-properties (point) pos candidate)
           (setq word-data try-list))))))
 
-(defun subed-word-data-add-word-timestamps ()
+(defun subed-word-data-add-word-timestamps (&optional beg end)
   "Add word timestamps.
 It uses the text properties to determine the start of each word.
 This only works for VTTs."
-  (interactive)
+  (interactive (if (region-active-p)
+                   (list (region-beginning)
+                         (region-end))
+                 (list (point-min) (point-max))))
   (save-excursion
-    (subed-word-data-remove-word-timestamps)
-    (subed-for-each-subtitle (point-min) (point-max) t
+    (subed-word-data-remove-word-timestamps beg end)
+    (subed-for-each-subtitle beg end t
       (let ((start (save-excursion (subed-jump-to-subtitle-text) (point))))
         (subed-jump-to-subtitle-end)
         (while (> (point) start)
@@ -383,11 +392,14 @@ This only works for VTTs."
             (save-excursion
               (insert (format "<%s>" (subed-msecs-to-timestamp (get-text-property (point) 'subed-word-data-start)))))))))))
 
-(defun subed-word-data-remove-word-timestamps ()
+(defun subed-word-data-remove-word-timestamps (&optional beg end)
   "Remove all word timestamps."
-  (interactive)
-  (goto-char (point-min))
-  (while (re-search-forward "<[0-9]+:[0-9]+:[0-9]+\\.[0-9]+>" nil t)
+  (interactive (if (region-active-p)
+                   (list (region-beginning)
+                         (region-end))
+                 (list (point-min) (point-max))))
+  (goto-char (or end (point-max)))
+  (while (re-search-backward "<[0-9]+:[0-9]+:[0-9]+\\.[0-9]+>" beg t)
     (replace-match "")))
 
 (defun subed-word-data-refresh-region (beg end)
@@ -542,6 +554,8 @@ distance is expressed as a ratio of number of edits / maximum length of phrase o
 ;; (subed-word-data-find-approximate-match "Go into the room." (split-string "Go in to the room. There you will" " "))
 ;; (subed-word-data-find-approximate-match "The quick brown fox jumps over the lazy dog" (split-string "The quick, oops, the quick brown fox jumps over the lazy dog and goes all sorts of places" " ") "\\<oops\\>")
 ;; (subed-word-data-find-approximate-match "I already talk pretty quickly," (split-string "I already talk pretty quickly. Oops. I already talk pretty quickly, so I'm not going" " ") "\\<oops\\>")
+;; (subed-word-data-find-approximate-match "The quick brown fox" (split-string "Well, let's get started. The quick, I mean, the quick brown fox jumps over the lazy dog." " ") "\\<oops\\>") ; hmm, the processing of this one could be improved.
+
 (defun subed-word-data-fix-subtitle-timing (beg end)
   "Sets subtitle starts and stops based on the word data.
 Assumes words haven't been edited."

Reply via email to