This is an automated email from the git hooks/post-receive script.

guix_mirror_bot pushed a commit to branch master
in repository guix.

The following commit(s) were added to refs/heads/master by this push:
     new 53ba4e4646 gnu: Add python-onnx-asr.
53ba4e4646 is described below

commit 53ba4e46467782fca0e0658ee07751f671025b5b
Author: Danny Milosavljevic <[email protected]>
AuthorDate: Sat Mar 7 04:12:00 2026 +0100

    gnu: Add python-onnx-asr.
    
    * gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch:
    New file.
    * gnu/local.mk (dist_patch_DATA): Add reference to it.
    * gnu/packages/speech.scm (%parakeet-hf-base): New variable.
    (parakeet-tdt-config): New variable.
    (parakeet-tdt-vocab): New variable.
    (parakeet-tdt-encoder-int8): New variable.
    (parakeet-tdt-decoder-joint-int8): New variable.
    (python-onnx-asr): New variable.
    
    Change-Id: Id103dfe51478b68d8332308fa30e12e5f16d1d80
---
 gnu/local.mk                                       |   1 +
 ...on-onnx-asr-0.10.2-bundled-parakeet-model.patch |  21 ++++
 gnu/packages/speech.scm                            | 129 +++++++++++++++++++++
 3 files changed, 151 insertions(+)

diff --git a/gnu/local.mk b/gnu/local.mk
index beff7a9300..6267e56038 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -2101,6 +2101,7 @@ dist_patch_DATA =                                         \
   %D%/packages/patches/python-msal-requests.patch      \
   %D%/packages/patches/python-norns-nose.patch                 \
   %D%/packages/patches/python-numpy-gcc-14.patch               \
+  %D%/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch     \
   %D%/packages/patches/python-random2-getrandbits-test.patch           \
   %D%/packages/patches/python-pillow-use-zlib-1.3.patch        \
   %D%/packages/patches/python-pydocstyle-add-support-for-pep701.patch  \
diff --git a/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch b/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch
new file mode 100644
index 0000000000..b6516e3c49
--- /dev/null
+++ b/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch
@@ -0,0 +1,21 @@
+From: Danny Milosavljevic <[email protected]>
+Date: 2026-02-13
+Subject: Use bundled Parakeet TDT V3 int8 model instead of downloading from
+ HuggingFace.
+
+The @PARAKEET_MODEL_DIR@ placeholder is replaced with the actual store path
+during the build.
+
+--- a/src/onnx_asr/loader.py
++++ b/src/onnx_asr/loader.py
+@@ -308,6 +308,10 @@
+         case "nemo-parakeet-tdt-0.6b-v3":
+             model_type = NemoConformerTdt
+             default_repo_id = "istupakov/parakeet-tdt-0.6b-v3-onnx"
++            if path is None:
++                path = "@PARAKEET_MODEL_DIR@"
++            if quantization is None:
++                quantization = "int8"
+         case "nemo-conformer-aed":
+             model_type = NemoConformerAED
+         case "nemo-canary-1b-v2":
diff --git a/gnu/packages/speech.scm b/gnu/packages/speech.scm
index e771a29520..38858a6e68 100644
--- a/gnu/packages/speech.scm
+++ b/gnu/packages/speech.scm
@@ -824,3 +824,132 @@ intermediate representation.")
     (description "This package enables developers to author ONNX models
 using a Python-based domain-specific language.")
     (license license:expat)))
+
+;;; Parakeet TDT V3 ONNX model weights from istupakov/parakeet-tdt-0.6b-v3-onnx
+;;; (HuggingFace, revision abd2878d52a678ce380088ef9d9b1d9664404565).
+;;; License: CC-BY-4.0 (NVIDIA).
+;;; Int8 quantized variant (~670 MB total).
+
+(define %parakeet-hf-base         ; URL prefix shared by the model origins
+  "https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx/resolve/abd2878d52a678ce380088ef9d9b1d9664404565")
+
+(define parakeet-tdt-config       ; config.json under %parakeet-hf-base
+  (origin
+    (method url-fetch)
+    (uri (string-append %parakeet-hf-base "/config.json"))
+    (file-name "config.json")     ; NOTE(review): phase looks this name up
+    (sha256
+     (base32 "0rn4i8ad5h1vga6yq04qpy6qmc30rpvd9bqhqbrcm64pdg3h6sb6"))))
+
+(define parakeet-tdt-vocab        ; vocab.txt under %parakeet-hf-base
+  (origin
+    (method url-fetch)
+    (uri (string-append %parakeet-hf-base "/vocab.txt"))
+    (file-name "vocab.txt")       ; NOTE(review): phase looks this name up
+    (sha256
+     (base32 "0pf3wcvps76wq7iadw37lk7xcjs7gpmlbxficg2nmg54krkl91fm"))))
+
+(define parakeet-tdt-encoder-int8 ; encoder-model.int8.onnx, same base URL
+  (origin
+    (method url-fetch)
+    (uri (string-append %parakeet-hf-base "/encoder-model.int8.onnx"))
+    (file-name "encoder-model.int8.onnx") ; NOTE(review): looked up in phase
+    (sha256
+     (base32 "02gzb82y86vl7jr69bn7qyfbifpd4nbi9ivpnabn020vgvxd4fb1"))))
+
+(define parakeet-tdt-decoder-joint-int8 ; decoder_joint-model.int8.onnx
+  (origin
+    (method url-fetch)
+    (uri (string-append %parakeet-hf-base "/decoder_joint-model.int8.onnx"))
+    (file-name "decoder_joint-model.int8.onnx") ; NOTE(review): used in phase
+    (sha256
+     (base32 "0w3scrvqj74xv6h2f8c1k2q9234nwf1yvj7dv9sh78yiwcz4i9zf"))))
+
+(define-public python-onnx-asr    ; onnx_asr from PyPI plus Parakeet model files
+  (package
+    (name "python-onnx-asr")
+    (version "0.10.2")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "onnx_asr" version))
+       (sha256
+        (base32 "0d5vmkavcqjf7b2aa0nc118b2pf34mc7yzjkaw92rl42rwwijf3h"))
+       (patches
+        (search-patches "python-onnx-asr-0.10.2-bundled-parakeet-model.patch"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags
+      #~(list ;; These tests try to download models from HuggingFace Hub.
+              "--ignore=tests/onnx_asr/test_recognize.py"
+              "--ignore=tests/onnx_asr/test_cli.py"
+              "--ignore=tests/onnx_asr/test_load_model_errors.py"
+              ;;; These tests would compare preprocessor output against
+              ;;; reference implementations that are not in Guix:
+              ;;; - kaldi_native_fbank (C++ lib, not packaged)
+              ;;; - nemo (NVIDIA NeMo framework, not packaged)
+              ;;; - openai-whisper (Python package, not packaged;
+              ;;;   whisper-cpp exists but is C++ only, no Python module)
+              "--ignore=tests/preprocessors/test_kaldi.py"
+              "--ignore=tests/preprocessors/test_nemo.py"
+              "--ignore=tests/preprocessors/test_whisper_preprocessor.py")
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'install 'install-parakeet-model
+            (lambda* (#:key inputs outputs #:allow-other-keys)
+              (let* ((out (assoc-ref outputs "out"))
+                     (site (string-append out "/lib/python"
+                                          #$(version-major+minor
+                                             (package-version python))
+                                          "/site-packages/onnx_asr"))
+                     (model-dir (string-append site
+                                               "/models-data"
+                                               "/parakeet-tdt-0.6b-v3")))
+                (mkdir-p model-dir)
+                (symlink (assoc-ref inputs "config.json") ; link, do not copy
+                         (string-append model-dir "/config.json"))
+                (symlink (assoc-ref inputs "vocab.txt")
+                         (string-append model-dir "/vocab.txt"))
+                (symlink (assoc-ref inputs "encoder-model.int8.onnx")
+                         (string-append model-dir
+                                        "/encoder-model.int8.onnx"))
+                (symlink (assoc-ref inputs
+                            "decoder_joint-model.int8.onnx")
+                         (string-append model-dir
+                                          "/decoder_joint-model.int8.onnx")))))
+          (add-after 'install-parakeet-model 'patch-model-paths
+            (lambda* (#:key outputs #:allow-other-keys)
+              (let* ((out (assoc-ref outputs "out"))
+                     (site (string-append out "/lib/python"
+                                          #$(version-major+minor
+                                             (package-version python))
+                                          "/site-packages/onnx_asr"))
+                     (model-dir (string-append site ; same path as phase above
+                                               "/models-data"
+                                               "/parakeet-tdt-0.6b-v3")))
+                (substitute* (string-append site "/loader.py")
+                  (("@PARAKEET_MODEL_DIR@") model-dir))))))))
+    (propagated-inputs
+     (list python-numpy
+           python-huggingface-hub
+           (list onnxruntime "python")))
+    (native-inputs
+     (list nss-certs-for-test
+           onnx
+           parakeet-tdt-config
+           parakeet-tdt-vocab
+           parakeet-tdt-encoder-int8
+           parakeet-tdt-decoder-joint-int8
+           python-hatchling
+           python-onnxscript
+           python-pytorch
+           python-pytest
+           python-torchaudio))
+    (home-page "https://github.com/istupakov/onnx-asr")
+    (synopsis "Speech recognition using ONNX models")
+    (description
+     "ONNX ASR is a Python library for automatic speech recognition using
+ONNX Runtime.  It supports models including Whisper and NeMo Parakeet.
+Includes bundled Parakeet TDT V3 model weights (int8, CC-BY-4.0, NVIDIA).")
+    (license license:expat)))

Reply via email to