diff options
| author | Danny Milosavljevic <dannym@friendly-machines.com> | 2026-03-07 04:12:00 +0100 |
|---|---|---|
| committer | Danny Milosavljevic <dannym@friendly-machines.com> | 2026-03-08 10:40:25 +0100 |
| commit | 53ba4e46467782fca0e0658ee07751f671025b5b (patch) | |
| tree | 291a51fb43955f2cab7cbb56347effa7d4e4978b | |
| parent | b47c9993bc1146050fa6ab83f24c1a7a0baf864f (diff) | |
gnu: Add python-onnx-asr.

* gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch:
New file.
* gnu/local.mk (dist_patch_DATA): Add reference to it.
* gnu/packages/speech.scm (%parakeet-hf-base): New variable.
(parakeet-tdt-config): New variable.
(parakeet-tdt-vocab): New variable.
(parakeet-tdt-encoder-int8): New variable.
(parakeet-tdt-decoder-joint-int8): New variable.
(python-onnx-asr): New variable.

Change-Id: Id103dfe51478b68d8332308fa30e12e5f16d1d80
| -rw-r--r-- | gnu/local.mk | 1 |
| -rw-r--r-- | gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch | 21 |
| -rw-r--r-- | gnu/packages/speech.scm | 129 |
3 files changed, 151 insertions, 0 deletions
diff --git a/gnu/local.mk b/gnu/local.mk index beff7a93001..6267e560387 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -2101,6 +2101,7 @@ dist_patch_DATA = \ %D%/packages/patches/python-msal-requests.patch \ %D%/packages/patches/python-norns-nose.patch \ %D%/packages/patches/python-numpy-gcc-14.patch \ + %D%/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch \ %D%/packages/patches/python-random2-getrandbits-test.patch \ %D%/packages/patches/python-pillow-use-zlib-1.3.patch \ %D%/packages/patches/python-pydocstyle-add-support-for-pep701.patch \ diff --git a/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch b/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch new file mode 100644 index 00000000000..b6516e3c49f --- /dev/null +++ b/gnu/packages/patches/python-onnx-asr-0.10.2-bundled-parakeet-model.patch @@ -0,0 +1,21 @@ +From: Danny Milosavljevic <dannym@friendly-machines.com> +Date: 2026-02-13 +Subject: Use bundled Parakeet TDT V3 int8 model instead of downloading from + HuggingFace. + +The @PARAKEET_MODEL_DIR@ placeholder is replaced with the actual store path +during the build. 
+ +--- a/src/onnx_asr/loader.py ++++ b/src/onnx_asr/loader.py +@@ -308,6 +308,10 @@ + case "nemo-parakeet-tdt-0.6b-v3": + model_type = NemoConformerTdt + default_repo_id = "istupakov/parakeet-tdt-0.6b-v3-onnx" ++ if path is None: ++ path = "@PARAKEET_MODEL_DIR@" ++ if quantization is None: ++ quantization = "int8" + case "nemo-conformer-aed": + model_type = NemoConformerAED + case "nemo-canary-1b-v2": diff --git a/gnu/packages/speech.scm b/gnu/packages/speech.scm index e771a29520b..38858a6e684 100644 --- a/gnu/packages/speech.scm +++ b/gnu/packages/speech.scm @@ -824,3 +824,132 @@ intermediate representation.") (description "This package enables developers to author ONNX models using a Python-based domain-specific language.") (license license:expat))) + +;;; Parakeet TDT V3 ONNX model weights from istupakov/parakeet-tdt-0.6b-v3-onnx +;;; (HuggingFace, revision abd2878d52a678ce380088ef9d9b1d9664404565). +;;; License: CC-BY-4.0 (NVIDIA). +;;; Int8 quantized variant (~670 MB total). + +(define %parakeet-hf-base + "https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx/resolve/abd2878d52a678ce380088ef9d9b1d9664404565") + +(define parakeet-tdt-config + (origin + (method url-fetch) + (uri (string-append %parakeet-hf-base "/config.json")) + (file-name "config.json") + (sha256 + (base32 "0rn4i8ad5h1vga6yq04qpy6qmc30rpvd9bqhqbrcm64pdg3h6sb6")))) + +(define parakeet-tdt-vocab + (origin + (method url-fetch) + (uri (string-append %parakeet-hf-base "/vocab.txt")) + (file-name "vocab.txt") + (sha256 + (base32 "0pf3wcvps76wq7iadw37lk7xcjs7gpmlbxficg2nmg54krkl91fm")))) + +(define parakeet-tdt-encoder-int8 + (origin + (method url-fetch) + (uri (string-append %parakeet-hf-base "/encoder-model.int8.onnx")) + (file-name "encoder-model.int8.onnx") + (sha256 + (base32 "02gzb82y86vl7jr69bn7qyfbifpd4nbi9ivpnabn020vgvxd4fb1")))) + +(define parakeet-tdt-decoder-joint-int8 + (origin + (method url-fetch) + (uri (string-append %parakeet-hf-base "/decoder_joint-model.int8.onnx")) + 
(file-name "decoder_joint-model.int8.onnx") + (sha256 + (base32 "0w3scrvqj74xv6h2f8c1k2q9234nwf1yvj7dv9sh78yiwcz4i9zf")))) + +(define-public python-onnx-asr + (package + (name "python-onnx-asr") + (version "0.10.2") + (source + (origin + (method url-fetch) + (uri (pypi-uri "onnx_asr" version)) + (sha256 + (base32 "0d5vmkavcqjf7b2aa0nc118b2pf34mc7yzjkaw92rl42rwwijf3h")) + (patches + (search-patches "python-onnx-asr-0.10.2-bundled-parakeet-model.patch")))) + (build-system pyproject-build-system) + (arguments + (list + #:test-flags + #~(list ;; These tests try to download models from HuggingFace Hub. + "--ignore=tests/onnx_asr/test_recognize.py" + "--ignore=tests/onnx_asr/test_cli.py" + "--ignore=tests/onnx_asr/test_load_model_errors.py" + ;;; These tests would compare preprocessor output against + ;;; reference implementations that are not in Guix: + ;;; - kaldi_native_fbank (C++ lib, not packaged) + ;;; - nemo (NVIDIA NeMo framework, not packaged) + ;;; - openai-whisper (Python package, not packaged; + ;;; whisper-cpp exists but is C++ only, no Python module) + "--ignore=tests/preprocessors/test_kaldi.py" + "--ignore=tests/preprocessors/test_nemo.py" + "--ignore=tests/preprocessors/test_whisper_preprocessor.py") + #:phases + #~(modify-phases %standard-phases + (add-after 'install 'install-parakeet-model + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (site (string-append out "/lib/python" + #$(version-major+minor + (package-version python)) + "/site-packages/onnx_asr")) + (model-dir (string-append site + "/models-data" + "/parakeet-tdt-0.6b-v3"))) + (mkdir-p model-dir) + (symlink (assoc-ref inputs "config.json") + (string-append model-dir "/config.json")) + (symlink (assoc-ref inputs "vocab.txt") + (string-append model-dir "/vocab.txt")) + (symlink (assoc-ref inputs "encoder-model.int8.onnx") + (string-append model-dir + "/encoder-model.int8.onnx")) + (symlink (assoc-ref inputs + "decoder_joint-model.int8.onnx") + 
(string-append model-dir + "/decoder_joint-model.int8.onnx"))))) + (add-after 'install-parakeet-model 'patch-model-paths + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (site (string-append out "/lib/python" + #$(version-major+minor + (package-version python)) + "/site-packages/onnx_asr")) + (model-dir (string-append site + "/models-data" + "/parakeet-tdt-0.6b-v3"))) + (substitute* (string-append site "/loader.py") + (("@PARAKEET_MODEL_DIR@") model-dir)))))))) + (propagated-inputs + (list python-numpy + python-huggingface-hub + (list onnxruntime "python"))) + (native-inputs + (list nss-certs-for-test + onnx + parakeet-tdt-config + parakeet-tdt-vocab + parakeet-tdt-encoder-int8 + parakeet-tdt-decoder-joint-int8 + python-hatchling + python-onnxscript + python-pytorch + python-pytest + python-torchaudio)) + (home-page "https://github.com/istupakov/onnx-asr") + (synopsis "Speech recognition using ONNX models") + (description + "ONNX ASR is a Python library for automatic speech recognition using +ONNX Runtime. It supports models including Whisper and NeMo Parakeet. +Includes bundled Parakeet TDT V3 model weights (int8, CC-BY-4.0, NVIDIA).") + (license license:expat))) |
