diff options
| author | Nguyễn Gia Phong <cnx@loang.net> | 2026-01-12 14:40:26 +0900 |
|---|---|---|
| committer | Cayetano Santos <csantosb@inventati.org> | 2026-01-31 09:54:48 +0100 |
| commit | a2f5c702f17586932458c3f0321f527f346f9761 (patch) | |
| tree | 02db76e549a58ebfd7eb9f8d11338d55dd490f62 /gnu | |
| parent | 9f6f7fa0cb4e1036f10054c51e4803acf5220b8c (diff) | |
gnu: Add python-stanza.
* gnu/packages/machine-learning.scm (python-stanza): New variable.
Change-Id: I86369771db647c85d7a204ff8069d6e3670bb58b
Diffstat (limited to 'gnu')
| -rw-r--r-- | gnu/packages/machine-learning.scm | 67 |
1 files changed, 67 insertions, 0 deletions
```diff
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index a260c523b80..8481afc0e49 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -33,6 +33,7 @@
 ;;; Copyright © 2025 Cayetano Santos <csantosb@inventati.org>
 ;;; Copyright © 2025 Janneke Nieuwenhuizen <janneke@gnu.org>
 ;;; Copyright © 2025 Romain Garbage <romain.garbage@inria.fr>
+;;; Copyright © 2026 Nguyễn Gia Phong <cnx@loang.net>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -1867,6 +1868,72 @@ transformers like BERT, as well as a production-ready training system and easy
 model packaging, deployment and workflow management.")
     (license license:expat)))
 
+(define-public python-stanza
+  (package
+    (name "python-stanza")
+    (version "1.10.1")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+              (url "https://github.com/stanfordnlp/stanza")
+              (commit (string-append "v" version))))
+       (sha256
+        (base32 "0zcpzmbv0aafircl12m3x5999hxpg2hzm1xxv97pz09y4v589snj"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-before 'check 'set-up-check
+            (lambda _
+              ;; Cherry pick from stanza/tests/setup.py,
+              ;; which downloads many datasets
+              (mkdir-p "stanza_test/out")
+              (copy-file "stanza/tests/data/example_french.json"
+                         "stanza_test/out/example_french.json"))))
+      ;; tests: 288 passed, 361 deselected, 1 warning
+      #:test-flags
+      #~(list
+         "-k"
+         (string-join
+          '("not CoreNLP" "EnglishPipeline" "FrenchPipeline"
+            "SentimentPipeline" "TestTrainer"
+            "amt_annotator" "arabic_pos" "bert"
+            "charlm" "conllu" "convert_units"
+            "data_objects" "defaultdict_config"
+            "depparse" "dictionary" "download"
+            "ensemble" "example" "finetune" "install"
+            "langid" "lemmatizer" "long_paragraph" "long_tokens"
+            "model" "morphology" "multilingual" "mwt"
+            "pipeline_" "pretrain" "process_doc"
+            "read_snippets" "register" "reload"
+            "requirements" "resources" "retag"
+            "score" "semgrex" "serialized" "server_" "ssurgeon"
+            "tagger" "test_core" "test_one_sentence" "test_tokenizer"
+            "text_processing" "tokenize_files" "tokensregex"
+            "train_pipeline" "training" "tsurgeon")
+          " and not ") ;exclude tests requiring datasets
+         "stanza/tests")))
+    (native-inputs (list python-pytest
+                         python-setuptools
+                         python-transformers))
+    (propagated-inputs (list python-emoji
+                             python-networkx
+                             python-numpy
+                             python-protobuf
+                             python-pytorch
+                             python-requests
+                             python-tqdm))
+    (home-page "https://stanfordnlp.github.io/stanza/")
+    (synopsis "Stanford NLP Python library for many human languages")
+    (description
+     "Stanza is a collection of accurate and efficient tools
+for the linguistic analysis of many human languages. Starting from raw text,
+Stanza divides it into sentences and words, and then can recognize
+parts of speech and entities, do syntactic analysis, and more.")
+    (license license:asl2.0)))
+
 (define-public onnx
   (package
     (name "onnx")
```
