guix_mirror_bot pushed a commit to branch master
in repository guix.
commit a2f5c702f17586932458c3f0321f527f346f9761
Author: Nguyễn Gia Phong <[email protected]>
AuthorDate: Mon Jan 12 14:40:26 2026 +0900
gnu: Add python-stanza.
* gnu/packages/machine-learning.scm (python-stanza): New variable.
Change-Id: I86369771db647c85d7a204ff8069d6e3670bb58b
---
gnu/packages/machine-learning.scm | 67 +++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index a260c523b8..8481afc0e4 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -33,6 +33,7 @@
;;; Copyright © 2025 Cayetano Santos <[email protected]>
;;; Copyright © 2025 Janneke Nieuwenhuizen <[email protected]>
;;; Copyright © 2025 Romain Garbage <[email protected]>
+;;; Copyright © 2026 Nguyễn Gia Phong <[email protected]>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -1867,6 +1868,72 @@ transformers like BERT, as well as a production-ready training system and easy
model packaging, deployment and workflow management.")
(license license:expat)))
+(define-public python-stanza
+ (package
+ (name "python-stanza")
+ (version "1.10.1")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/stanfordnlp/stanza")
+ (commit (string-append "v" version))))
+ (sha256
+ (base32 "0zcpzmbv0aafircl12m3x5999hxpg2hzm1xxv97pz09y4v589snj"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-before 'check 'set-up-check ;create the fixtures the tests read
+ (lambda _
+ ;; Steps cherry-picked from stanza/tests/setup.py; that script is
+ ;; not run itself because it downloads many datasets.
+ (mkdir-p "stanza_test/out")
+ (copy-file "stanza/tests/data/example_french.json"
+ "stanza_test/out/example_french.json"))))
+ ;; Test outcome with the selection below: 288 passed, 361 deselected, 1 warning.
+ #:test-flags
+ #~(list
+ "-k" ;the next element is the test selection expression
+ (string-join
+ '("not CoreNLP" "EnglishPipeline" "FrenchPipeline"
+ "SentimentPipeline" "TestTrainer"
+ "amt_annotator" "arabic_pos" "bert"
+ "charlm" "conllu" "convert_units"
+ "data_objects" "defaultdict_config"
+ "depparse" "dictionary" "download"
+ "ensemble" "example" "finetune" "install"
+ "langid" "lemmatizer" "long_paragraph" "long_tokens"
+ "model" "morphology" "multilingual" "mwt"
+ "pipeline_" "pretrain" "process_doc"
+ "read_snippets" "register" "reload"
+ "requirements" "resources" "retag"
+ "score" "semgrex" "serialized" "server_" "ssurgeon"
+ "tagger" "test_core" "test_one_sentence" "test_tokenizer"
+ "text_processing" "tokenize_files" "tokensregex"
+ "train_pipeline" "training" "tsurgeon")
+ " and not ") ;yields "not A and not B ...": exclude tests requiring datasets
+ "stanza/tests")))
+ (native-inputs (list python-pytest
+ python-setuptools
+ python-transformers))
+ (propagated-inputs (list python-emoji
+ python-networkx
+ python-numpy
+ python-protobuf
+ python-pytorch
+ python-requests
+ python-tqdm))
+ (home-page "https://stanfordnlp.github.io/stanza/")
+ (synopsis "Stanford NLP Python library for many human languages")
+ (description
+ "Stanza is a collection of accurate and efficient tools
+for the linguistic analysis of many human languages. Starting from raw text,
+Stanza divides it into sentences and words, and then can recognize
+parts of speech and entities, do syntactic analysis, and more.")
+ (license license:asl2.0)))
+
(define-public onnx
(package
(name "onnx")