guix_mirror_bot pushed a commit to branch master
in repository guix.
commit 9f6f7fa0cb4e1036f10054c51e4803acf5220b8c
Author: Nguyễn Gia Phong <[email protected]>
AuthorDate: Mon Jan 12 13:21:22 2026 +0900
gnu: Add python-sacremoses.
* gnu/packages/language.scm (python-sacremoses): New variable.
Change-Id: I7fd661f312c0cda107bf7eb79b99ea3f1eba3386
---
gnu/packages/language.scm | 48 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm
index d448d2ffc8..21e0aaa770 100644
--- a/gnu/packages/language.scm
+++ b/gnu/packages/language.scm
@@ -13,6 +13,7 @@
;;; Copyright © 2024 Nicolas Graves <[email protected]>
;;; Copyright © 2024, 2025 Zheng Junjie <[email protected]>
;;; Copyright © 2025 Janneke Nieuwenhuizen <[email protected]>
+;;; Copyright © 2026 Nguyễn Gia Phong <[email protected]>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -57,6 +58,8 @@
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages pulseaudio)
#:use-module (gnu packages python)
+ #:use-module (gnu packages python-build)
+ #:use-module (gnu packages python-xyz)
#:use-module (gnu packages perl-check)
#:use-module (gnu packages qt)
#:use-module (gnu packages ruby)
@@ -77,6 +80,7 @@
#:use-module (guix build-system glib-or-gtk)
#:use-module (guix build-system gnu)
#:use-module (guix build-system perl)
+ #:use-module (guix build-system pyproject)
#:use-module (guix build-system qt)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix download)
@@ -1059,6 +1063,50 @@ from the database are used as entries (heading terms).")
;; triple-licensed (at the user’s choice)
(license (list license:gpl2+ license:lgpl2.1 license:bsd-3))))
+(define-public python-sacremoses ; SacreMoses, built from a Git checkout and tested with unittest
+ (package
+ (name "python-sacremoses")
+ (version "0.1.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/hplt-project/sacremoses")
+ (commit version))) ; the commit reference is the bare version string
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0g70vchfniknp65n4wnx7chg6g49d4xrz1wagv7f7ir2swdzyn9b"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (let ((norvig-big-txt ; a second origin, spliced in below as #$norvig-big-txt
+ (origin
+ (method url-fetch)
+ ;; The file is a concatenation of public domain book excerpts
+ ;; from Project Gutenberg and lists of most frequent words
+ ;; from Wiktionary and the British National Corpus:
+ ;; https://norvig.com/spell-correct.html
+ (uri "https://norvig.com/big.txt")
+ (sha256
+ (base32
+ "0yz80icdly7na03cfpl0nfk5h3j3cam55rj486n03wph81ynq1ps")))))
+ (list #:phases #~(modify-phases %standard-phases
+ (add-before 'check 'supply-big-txt ; presumably the test suite reads big.txt -- TODO confirm
+ (lambda _
+ (symlink #$norvig-big-txt "big.txt")))) ; relative name: the link is made in the phase's current directory
+ #:test-backend #~'unittest ; run the tests with unittest
+ #:test-flags #~'("discover" "-s" "sacremoses/test")))) ; unittest discovery, start directory sacremoses/test
+ (native-inputs (list python-setuptools)) ; needed at build time only
+ (propagated-inputs (list python-click ; propagated, so installed alongside this package
+ python-joblib
+ python-regex
+ python-tqdm))
+ (home-page "https://github.com/hplt-project/sacremoses")
+ (synopsis "Natural language tokenizer, truecaser and normalizer")
+ (description
+ "SacreMoses is a Python port of Moses'
+tokenizer, detokenizer, truecaser and punctuation normalizer.")
+ (license license:expat))) ; Expat, i.e. the license commonly called MIT
+
(define-public dparser
(package
(name "dparser")