Updated patchset for vsearch 1.4.1 released today.

ben

On 29/09/15 22:24, Ben Woodcroft wrote:
Excellent to see an open source competitor to usearch.

Thanks in advance for review as usual. I'm not especially adept at using gcc's flags so perhaps some attention is warranted in the second patch's snippet.

>From e1789cbcfcf7dc6f1657f53bad04fca7180400cc Mon Sep 17 00:00:00 2001
From: Ben Woodcroft <[email protected]>
Date: Tue, 29 Sep 2015 22:10:33 +1000
Subject: [PATCH 1/2] gnu: Add cityhash.

* gnu/packages/textutils.scm (cityhash): New variable.
---
 gnu/packages/textutils.scm | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/gnu/packages/textutils.scm b/gnu/packages/textutils.scm
index 95a8ad1..5204297 100644
--- a/gnu/packages/textutils.scm
+++ b/gnu/packages/textutils.scm
@@ -1,6 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <[email protected]>
 ;;; Copyright © 2015 Ricardo Wurmus <[email protected]>
+;;; Copyright © 2015 Ben Woodcroft <[email protected]>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -153,3 +154,26 @@ encoding, supporting Unicode version 7.0.")
      "libgtextutils is a text utilities library used by the fastx toolkit from
 the Hannon Lab.")
     (license license:agpl3+)))
+
+(define-public cityhash
+  (let ((commit "8af9b8c")
+        (revision "1"))
+    (package
+      (name "cityhash")
+      (version (string-append "1.1." revision "." commit)) ; 1.1.<rev>.<commit>
+      (source (origin
+                (method git-fetch)      ; a Git checkout, not a tarball
+                (uri (git-reference
+                      (url "https://github.com/google/cityhash.git")
+                      (commit commit)))
+                (file-name (string-append name "-" version "-checkout"))
+                (sha256
+                 (base32
+                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
+      (build-system gnu-build-system)
+      (home-page "https://github.com/google/cityhash")
+      (synopsis "Family of hash functions for strings")
+      (description
+       "CityHash provides hash functions for strings.  The functions mix the
+input bits thoroughly but are not suitable for cryptography.")
+      (license license:expat))))
-- 
2.4.3

>From 5a403f43bafe129d8a22b1d0ea169c2e97d2fb5b Mon Sep 17 00:00:00 2001
From: Ben Woodcroft <[email protected]>
Date: Wed, 30 Sep 2015 18:35:32 +1000
Subject: [PATCH 2/2] gnu: Add vsearch.

* gnu/packages/bioinformatics.scm (vsearch): New variable.
---
 gnu/packages/bioinformatics.scm | 59 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 8fc6142..25f65ae 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -33,6 +33,7 @@
   #:use-module (guix build-system ruby)
   #:use-module (guix build-system trivial)
   #:use-module (gnu packages)
+  #:use-module (gnu packages autotools)
   #:use-module (gnu packages algebra)
   #:use-module (gnu packages base)
   #:use-module (gnu packages boost)
@@ -53,6 +54,7 @@
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages tbb)
   #:use-module (gnu packages textutils)
+  #:use-module (gnu packages tls)
   #:use-module (gnu packages vim)
   #:use-module (gnu packages web)
   #:use-module (gnu packages xml)
@@ -2709,6 +2711,63 @@ data in the form of VCF files.")
     ;; at http://vcftools.sourceforge.net/license.html
     (license license:lgpl3)))
 
+(define-public vsearch
+  (package
+    (name "vsearch")
+    (version "1.4.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://github.com/torognes/vsearch/archive/v"
+             version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32
+         "0b1359wbzgb2cm04h7dq05v80vik88hnsv298xxd1q1f2q4ydni7"))
+       (modules '((guix build utils)))
+       (snippet
+        '(begin
+           ;; Unbundle cityhash, and drop '-mtune=native' so the result does
+           ;; not depend on the CPU of the build machine.
+           ;; XXX: '-lcityhash' is a linker flag; consider LDADD instead.
+           (substitute* "src/Makefile.am"
+             (((string-append "^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash"
+                              " -O3 -mtune=native -Wall -Wsign-compare"))
+              "AM_CXXFLAGS=-lcityhash -O3 -Wall -Wsign-compare")
+             (("^__top_builddir__bin_vsearch_SOURCES = cityhash/city.h \\\\")
+              "__top_builddir__bin_vsearch_SOURCES = \\")
+             (("^cityhash/config.h \\\\") "\\")
+             (("^cityhash/city.cc \\\\") "\\"))
+           (substitute* "src/vsearch.h"
+             (("^\\#include \"cityhash/city.h\"") "#include <city.h>"))
+           (delete-file-recursively "src/cityhash")
+           #t))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-before 'configure 'autogen
+                     (lambda _ (zero? (system* "autoreconf" "-vif")))))))
+    (inputs
+     `(("zlib" ,zlib)
+       ("bzip2" ,bzip2)
+       ("cityhash" ,cityhash)))
+    (native-inputs
+     `(("autoconf" ,autoconf)
+       ("automake" ,automake)))
+    (synopsis "Sequence search tools for metagenomics")
+    (description
+     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
+dereplication, pairwise alignment, shuffling, subsampling, sorting and
+masking.  The tool takes advantage of parallelism in the form of SIMD
+vectorization as well as multiple threads to perform accurate alignments at
+high speed.  VSEARCH uses an optimal global aligner (full dynamic programming
+Needleman-Wunsch).")
+    (home-page "https://github.com/torognes/vsearch")
+    ;; Dual licensed (GPLv3 or BSD-2); also includes public domain source.
+    (license (list license:gpl3 license:bsd-2 license:public-domain))))
+
 (define-public bio-locus
   (package
     (name "bio-locus")
-- 
2.4.3

Reply via email to