This is an automated email from the git hooks/post-receive script. plessy pushed a commit to branch master in repository jellyfish.
commit 79fce41c3c8bbb8ad1d96e02774bc61d0272a8a7 Author: Charles Plessy <[email protected]> Date: Wed Jun 17 18:05:43 2015 +0900 Imported Upstream version 2.2.0 --- .gitignore | 1 - Makefile.am | 16 +- README.md | 31 ++- configure.ac | 59 +++- examples/jf_count_dump/Makefile | 7 + examples/jf_count_dump/README | 52 ++++ examples/jf_count_dump/jf_count_dump.cc | 80 ++++++ examples/swig/README.md | 1 + examples/swig/dump.pl | 8 + examples/swig/dump.py | 8 + examples/swig/dump.rb | 9 + examples/swig/query.pl | 8 + examples/swig/query.py | 9 + examples/swig/query.rb | 9 + include/jellyfish/bloom_filter.hpp | 1 + include/jellyfish/generator_manager.hpp | 2 +- include/jellyfish/large_hash_array.hpp | 2 +- include/jellyfish/mer_overlap_sequence_parser.hpp | 29 +- include/jellyfish/rectangular_binary_matrix.hpp | 4 +- include/jellyfish/simple_circular_buffer.hpp | 1 + include/jellyfish/whole_sequence_parser.hpp | 15 +- jellyfish.spec.in | 83 ++++++ lib/generator_manager.cc | 11 +- m4/.gitignore | 3 +- m4/m4-ax_perl_ext.m4 | 135 +++++++++ m4/m4-ax_pkg_swig.m4 | 135 +++++++++ m4/m4-ax_python_devel.m4 | 324 ++++++++++++++++++++++ m4/m4-ax_ruby_ext.m4 | 102 +++++++ m4/m4-ax_swig_enable_cxx.m4 | 53 ++++ m4/m4-ax_swig_python.m4 | 64 +++++ sub_commands/jellyfish.cc | 4 + swig/Makefile.am | 68 +++++ swig/Readme.md | 30 +- swig/perl5/t/test_mer_file.t | 12 +- swig/python/__init__.py | 1 + swig/python/test_mer_file.py | 10 +- swig/ruby/test_mer_file.rb | 10 +- swig/string_mers.i | 6 + tests/compat.sh.in | 8 + tests/swig_perl.sh | 17 ++ tests/swig_python.sh | 17 ++ tests/swig_ruby.sh | 19 ++ unit_tests/test_mer_overlap_sequence_parser.cc | 12 + unit_tests/test_whole_sequence_parser.cc | 7 + 44 files changed, 1411 insertions(+), 72 deletions(-) diff --git a/.gitignore b/.gitignore index d065b04..ba20c38 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,3 @@ config.h.in~ config.sub ltmain.sh *_cmdline.hpp -m4 diff --git a/Makefile.am b/Makefile.am index 78a94af..40c3740 100644 --- 
a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,7 @@ +SUBDIRS = . $(MAYBE_SWIG) ACLOCAL_AMFLAGS = -I m4 -EXTRA_DIST = doc/jellyfish.pdf doc/jellyfish.man README LICENSE HalfLICENSE + +EXTRA_DIST = doc/jellyfish.pdf doc/jellyfish.man README LICENSE # jellyfish.spec man1_MANS = doc/jellyfish.man pkgconfigdir = $(libdir)/pkgconfig @@ -20,7 +22,7 @@ DISTCLEANFILES = $(BUILT_SOURCES) # Yaggo automatic rules with silencing V_YAGGO = $(V_YAGGO_$(V)) V_YAGGO_ = $(V_YAGGO_$(AM_DEFAULT_VERBOSITY)) -V_YAGGO_0 = @echo " YAGGO " $@; +V_YAGGO_0 = @echo " YAGGO " $@; .yaggo.hpp: $(V_YAGGO)$(YAGGO) --license $(srcdir)/header-license -o $@ $< @@ -158,6 +160,16 @@ tests/min_qual.log: tests/generate_fastq_sequence.log tests/large_key.log: tests/generate_sequence.log tests/quality_filter.log: tests/generate_sequence.log +# SWIG tests +TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh +tests/swig_python.log: tests/generate_sequence.log +tests/swig_ruby.log: tests/generate_sequence.log +tests/swig_perl.log: tests/generate_sequence.log +EXTRA_DIST += swig/python/test_mer_file.py swig/python/test_hash_counter.py +EXTRA_DIST += swig/ruby/test_mer_file.rb swig/ruby/test_hash_counter.rb +EXTRA_DIST += swig/perl5/t/test_mer_file.t swig/perl5/t/test_hash_counter.t + + ############## # Unit tests # ############## diff --git a/README.md b/README.md index bc5bb1c..33fb499 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Overview Jellyfish is a tool for fast, memory-efficient counting of k-mers in DNA. A k-mer is a substring of length k, and counting the occurrences of all such substrings is a central step in many analyses of DNA sequence. Jellyfish can count k-mers using an order of magnitude less memory and an order of magnitude faster than other k-mer counting packages by using an efficient encoding of a hash table and by exploiting the "compare-and-swap" CPU instruction to increase parallelism. 
-JELLYFISH is a command-line program that reads FASTA and multi-FASTA files containing DNA sequences. It outputs its k-mer counts in an binary format, which can be translated into a human-readable text format using the "jellyfish dump" command. See the documentation below for more details. +JELLYFISH is a command-line program that reads FASTA and multi-FASTA files containing DNA sequences. It outputs its k-mer counts in a binary format, which can be translated into a human-readable text format using the "jellyfish dump" command, or queried for specific k-mers with "jellyfish query". See the UserGuide provided on [Jellyfish's home page][1] for more details. If you use Jellyfish in your research, please cite: @@ -15,7 +15,7 @@ If you use Jellyfish in your research, please cite: Installation ------------ -To get packaged tar ball of the source code, see the [home page of Jellyfish at the University of Maryland](http://www.genome.umd.edu/jellyfish.html "University of Maryland website"). +To get an easier-to-compile packaged tar ball of the source code, see the [home page of Jellyfish at the University of Maryland][1]. To compile from the git tree, you will need autoconf/automake, make, g++ 4.4 or newer and [yaggo](https://github.com/gmarcais/yaggo "Yaggo on github"). Then compile with: @@ -29,4 +29,29 @@ sudo make install Extra / Examples ---------------- -In the examples directory are potentially useful extra programs to query/manipulates output files from Jellyfish. The examples are not compiled by default. Each subdirectory of examples is independent and is compiled with a simple invocation of 'make'. +In the examples directory are potentially useful extra programs to query/manipulate output files of Jellyfish, using the shared library of Jellyfish in C++ or with scripting languages. The examples are not compiled by default. Each subdirectory of examples is independent and is compiled with a simple invocation of 'make'. 
+ + +Binding to script languages +--------------------------- + +Bindings to Ruby, Python and Perl are provided. These bindings allow reading the output file of Jellyfish directly in a scripting language. Compilation of the bindings is easier from the tarball provided on [Jellyfish's home page][1]. + +Compilation of the bindings from the git tree requires [SWIG](http://swig.org) version 3, and the development files of the scripting languages. To compile all three bindings, configure with: + +```Shell +./configure --enable-swig --enable-ruby-binding --enable-python-binding --enable-perl-binding +``` + +Note that the headers of older versions of Perl 5 do not compile with recent compilers (g++ > 4.4, clang++) and C++11 mode enabled. One may have to specify in addition `CXX=g++4.4` to compile the perl binding. + +The bindings can be installed in a different location than the default (which may require root privileges for example) by passing a path to the `--enable` switches. Then, for Python, Ruby or Perl to find the binding, an environment variable may need to be adjusted (`PYTHONPATH`, `RUBYLIB` and `PERL5LIB` respectively). For example: + +```Shell +./configure --prefix=$HOME --enable-swig --enable-python-binding=$HOME/lib/python +export PYTHONPATH=$HOME/lib/python +``` + +See the `swig` directory for examples on how to use the bindings. 
+ +[1]: http://www.genome.umd.edu/jellyfish.html "Genome group at University of Maryland" diff --git a/configure.ac b/configure.ac index 9ddcef5..71f7e4b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([jellyfish], [2.1.5], [[email protected]]) +AC_INIT([jellyfish], [2.2.0], [[email protected]]) AC_CANONICAL_HOST AC_CONFIG_MACRO_DIR([m4]) AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests]) @@ -43,13 +43,6 @@ AS_IF([test "x$with_sse" != xno], [AC_DEFINE([HAVE_SSE], [1], [Define if you have SSE])], []) -AC_ARG_WITH([half], - [AS_HELP_STRING([--with-half], [enable half float (16 bits)])], - [], [with_half=no]) -AS_IF([test "x$with_half" = "xyes"], - [AC_DEFINE([HALF_FLOATS], [1], [Define if you use half floats for qmer counting])], - []) - # Use valgrind to check memory allocation with mmap AC_ARG_ENABLE([valgrind], [AS_HELP_STRING([--enable-valgrind], [Instrument mmap memory allocation with valgrind])]) @@ -95,6 +88,56 @@ STATIC_FLAGS= AS_IF([test x$enable_all_static = xyes], [AC_SUBST([STATIC_FLAGS], [-all-static])]) +# +# SWIG and bindings +# +maybe_swig= +# --enable-python-binding +AC_ARG_ENABLE([python-binding], + [AC_HELP_STRING([--enable-python-binding@<:@=PATH@:>@], [create SWIG python module and install in PATH])]) +# --enable-ruby-binding +AC_ARG_ENABLE([ruby-binding], + [AC_HELP_STRING([--enable-ruby-binding@<:@=PATH@:>@], [create SWIG ruby module and install in PATH])]) +# --enable-perl-binding +AC_ARG_ENABLE([perl-binding], + [AC_HELP_STRING([--enable-perl-binding@<:@=PATH@:>@], [create SWIG perl module and install in PATH])]) + +# --enable-swig +AC_ARG_ENABLE([swig], + [AC_HELP_STRING([--enable-swig], [enable development of swig binding])]) +AS_IF([test x$enable_swig = xyes], + [AX_PKG_SWIG([3.0.0], [], [AC_MSG_ERROR([SWIG version 3 is required])])]) +AS_IF([test -n "$SWIG"], + [SWIG_ENABLE_CXX] + [maybe_swig=swig]) +AM_CONDITIONAL([HAVE_SWIG], [test -n "$SWIG"]) + +# Python binding setup 
+AM_CONDITIONAL(PYTHON_BINDING, [test -n "$enable_python_binding" -a x$enable_python_binding != xno]) +AM_COND_IF([PYTHON_BINDING], + [AS_IF([test x$enable_python_binding != xyes], [PYTHON_SITE_PKG=$enable_python_binding])] + [AX_PYTHON_DEVEL] + [maybe_swig=swig]) + +# Ruby binding setup +AM_CONDITIONAL([RUBY_BINDING], [test -n "$enable_ruby_binding" -a x$enable_ruby_binding != xno]) +AM_COND_IF([RUBY_BINDING], + [AS_IF([test x$enable_ruby_binding != xyes], [RUBY_EXT_LIB=$enable_ruby_binding])] + [AX_RUBY_EXT] + [maybe_swig=swig]) + +# Perl binding setup +AM_CONDITIONAL([PERL_BINDING], [test -n "$enable_perl_binding" -a x$enable_perl_binding != xno]) +AM_COND_IF([PERL_BINDING], + [AS_IF([test x$enable_perl_binding != xyes], [PERL_EXT_LIB=$enable_perl_binding])] + [AX_PERL_EXT] + [maybe_swig=swig]) + +# Enable compilation of SWIG and bindings +AC_SUBST([MAYBE_SWIG], [$maybe_swig]) +AS_IF([test -n "$maybe_swig"], + [AC_CONFIG_FILES([swig/Makefile])]) + AC_OUTPUT diff --git a/examples/jf_count_dump/Makefile b/examples/jf_count_dump/Makefile new file mode 100644 index 0000000..2880b13 --- /dev/null +++ b/examples/jf_count_dump/Makefile @@ -0,0 +1,7 @@ +CC = g++ +CXXFLAGS = $(shell pkg-config --cflags jellyfish-2.0) -std=c++0x -Wall -O3 +LDFLAGS = $(shell pkg-config --libs jellyfish-2.0) -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g') + +all: jf_count_dump +clean: + rm -f *.o jf_count_dump diff --git a/examples/jf_count_dump/README b/examples/jf_count_dump/README new file mode 100644 index 0000000..3aafc81 --- /dev/null +++ b/examples/jf_count_dump/README @@ -0,0 +1,52 @@ +What is it? +=========== + +This program is an example on how to count k-mers with Jellyfish and +then use the resulting hash of mers. The program counts all the k-mers +in the files passed on the command line. Then it generate some random +k-mer and displays its count in the hash (by default, k=25 and with +good likely hood this value will be zero). 
Then, it dumps the content +of the entire database on stdout. + +It is equivalent to using 'jellyfish count', 'jellyfish query' and +'jellyfish dump'. + +Some details +============ + +Many of the parameters that are switches to 'jellyfish count' are hard +coded constant in this program. They are: + + jellyfish::mer_dna::k(25); + +The length k of the mers. Here set to 25. + + const uint64_t hash_size = 10000000; + +The initial size of the hash. Here set to 10 million. Ideally, this +will be set close to the actual number of mers that will be inserted +in the hash, so as to limit the number of size doubling of the hash. + + const uint32_t num_reprobes = 126; + +The maximum number of reprobes in the hash. The larger this number, +the higher the load of the hash table before a size doubling is +necessary, at the expense of some computation time. + + const uint32_t num_threads = 16; + +Number of threads used for parsing the input files and counting the mers. + + const uint32_t counter_len = 7; + +Minimum length of the counting field. Mers that have a count that does +not fit in this field will take up the space of 2 mers in the hash +table. + + const bool canonical = true; + +A mer and its reverse complement are considered to be one and the same +mer. The canonical representation is whichever of a mer or its reverse +complement that comes first in lexicographic order. This is usually +set when counting mers in sequencing reads. 
+ diff --git a/examples/jf_count_dump/jf_count_dump.cc b/examples/jf_count_dump/jf_count_dump.cc new file mode 100644 index 0000000..54a8f5a --- /dev/null +++ b/examples/jf_count_dump/jf_count_dump.cc @@ -0,0 +1,80 @@ +#include <iostream> + +#include <jellyfish/mer_dna.hpp> +#include <jellyfish/thread_exec.hpp> +#include <jellyfish/hash_counter.hpp> +#include <jellyfish/stream_manager.hpp> +#include <jellyfish/mer_overlap_sequence_parser.hpp> +#include <jellyfish/mer_iterator.hpp> + +typedef jellyfish::cooperative::hash_counter<jellyfish::mer_dna> mer_hash_type; +typedef jellyfish::mer_overlap_sequence_parser<jellyfish::stream_manager<char**>> sequence_parser_type; +typedef jellyfish::mer_iterator<sequence_parser_type, jellyfish::mer_dna> mer_iterator_type; + + +class mer_counter : public jellyfish::thread_exec { + mer_hash_type& mer_hash_; + jellyfish::stream_manager<char**> streams_; + sequence_parser_type parser_; + const bool canonical_; + +public: + mer_counter(int nb_threads, mer_hash_type& mer_hash, + char** file_begin, char** file_end, + bool canonical) + : mer_hash_(mer_hash) + , streams_(file_begin, file_end) + , parser_(jellyfish::mer_dna::k(), streams_.nb_streams(), 3 * nb_threads, 4096, streams_) + , canonical_(canonical) + { } + + virtual void start(int thid) { + mer_iterator_type mers(parser_, canonical_); + + for( ; mers; ++mers) + mer_hash_.add(*mers, 1); + mer_hash_.done(); + } +}; + + +int main(int argc, char *argv[]) { + // Parameters that are hard coded. Most likely some of those should + // be switches to this program. + jellyfish::mer_dna::k(25); // Set length of mers (k=25) + const uint64_t hash_size = 10000000; // Initial size of hash. 
+ const uint32_t num_reprobes = 126; + const uint32_t num_threads = 16; // Number of concurrent threads + const uint32_t counter_len = 7; // Minimum length of counting field + const bool canonical = true; // Use canonical representation + + // create the hash + mer_hash_type mer_hash(hash_size, jellyfish::mer_dna::k()*2, counter_len, num_threads, num_reprobes); + + + // count the kmers + mer_counter counter(num_threads, mer_hash, argv + 1, argv + argc, canonical); + counter.exec_join(num_threads); + + const auto jf_ary = mer_hash.ary(); + + // Display value for some random k-mer + uint64_t val = 0; + jellyfish::mer_dna random_mer; + random_mer.randomize(); + random_mer.canonicalize(); + if(jf_ary->get_val_for_key(random_mer, &val)) { + std::cout << random_mer << ' ' << val << '\n'; + } else { + std::cout << random_mer << " not present in hash\n"; + } + + // Dump all the k-mers on stdout + const auto end = jf_ary->end(); + for(auto it = jf_ary->begin(); it != end; ++it) { + auto& key_val = *it; + std::cout << key_val.first << ' ' << key_val.second << '\n'; + } + + return 0; +} diff --git a/examples/swig/README.md b/examples/swig/README.md new file mode 100644 index 0000000..cc05610 --- /dev/null +++ b/examples/swig/README.md @@ -0,0 +1 @@ +Simple examples on how to implement (simplified versions of) 'jellyfish dump' and 'jellyfish query' in Python, Ruby and Perl. diff --git a/examples/swig/dump.pl b/examples/swig/dump.pl new file mode 100644 index 0000000..b4443a7 --- /dev/null +++ b/examples/swig/dump.pl @@ -0,0 +1,8 @@ +#! /usr/bin/env perl + +use jellyfish; + +my $mf = jellyfish::ReadMerFile->new($ARGV[0]); +while($mf->next_mer) { + print($mf->mer, " ", $mf->count, "\n"); +} diff --git a/examples/swig/dump.py b/examples/swig/dump.py new file mode 100644 index 0000000..d03335f --- /dev/null +++ b/examples/swig/dump.py @@ -0,0 +1,8 @@ +#! 
/usr/bin/env python + +import jellyfish +import sys + +mf = jellyfish.ReadMerFile(sys.argv[1]) +for mer, count in mf: + print("%s %d" % (mer, count)) diff --git a/examples/swig/dump.rb b/examples/swig/dump.rb new file mode 100644 index 0000000..0fb3849 --- /dev/null +++ b/examples/swig/dump.rb @@ -0,0 +1,9 @@ +#! /usr/bin/env ruby + +require 'jellyfish' + +mf = Jellyfish::ReadMerFile.new(ARGV[0]) +mf.each { |mer, count| + print(mer, " ", count, "\n") +} + diff --git a/examples/swig/query.pl b/examples/swig/query.pl new file mode 100644 index 0000000..1b65d7c --- /dev/null +++ b/examples/swig/query.pl @@ -0,0 +1,8 @@ +#! /usr/bin/env perl + +use jellyfish; + +my $qf = jellyfish::QueryMerFile->new(shift @ARGV); +for my $s (@ARGV) { + print($s, " ", $qf->get(jellyfish::MerDNA->new($s)), "\n"); +} diff --git a/examples/swig/query.py b/examples/swig/query.py new file mode 100644 index 0000000..070a5ed --- /dev/null +++ b/examples/swig/query.py @@ -0,0 +1,9 @@ +#! /usr/bin/env python + +import jellyfish +import sys + +qf = jellyfish.QueryMerFile(sys.argv[1]) +for str in sys.argv[2:]: + print("%s %d" % (str, qf[jellyfish.MerDNA(str)])) + diff --git a/examples/swig/query.rb b/examples/swig/query.rb new file mode 100644 index 0000000..f4bcf8e --- /dev/null +++ b/examples/swig/query.rb @@ -0,0 +1,9 @@ +#! 
/usr/bin/env ruby + +require 'jellyfish' + +qf = Jellyfish::QueryMerFile.new(ARGV[0]) +ARGV[1..-1].each { |s| + print(s, " ", qf[Jellyfish::MerDNA.new(s)], "\n") +} + diff --git a/include/jellyfish/bloom_filter.hpp b/include/jellyfish/bloom_filter.hpp index 4d3d7c8..b3c2865 100644 --- a/include/jellyfish/bloom_filter.hpp +++ b/include/jellyfish/bloom_filter.hpp @@ -79,6 +79,7 @@ public: unsigned int check__(const uint64_t *hashes) const { // Prefetch memory locations + static_assert(std::is_pod<typename super::prefetch_info>::value, "prefetch_info must be a POD"); typename super::prefetch_info pinfo[super::k_]; const size_t base = super::d_.remainder(hashes[0]); const size_t inc = super::d_.remainder(hashes[1]); diff --git a/include/jellyfish/generator_manager.hpp b/include/jellyfish/generator_manager.hpp index ece2dd5..64cd0b0 100644 --- a/include/jellyfish/generator_manager.hpp +++ b/include/jellyfish/generator_manager.hpp @@ -153,7 +153,7 @@ private: void start_commands(); void start_one_command(const std::string& command, int pipe); bool display_status(int status, const std::string& command); - void setup_signal_handlers(); + int setup_signal_handlers(); void unset_signal_handlers(); static void signal_handler(int signal); void cleanup(); diff --git a/include/jellyfish/large_hash_array.hpp b/include/jellyfish/large_hash_array.hpp index 74f1000..2a49c9f 100644 --- a/include/jellyfish/large_hash_array.hpp +++ b/include/jellyfish/large_hash_array.hpp @@ -444,7 +444,7 @@ public: bool get_key_id(const key_type& key, size_t* id, key_type& tmp_key, const word** w, const offset_t** o, const size_t oid) const { // This static_assert makes clang++ happy static_assert(std::is_pod<prefetch_info>::value, "prefetch_info must be a POD"); - prefetch_info info_ary[prefetch_buffer::capacity()]; + prefetch_info info_ary[prefetch_buffer::capacityConstant]; prefetch_buffer buffer(info_ary); warm_up_cache(buffer, oid); diff --git a/include/jellyfish/mer_overlap_sequence_parser.hpp 
b/include/jellyfish/mer_overlap_sequence_parser.hpp index 7700a60..f5bd549 100644 --- a/include/jellyfish/mer_overlap_sequence_parser.hpp +++ b/include/jellyfish/mer_overlap_sequence_parser.hpp @@ -48,14 +48,16 @@ class mer_overlap_sequence_parser : public jellyfish::cooperative_pool2<mer_over stream_status() : seam(0), seq_len(0), have_seam(false), type(DONE_TYPE) { } }; - uint16_t mer_len_; - size_t buf_size_; - char* buffer; - char* seam_buffer; - locks::pthread::mutex streams_mutex; - char* data; - cpp_array<stream_status> streams_; - StreamIterator& streams_iterator_; + uint16_t mer_len_; + size_t buf_size_; + char* buffer; + char* seam_buffer; + locks::pthread::mutex streams_mutex; + char* data; + cpp_array<stream_status> streams_; + StreamIterator& streams_iterator_; + size_t files_read_; // nb of files read + size_t reads_read_; // nb of reads read public: /// Max_producers is the maximum number of concurrent threads than @@ -73,7 +75,8 @@ public: buffer(new char[size * buf_size]), seam_buffer(new char[max_producers * (mer_len - 1)]), streams_(max_producers), - streams_iterator_(streams) + streams_iterator_(streams), + files_read_(0), reads_read_(0) { for(sequence_ptr* it = super::element_begin(); it != super::element_end(); ++it) it->start = it->end = buffer + (it - super::element_begin()) * buf_size; @@ -114,6 +117,9 @@ public: return false; } + size_t nb_files() const { return files_read_; } + size_t nb_reads() const { return reads_read_; } + protected: bool open_next_file(stream_status& st) { // The stream must be released, with .reset(), before calling @@ -127,15 +133,18 @@ protected: return false; } + ++files_read_; switch(st.stream->peek()) { case EOF: return open_next_file(st); case '>': st.type = FASTA_TYPE; ignore_line(*st.stream); // Pass header + ++reads_read_; break; case '@': st.type = FASTQ_TYPE; ignore_line(*st.stream); // Pass header + ++reads_read_; break; default: throw std::runtime_error("Unsupported format"); // Better error management 
@@ -157,6 +166,7 @@ protected: if(st.stream->peek() == '>') { *(buff.start + read++) = 'N'; // Add N between reads ignore_line(*st.stream); // Skip to next sequence (skip headers, quals, ...) + ++reads_read_; } } buff.end = buff.start + read; @@ -184,6 +194,7 @@ protected: if(st.stream->good()) { *(buff.start + read++) = 'N'; // Add N between reads ignore_line(*st.stream); // Skip sequence header + ++reads_read_; } st.seq_len = 0; } diff --git a/include/jellyfish/rectangular_binary_matrix.hpp b/include/jellyfish/rectangular_binary_matrix.hpp index d545b65..75ab794 100644 --- a/include/jellyfish/rectangular_binary_matrix.hpp +++ b/include/jellyfish/rectangular_binary_matrix.hpp @@ -261,8 +261,8 @@ namespace jellyfish { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wuninitialized" #endif - register xmm_t acc = acc ^ acc; // Set acc to 0 - register xmm_t load = load ^ load; + xmm_t acc = acc ^ acc; // Set acc to 0 + xmm_t load = load ^ load; #ifdef __clang__ #pragma clang diagnostic pop #endif diff --git a/include/jellyfish/simple_circular_buffer.hpp b/include/jellyfish/simple_circular_buffer.hpp index 08afcc3..16cc2ba 100644 --- a/include/jellyfish/simple_circular_buffer.hpp +++ b/include/jellyfish/simple_circular_buffer.hpp @@ -106,6 +106,7 @@ namespace jellyfish { class pre_alloc : public base<T, pre_alloc<T, capa> > { typedef base<T, pre_alloc<T, capa> > super; public: + static const int capacityConstant = capa; explicit pre_alloc(T* data) : super(data) { } static int capacity() { return capa; } }; diff --git a/include/jellyfish/whole_sequence_parser.hpp b/include/jellyfish/whole_sequence_parser.hpp index 15b8984..2474e10 100644 --- a/include/jellyfish/whole_sequence_parser.hpp +++ b/include/jellyfish/whole_sequence_parser.hpp @@ -33,6 +33,9 @@ class whole_sequence_parser : public jellyfish::cooperative_pool2<whole_sequence }; cpp_array<stream_status> streams_; StreamIterator& streams_iterator_; + size_t files_read_; // nb of files read + size_t 
reads_read_; // nb of reads read + public: /// Size is the number of buffers to keep around. It should be @@ -43,7 +46,9 @@ public: uint32_t max_producers, StreamIterator& streams) : super(max_producers, size), streams_(max_producers), - streams_iterator_(streams) + streams_iterator_(streams), + files_read_(0), + reads_read_(0) { for(auto it = super::element_begin(); it != super::element_end(); ++it) { it->nb_filled = 0; @@ -77,6 +82,9 @@ public: return false; } + size_t nb_files() const { return files_read_; } + size_t nb_reads() const { return reads_read_; } + protected: void open_next_file(stream_status& st) { st.stream.reset(); @@ -86,6 +94,7 @@ protected: return; } + ++files_read_; // Update the type of the current file and move past first header // to beginning of sequence. switch(st.stream->peek()) { @@ -106,11 +115,12 @@ protected: const size_t data_size = buff.data.size(); for(nb_filled = 0; nb_filled < data_size && st.stream->peek() != EOF; ++nb_filled) { + ++reads_read_; header_sequence_qual& fill_buff = buff.data[nb_filled]; st.stream->get(); // Skip '>' std::getline(*st.stream, fill_buff.header); fill_buff.seq.clear(); - while(st.stream->peek() != '>' && st.stream->peek() != EOF) { + for(int c = st.stream->peek(); c != '>' && c != EOF; c = st.stream->peek()) { std::getline(*st.stream, st.buffer); // Wish there was an easy way to combine the fill_buff.seq.append(st.buffer); // two lines avoiding copying } @@ -122,6 +132,7 @@ protected: const size_t data_size = buff.data.size(); for(nb_filled = 0; nb_filled < data_size && st.stream->peek() != EOF; ++nb_filled) { + ++reads_read_; header_sequence_qual& fill_buff = buff.data[nb_filled]; st.stream->get(); // Skip '@' std::getline(*st.stream, fill_buff.header); diff --git a/jellyfish.spec.in b/jellyfish.spec.in new file mode 100644 index 0000000..8fca8fb --- /dev/null +++ b/jellyfish.spec.in @@ -0,0 +1,83 @@ +Name: jellyfish +Version: @PACKAGE_VERSION@ +Release: 1%{?dist} +Summary: A fast multi-threaded k-mer 
counter + +Group: Applications/Engineering +License: GNU GPL +URL: http://www.genome.umd.edu/jellyfish +Source0: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +#BuildRequires: +Requires: %{name}-lib = %{version}-%{release} + +%description +Jellyfish is a tool for fast, memory-efficient counting of k-mers in +DNA. A k-mer is a substring of length k, and counting the occurrences +of all such substrings is a central step in many analyses of DNA +sequence. JELLYFISH can count k-mers quickly by using an efficient +encoding of a hash table and by exploiting the "compare-and-swap" CPU +instruction to increase parallelism. + +%package lib +Summary: Library for %{name} +Group: System Environment/Libraries +# Requires: %{name}%{?_isa} = %{version}-%{release} + +%description lib +The %{name}-lib package contains libraries required to run %{name} or +software using the %{name} library. + +%package devel +Summary: Development files for %{name} +Group: Development/Libraries +Requires: %{name}-lib%{?_isa} = %{version}-%{release} + +%description devel +The %{name}-devel package contains libraries and header files for +developing applications that use %{name}. 
+ + +%prep +%setup -q + + +%build +%configure --disable-static +make %{?_smp_mflags} + + +%install +rm -rf $RPM_BUILD_ROOT +make install DESTDIR=$RPM_BUILD_ROOT +find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';' + + +%clean +rm -rf $RPM_BUILD_ROOT + + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + + +%files +%defattr(-,root,root,-) +%{_bindir}/* + +%files lib +%defattr(-,root,root,-) +%doc +%{_libdir}/*.so.* + +%files devel +%defattr(-,root,root,-) +%doc +%{_includedir}/* +%{_libdir}/*.so + + +%changelog +* Initial version diff --git a/lib/generator_manager.cc b/lib/generator_manager.cc index b9d801b..1d03541 100644 --- a/lib/generator_manager.cc +++ b/lib/generator_manager.cc @@ -127,7 +127,8 @@ void generator_manager::start() { // In child - setup_signal_handlers(); + if(setup_signal_handlers() == -1) + exit(EXIT_FAILURE); start_commands(); // child start commands int signal = kill_signal_; if(signal == 0) @@ -148,13 +149,11 @@ static generator_manager* manager = 0; void generator_manager::signal_handler(int signal) { manager->kill_signal_ = signal; } -void generator_manager::setup_signal_handlers() { - int res; +int generator_manager::setup_signal_handlers() { struct sigaction act; memset(&act, '\0', sizeof(act)); act.sa_handler = signal_handler; - res = sigaction(SIGTERM, &act, 0); - assert(res == 0); + return sigaction(SIGTERM, &act, 0); // Should we redefine other signals as well? Like SIGINT, SIGQUIT? 
} @@ -162,7 +161,7 @@ void generator_manager::unset_signal_handlers() { struct sigaction act; memset(&act, '\0', sizeof(act)); act.sa_handler = SIG_DFL; - assert(sigaction(SIGTERM, &act, 0) == 0); + sigaction(SIGTERM, &act, 0); } bool generator_manager::wait() { diff --git a/m4/.gitignore b/m4/.gitignore index 5e7d273..148d151 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -1,4 +1,5 @@ # Ignore everything in this directory * -# Except this file +# Except our files !.gitignore +!m4-ax_* diff --git a/m4/m4-ax_perl_ext.m4 b/m4/m4-ax_perl_ext.m4 new file mode 100644 index 0000000..1316f7c --- /dev/null +++ b/m4/m4-ax_perl_ext.m4 @@ -0,0 +1,135 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_perl_ext.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PERL_EXT +# +# DESCRIPTION +# +# Fetches the linker flags and C compiler flags for compiling and linking +# Perl binary extensions. The macro substitutes PERL_EXT_PREFIX, +# PERL_EXT_INC, PERL_EXT_LIB, PERL_EXT_CPPFLAGS, PERL_EXT_LDFLAGS and +# PERL_EXT_DLEXT variables if Perl executable was found. It also checks +# the same variables before trying to retrieve them from the Perl +# configuration. +# +# PERL_EXT_PREFIX: top-level perl installation path (--prefix) +# PERL_EXT_INC: XS include directory +# PERL_EXT_LIB: Perl extensions destination directory +# PERL_EXT_CPPFLAGS: C preprocessor flags to compile extensions +# PERL_EXT_LDFLAGS: linker flags to build extensions +# PERL_EXT_DLEXT: extensions suffix for perl modules (e.g. 
".so") +# +# Examples: +# +# AX_PERL_EXT +# if test x"$PERL" = x; then +# AC_ERROR(["cannot find Perl"]) +# fi +# +# LICENSE +# +# Copyright (c) 2011 Stanislav Sedov <[email protected]> +# Copyright (c) 2014 Thomas Klausner <[email protected]> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + +#serial 2 + +AC_DEFUN([AX_PERL_EXT],[ + + # + # Check if perl executable exists. + # + AC_PATH_PROGS(PERL, ["${PERL-perl}"], []) + + if test -n "$PERL" ; then + + # + # Check for Perl prefix. 
+ # + AC_ARG_VAR(PERL_EXT_PREFIX, [Perl PREFIX]) + AC_MSG_CHECKING([for Perl prefix]) + if test -z "$PERL_EXT_PREFIX" ; then + [PERL_EXT_PREFIX=`$PERL -MConfig -e 'print $Config{prefix};'`]; + fi + AC_MSG_RESULT([$PERL_EXT_PREFIX]) + AC_SUBST(PERL_EXT_PREFIX) + + # + # Check for Perl extensions include path. + # + AC_ARG_VAR(PERL_EXT_INC, [Directory to include XS headers from]) + AC_MSG_CHECKING([for Perl extension include path]) + if test -z "$PERL_EXT_INC" ; then + [PERL_EXT_INC=`$PERL -MConfig -e 'print $Config{archlibexp}, "/CORE";'`]; + fi + AC_MSG_RESULT([$PERL_EXT_INC]) + AC_SUBST(PERL_EXT_INC) + + # + # Check for the extensions target directory. + # + AC_ARG_VAR(PERL_EXT_LIB, [Directory to install perl files into]) + AC_MSG_CHECKING([for Perl extension target directory]) + if test -z "$PERL_EXT_LIB" ; then + [PERL_EXT_LIB=`$PERL -MConfig -e 'print $Config{sitearch};'`]; + fi + AC_MSG_RESULT([$PERL_EXT_LIB]) + AC_SUBST(PERL_EXT_LIB) + + # + # Check for Perl CPP flags. + # + AC_ARG_VAR(PERL_EXT_CPPFLAGS, [CPPFLAGS to compile perl extensions]) + AC_MSG_CHECKING([for Perl extensions C preprocessor flags]) + if test -z "$PERL_EXT_CPPFLAGS" ; then + [PERL_EXT_CPPFLAGS=`$PERL -MConfig -e 'print $Config{cppflags};'`]; + fi + AC_MSG_RESULT([$PERL_EXT_CPPFLAGS]) + AC_SUBST(PERL_EXT_CPPFLAGS) + + # + # Check for Perl extension link flags. + # + AC_ARG_VAR(PERL_EXT_LDFLAGS, [LDFLAGS to build perl extensions]) + AC_MSG_CHECKING([for Perl extensions linker flags]) + if test -z "$PERL_EXT_LDFLAGS" ; then + [PERL_EXT_LDFLAGS=`$PERL -MConfig -e 'print $Config{lddlflags};'`]; + fi + # Fix LDFLAGS for OS X. We don't want any -arch flags here, otherwise + # linking will fail. Also, OS X Perl LDFLAGS contains "-arch ppc" which + # is not supported by XCode anymore. 
+ case "${host}" in + *darwin*) + PERL_EXT_LDFLAGS=`echo ${PERL_EXT_LDFLAGS} | sed -e "s,-arch [[^ ]]*,,g"` + ;; + esac + AC_MSG_RESULT([$PERL_EXT_LDFLAGS]) + AC_SUBST(PERL_EXT_LDFLAGS) + + fi +]) diff --git a/m4/m4-ax_pkg_swig.m4 b/m4/m4-ax_pkg_swig.m4 new file mode 100644 index 0000000..d836eec --- /dev/null +++ b/m4/m4-ax_pkg_swig.m4 @@ -0,0 +1,135 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_pkg_swig.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PKG_SWIG([major.minor.micro], [action-if-found], [action-if-not-found]) +# +# DESCRIPTION +# +# This macro searches for a SWIG installation on your system. If found, +# then SWIG is AC_SUBST'd; if not found, then $SWIG is empty. If SWIG is +# found, then SWIG_LIB is set to the SWIG library path, and AC_SUBST'd. +# +# You can use the optional first argument to check if the version of the +# available SWIG is greater than or equal to the value of the argument. It +# should have the format: N[.N[.N]] (N is a number between 0 and 999. Only +# the first N is mandatory.) If the version argument is given (e.g. +# 1.3.17), AX_PKG_SWIG checks that the swig package is this version number +# or higher. +# +# As usual, action-if-found is executed if SWIG is found, otherwise +# action-if-not-found is executed. +# +# In configure.in, use as: +# +# AX_PKG_SWIG(1.3.17, [], [ AC_MSG_ERROR([SWIG is required to build..]) ]) +# AX_SWIG_ENABLE_CXX +# AX_SWIG_MULTI_MODULE_SUPPORT +# AX_SWIG_PYTHON +# +# LICENSE +# +# Copyright (c) 2008 Sebastian Huber <[email protected]> +# Copyright (c) 2008 Alan W. 
Irwin +# Copyright (c) 2008 Rafael Laboissiere <[email protected]> +# Copyright (c) 2008 Andrew Collier +# Copyright (c) 2011 Murray Cumming <[email protected]> +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. 
+ +#serial 11 + +AC_DEFUN([AX_PKG_SWIG],[ + # Ubuntu has swig 2.0 as /usr/bin/swig2.0 + AC_PATH_PROGS([SWIG],[swig swig2.0]) + if test -z "$SWIG" ; then + m4_ifval([$3],[$3],[:]) + elif test -n "$1" ; then + AC_MSG_CHECKING([SWIG version]) + [swig_version=`$SWIG -version 2>&1 | grep 'SWIG Version' | sed 's/.*\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g'`] + AC_MSG_RESULT([$swig_version]) + if test -n "$swig_version" ; then + # Calculate the required version number components + [required=$1] + [required_major=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_major" ; then + [required_major=0] + fi + [required=`echo $required | sed 's/[0-9]*[^0-9]//'`] + [required_minor=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_minor" ; then + [required_minor=0] + fi + [required=`echo $required | sed 's/[0-9]*[^0-9]//'`] + [required_patch=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_patch" ; then + [required_patch=0] + fi + # Calculate the available version number components + [available=$swig_version] + [available_major=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_major" ; then + [available_major=0] + fi + [available=`echo $available | sed 's/[0-9]*[^0-9]//'`] + [available_minor=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_minor" ; then + [available_minor=0] + fi + [available=`echo $available | sed 's/[0-9]*[^0-9]//'`] + [available_patch=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_patch" ; then + [available_patch=0] + fi + # Convert the version tuple into a single number for easier comparison. + # Using base 100 should be safe since SWIG internally uses BCD values + # to encode its version number. 
+ required_swig_vernum=`expr $required_major \* 10000 \ + \+ $required_minor \* 100 \+ $required_patch` + available_swig_vernum=`expr $available_major \* 10000 \ + \+ $available_minor \* 100 \+ $available_patch` + + if test $available_swig_vernum -lt $required_swig_vernum; then + AC_MSG_WARN([SWIG version >= $1 is required. You have $swig_version.]) + SWIG='' + m4_ifval([$3],[$3],[]) + else + AC_MSG_CHECKING([for SWIG library]) + SWIG_LIB=`$SWIG -swiglib` + AC_MSG_RESULT([$SWIG_LIB]) + m4_ifval([$2],[$2],[]) + fi + else + AC_MSG_WARN([cannot determine SWIG version]) + SWIG='' + m4_ifval([$3],[$3],[]) + fi + fi + AC_SUBST([SWIG_LIB]) +]) diff --git a/m4/m4-ax_python_devel.m4 b/m4/m4-ax_python_devel.m4 new file mode 100644 index 0000000..59a2ff0 --- /dev/null +++ b/m4/m4-ax_python_devel.m4 @@ -0,0 +1,324 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_python_devel.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PYTHON_DEVEL([version]) +# +# DESCRIPTION +# +# Note: Defines as a precious variable "PYTHON_VERSION". Don't override it +# in your configure.ac. +# +# This macro checks for Python and tries to get the include path to +# 'Python.h'. It provides the $(PYTHON_CPPFLAGS) and $(PYTHON_LDFLAGS) +# output variables. It also exports $(PYTHON_EXTRA_LIBS) and +# $(PYTHON_EXTRA_LDFLAGS) for embedding Python in your code. +# +# You can search for some particular version of Python by passing a +# parameter to this macro, for example ">= '2.3.1'", or "== '2.4'". Please +# note that you *have* to pass also an operator along with the version to +# match, and pay special attention to the single quotes surrounding the +# version number. Don't use "PYTHON_VERSION" for this: that environment +# variable is declared as precious and thus reserved for the end-user. +# +# This macro should work for all versions of Python >= 2.1.0. 
As an end +# user, you can disable the check for the python version by setting the +# PYTHON_NOVERSIONCHECK environment variable to something else than the +# empty string. +# +# If you need to use this macro for an older Python version, please +# contact the authors. We're always open for feedback. +# +# LICENSE +# +# Copyright (c) 2009 Sebastian Huber <[email protected]> +# Copyright (c) 2009 Alan W. Irwin +# Copyright (c) 2009 Rafael Laboissiere <[email protected]> +# Copyright (c) 2009 Andrew Collier +# Copyright (c) 2009 Matteo Settenvini <[email protected]> +# Copyright (c) 2009 Horst Knorr <[email protected]> +# Copyright (c) 2013 Daniel Mullner <[email protected]> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. 
When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 17 + +AU_ALIAS([AC_PYTHON_DEVEL], [AX_PYTHON_DEVEL]) +AC_DEFUN([AX_PYTHON_DEVEL],[ + # + # Allow the use of a (user set) custom python version + # + AC_ARG_VAR([PYTHON_VERSION],[The installed Python + version to use, for example '2.3'. This string + will be appended to the Python interpreter + canonical name.]) + + AC_PATH_PROG([PYTHON],[python[$PYTHON_VERSION]]) + if test -z "$PYTHON"; then + AC_MSG_ERROR([Cannot find python$PYTHON_VERSION in your system path]) + PYTHON_VERSION="" + fi + + # + # Check for a version of Python >= 2.1.0 + # + AC_MSG_CHECKING([for a version of Python >= '2.1.0']) + ac_supports_python_ver=`$PYTHON -c "import sys; \ + ver = sys.version.split ()[[0]]; \ + print (ver >= '2.1.0')"` + if test "$ac_supports_python_ver" != "True"; then + if test -z "$PYTHON_NOVERSIONCHECK"; then + AC_MSG_RESULT([no]) + AC_MSG_FAILURE([ +This version of the AC@&t@_PYTHON_DEVEL macro +doesn't work properly with versions of Python before +2.1.0. You may need to re-run configure, setting the +variables PYTHON_CPPFLAGS, PYTHON_LDFLAGS, PYTHON_SITE_PKG, +PYTHON_EXTRA_LIBS and PYTHON_EXTRA_LDFLAGS by hand. +Moreover, to disable this check, set PYTHON_NOVERSIONCHECK +to something else than an empty string. +]) + else + AC_MSG_RESULT([skip at user request]) + fi + else + AC_MSG_RESULT([yes]) + fi + + # + # if the macro parameter ``version'' is set, honour it + # + if test -n "$1"; then + AC_MSG_CHECKING([for a version of Python $1]) + ac_supports_python_ver=`$PYTHON -c "import sys; \ + ver = sys.version.split ()[[0]]; \ + print (ver $1)"` + if test "$ac_supports_python_ver" = "True"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([this package requires Python $1. 
+If you have it installed, but it isn't the default Python +interpreter in your system path, please pass the PYTHON_VERSION +variable to configure. See ``configure --help'' for reference. +]) + PYTHON_VERSION="" + fi + fi + + # + # Check if you have distutils, else fail + # + AC_MSG_CHECKING([for the distutils Python package]) + ac_distutils_result=`$PYTHON -c "import distutils" 2>&1` + if test -z "$ac_distutils_result"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([cannot import Python module "distutils". +Please check your Python installation. The error was: +$ac_distutils_result]) + PYTHON_VERSION="" + fi + + # + # Check for Python include path + # + AC_MSG_CHECKING([for Python include path]) + if test -z "$PYTHON_CPPFLAGS"; then + python_path=`$PYTHON -c "import distutils.sysconfig; \ + print (distutils.sysconfig.get_python_inc ());"` + plat_python_path=`$PYTHON -c "import distutils.sysconfig; \ + print (distutils.sysconfig.get_python_inc (plat_specific=1));"` + if test -n "${python_path}"; then + if test "${plat_python_path}" != "${python_path}"; then + python_path="-I$python_path -I$plat_python_path" + else + python_path="-I$python_path" + fi + fi + PYTHON_CPPFLAGS=$python_path + fi + AC_MSG_RESULT([$PYTHON_CPPFLAGS]) + AC_SUBST([PYTHON_CPPFLAGS]) + + # + # Check for Python library path + # + AC_MSG_CHECKING([for Python library path]) + if test -z "$PYTHON_LDFLAGS"; then + # (makes two attempts to ensure we've got a version number + # from the interpreter) + ac_python_version=`cat<<EOD | $PYTHON - + +# join all versioning strings, on some systems +# major/minor numbers could be in different list elements +from distutils.sysconfig import * +e = get_config_var('VERSION') +if e is not None: + print(e) +EOD` + + if test -z "$ac_python_version"; then + if test -n "$PYTHON_VERSION"; then + ac_python_version=$PYTHON_VERSION + else + ac_python_version=`$PYTHON -c "import sys; \ + print (sys.version[[:3]])"` + fi + fi + + # Make the 
versioning information available to the compiler + AC_DEFINE_UNQUOTED([HAVE_PYTHON], ["$ac_python_version"], + [If available, contains the Python version number currently in use.]) + + # First, the library directory: + ac_python_libdir=`cat<<EOD | $PYTHON - + +# There should be only one +import distutils.sysconfig +e = distutils.sysconfig.get_config_var('LIBDIR') +if e is not None: + print (e) +EOD` + + # Now, for the library: + ac_python_library=`cat<<EOD | $PYTHON - + +import distutils.sysconfig +c = distutils.sysconfig.get_config_vars() +if 'LDVERSION' in c: + print ('python'+c[['LDVERSION']]) +else: + print ('python'+c[['VERSION']]) +EOD` + + # This small piece shamelessly adapted from PostgreSQL python macro; + # credits goes to momjian, I think. I'd like to put the right name + # in the credits, if someone can point me in the right direction... ? + # + if test -n "$ac_python_libdir" -a -n "$ac_python_library" + then + # use the official shared library + ac_python_library=`echo "$ac_python_library" | sed "s/^lib//"` + PYTHON_LDFLAGS="-L$ac_python_libdir -l$ac_python_library" + else + # old way: use libpython from python_configdir + ac_python_libdir=`$PYTHON -c \ + "from distutils.sysconfig import get_python_lib as f; \ + import os; \ + print (os.path.join(f(plat_specific=1, standard_lib=1), 'config'));"` + PYTHON_LDFLAGS="-L$ac_python_libdir -lpython$ac_python_version" + fi + + if test -z "$PYTHON_LDFLAGS"; then + AC_MSG_ERROR([ + Cannot determine location of your Python DSO. Please check it was installed with + dynamic libraries enabled, or try setting PYTHON_LDFLAGS by hand.
+ ]) + fi + fi + AC_MSG_RESULT([$PYTHON_LDFLAGS]) + AC_SUBST([PYTHON_LDFLAGS]) + + # + # Check for site packages + # + AC_MSG_CHECKING([for Python site-packages path]) + if test -z "$PYTHON_SITE_PKG"; then + PYTHON_SITE_PKG=`$PYTHON -c "import distutils.sysconfig; \ + print (distutils.sysconfig.get_python_lib(0,0));"` + fi + AC_MSG_RESULT([$PYTHON_SITE_PKG]) + AC_SUBST([PYTHON_SITE_PKG]) + + # + # libraries which must be linked in when embedding + # + AC_MSG_CHECKING(python extra libraries) + if test -z "$PYTHON_EXTRA_LIBS"; then + PYTHON_EXTRA_LIBS=`$PYTHON -c "import distutils.sysconfig; \ + conf = distutils.sysconfig.get_config_var; \ + print (conf('LIBS') + ' ' + conf('SYSLIBS'))"` + fi + AC_MSG_RESULT([$PYTHON_EXTRA_LIBS]) + AC_SUBST(PYTHON_EXTRA_LIBS) + + # + # linking flags needed when embedding + # + AC_MSG_CHECKING(python extra linking flags) + if test -z "$PYTHON_EXTRA_LDFLAGS"; then + PYTHON_EXTRA_LDFLAGS=`$PYTHON -c "import distutils.sysconfig; \ + conf = distutils.sysconfig.get_config_var; \ + print (conf('LINKFORSHARED'))"` + fi + AC_MSG_RESULT([$PYTHON_EXTRA_LDFLAGS]) + AC_SUBST(PYTHON_EXTRA_LDFLAGS) + + # + # final check to see if everything compiles alright + # + AC_MSG_CHECKING([consistency of all components of python development environment]) + # save current global flags + ac_save_LIBS="$LIBS" + ac_save_CPPFLAGS="$CPPFLAGS" + LIBS="$ac_save_LIBS $PYTHON_LDFLAGS $PYTHON_EXTRA_LDFLAGS $PYTHON_EXTRA_LIBS" + CPPFLAGS="$ac_save_CPPFLAGS $PYTHON_CPPFLAGS" + AC_LANG_PUSH([C]) + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[#include <Python.h>]], + [[Py_Initialize();]]) + ],[pythonexists=yes],[pythonexists=no]) + AC_LANG_POP([C]) + # turn back to default flags + CPPFLAGS="$ac_save_CPPFLAGS" + LIBS="$ac_save_LIBS" + + AC_MSG_RESULT([$pythonexists]) + + if test ! "x$pythonexists" = "xyes"; then + AC_MSG_FAILURE([ + Could not link test program to Python. Maybe the main Python library has been + installed in some non-standard library path. 
If so, pass it to configure, + via the LDFLAGS environment variable. + Example: ./configure LDFLAGS="-L/usr/non-standard-path/python/lib" + ============================================================================ + ERROR! + You probably have to install the development version of the Python package + for your distribution. The exact name of this package varies among them. + ============================================================================ + ]) + PYTHON_VERSION="" + fi + + # + # all done! + # +]) diff --git a/m4/m4-ax_ruby_ext.m4 b/m4/m4-ax_ruby_ext.m4 new file mode 100644 index 0000000..af668ef --- /dev/null +++ b/m4/m4-ax_ruby_ext.m4 @@ -0,0 +1,102 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_ruby_ext.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_RUBY_EXT +# +# DESCRIPTION +# +# Fetches the linker flags and C compiler flags for compiling and linking +# Ruby binary extensions. The macro substitutes RUBY_VERSION, +# RUBY_EXT_INC, RUBY_EXT_LIB, RUBY_EXT_CPPFLAGS, RUBY_EXT_LDFLAGS and +# RUBY_EXT_DLEXT variables if Ruby executable has been found. It also +# checks the same variables before trying to retrieve them from the Ruby +# configuration. +# +# RUBY_VERSION: version of the Ruby interpreter +# RUBY_EXT_INC: Ruby include directory +# RUBY_EXT_LIB: Ruby extensions destination directory +# RUBY_EXT_CPPFLAGS: C preprocessor flags to compile extensions +# RUBY_EXT_LDFLAGS: linker flags to build extensions +# RUBY_EXT_DLEXT: extensions suffix for ruby modules (e.g. "so") +# +# Examples: +# +# AX_RUBY_EXT +# if test x"$RUBY" = x; then +# AC_ERROR(["cannot find Ruby"]) +# fi +# +# LICENSE +# +# Copyright (c) 2011 Stanislav Sedov <[email protected]> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + +#serial 2 + +AC_DEFUN([AX_RUBY_EXT],[ + # + # Check if ruby executable exists. + # + AC_ARG_VAR([RUBY], [the Ruby interpreter]) + AC_PATH_PROGS(RUBY, ["${RUBY-ruby}"], []) + + if test -n "$RUBY" ; then + # + # Check Ruby version. + # + AC_MSG_CHECKING([for Ruby version]) + [RUBY_VERSION=`$RUBY -e 'puts RUBY_VERSION'`]; + AC_MSG_RESULT([$RUBY_VERSION]) + AC_SUBST(RUBY_VERSION) + + # + # Get CFLAGS and LIBS from pkg-config + # + PKG_CHECK_MODULES([RUBY_EXT], [ruby-1.9 >= 1.9.1]) + + # + # Check for the extensions target directory. + # + AC_MSG_CHECKING([for Ruby extensions target directory]) + AS_IF([test -z "$RUBY_EXT_LIB"], + [RUBY_EXT_LIB=`$RUBY -rrbconfig -e 'puts RbConfig::CONFIG.values_at("sitearchdir")'`]) + AC_MSG_RESULT([$RUBY_EXT_LIB]) + AC_SUBST(RUBY_EXT_LIB) + + # Fix LDFLAGS for OS X. We don't want any -arch flags here, otherwise + # linking might fail. 
We also include the proper flags to create a bundle. + case "$host" in + *darwin*) + RUBY_EXT_LDFLAGS=`echo ${RUBY_EXT_LDFLAGS} | sed -e "s,-arch [[^ ]]*,,g"` + RUBY_EXT_LDFLAGS="${RUBY_EXT_LDFLAGS} -bundle -undefined dynamic_lookup" + ;; + esac + AC_MSG_RESULT([$RUBY_EXT_LDFLAGS]) + AC_SUBST(RUBY_EXT_LDFLAGS) + fi +]) diff --git a/m4/m4-ax_swig_enable_cxx.m4 b/m4/m4-ax_swig_enable_cxx.m4 new file mode 100644 index 0000000..f2cb90b --- /dev/null +++ b/m4/m4-ax_swig_enable_cxx.m4 @@ -0,0 +1,53 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_swig_enable_cxx.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_SWIG_ENABLE_CXX +# +# DESCRIPTION +# +# Enable SWIG C++ support. This affects all invocations of $(SWIG). +# +# LICENSE +# +# Copyright (c) 2008 Sebastian Huber <[email protected]> +# Copyright (c) 2008 Alan W. Irwin +# Copyright (c) 2008 Rafael Laboissiere <[email protected]> +# Copyright (c) 2008 Andrew Collier +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro.
You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 9 + +AU_ALIAS([SWIG_ENABLE_CXX], [AX_SWIG_ENABLE_CXX]) +AC_DEFUN([AX_SWIG_ENABLE_CXX],[ + AC_REQUIRE([AX_PKG_SWIG]) + AC_REQUIRE([AC_PROG_CXX]) + SWIG="$SWIG -c++" +]) diff --git a/m4/m4-ax_swig_python.m4 b/m4/m4-ax_swig_python.m4 new file mode 100644 index 0000000..bf22558 --- /dev/null +++ b/m4/m4-ax_swig_python.m4 @@ -0,0 +1,64 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_swig_python.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_SWIG_PYTHON([use-shadow-classes = {no, yes}]) +# +# DESCRIPTION +# +# Checks for Python and provides the $(AX_SWIG_PYTHON_CPPFLAGS), and +# $(AX_SWIG_PYTHON_OPT) output variables. +# +# $(AX_SWIG_PYTHON_OPT) contains all necessary SWIG options to generate +# code for Python. Shadow classes are enabled unless the value of the +# optional first argument is exactly 'no'. If you need multi module +# support (provided by the AX_SWIG_MULTI_MODULE_SUPPORT macro) use +# $(AX_SWIG_PYTHON_LIBS) to link against the appropriate library. It +# contains the SWIG Python runtime library that is needed by the type +# check system for example. +# +# LICENSE +# +# Copyright (c) 2008 Sebastian Huber <[email protected]> +# Copyright (c) 2008 Alan W. 
Irwin +# Copyright (c) 2008 Rafael Laboissiere <[email protected]> +# Copyright (c) 2008 Andrew Collier +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. 
+ +#serial 10 + +AU_ALIAS([SWIG_PYTHON], [AX_SWIG_PYTHON]) +AC_DEFUN([AX_SWIG_PYTHON],[ + AC_REQUIRE([AX_PKG_SWIG]) + AC_REQUIRE([AX_PYTHON_DEVEL]) + test "x$1" != "xno" || swig_shadow=" -noproxy" + AC_SUBST([AX_SWIG_PYTHON_OPT],[-python$swig_shadow]) + AC_SUBST([AX_SWIG_PYTHON_CPPFLAGS],[$PYTHON_CPPFLAGS]) +]) diff --git a/sub_commands/jellyfish.cc b/sub_commands/jellyfish.cc index 53549fd..25fc3cd 100644 --- a/sub_commands/jellyfish.cc +++ b/sub_commands/jellyfish.cc @@ -127,7 +127,11 @@ int sos(int argc, char *argv[]) int version(int argc, char *argv[]) { +#ifdef PACKAGE_STRING std::cout << PACKAGE_STRING << std::endl; +#else + std::cout << "no version" << std::endl; +#endif return 0; } diff --git a/swig/Makefile.am b/swig/Makefile.am new file mode 100644 index 0000000..af7ed5e --- /dev/null +++ b/swig/Makefile.am @@ -0,0 +1,68 @@ +# SWIG +BUILT_SOURCES = +CLEANFILES = +EXTRA_DIST = +SWIG_SRC = jellyfish.i hash_counter.i hash_set.i mer_dna.i mer_file.i string_mers.i + +if HAVE_SWIG +SWIG_V_GEN = $(swig_v_GEN_$(V)) +swig_v_GEN_ = $(swig_v_GEN_$(AM_DEFAULT_VERBOSITY)) +swig_v_GEN_0 = @echo " SWIG " $@; +%/swig_wrap.cpp: $(SWIG_SRC) + $(SWIG_V_GEN)$(SWIG) -$* -I$(top_srcdir)/include -o $@ $< +else +%/swig_wrap.cc: + @echo >&2 SWIG >= 3.x.x not found. 
Make sure it is installed and rerun configure + @false +endif + +# Python support +if PYTHON_BINDING +PYTHON_BUILT = python/swig_wrap.cpp python/jellyfish.py +BUILT_SOURCES += $(PYTHON_BUILT) + +pythonextdir = $(PYTHON_SITE_PKG)/jellyfish +pythonext_SCRIPTS = python/jellyfish.pyc python/__init__.pyc +pythonext_LTLIBRARIES = python/_jellyfish.la +python__jellyfish_la_SOURCES = python/swig_wrap.cpp $(SWIG_SRC) +python__jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(top_srcdir)/include +python__jellyfish_la_LDFLAGS = -module $(PYTHON_LDFLAGS) +python__jellyfish_la_LIBADD = ../libjellyfish-2.0.la +CLEANFILES += $(PYTHON_BUILT) $(dist_pythonext_DATA) python/jellyfish.py +PYTHONC_V_GEN = $(pythonc_v_GEN_$(V)) +pythonc_v_GEN_ = $(pythonc_v_GEN_$(AM_DEFAULT_VERBOSITY)) +pythonc_v_GEN_0 = @echo " PYTHONC " $@; +%.pyc: %.py + $(PYTHONC_V_GEN)$(PYTHON) -c 'import py_compile, sys; py_compile.compile(sys.argv[1], sys.argv[2])' $< $@ +python/jellyfish.py: python/swig_wrap.cpp +EXTRA_DIST += python/jellyfish.py python/__init__.py +endif + +# Ruby support +if RUBY_BINDING +RUBY_BUILT = ruby/swig_wrap.cpp +BUILT_SOURCES += $(RUBY_BUILT) +rubyextdir = $(RUBY_EXT_LIB) +rubyext_LTLIBRARIES = ruby/jellyfish.la +ruby_jellyfish_la_SOURCES = ruby/swig_wrap.cpp $(SWIG_SRC) +ruby_jellyfish_la_CPPFLAGS = $(RUBY_EXT_CFLAGS) -I$(top_srcdir)/include +ruby_jellyfish_la_LDFLAGS = -module $(RUBY_EXT_LIBS) +ruby_jellyfish_la_LIBADD = ../libjellyfish-2.0.la +CLEANFILES += $(RUBY_BUILT) +endif + +# Perl5 support +if PERL_BINDING +PERL_BUILT = perl5/swig_wrap.cpp perl5/jellyfish.pm +BUILT_SOURCES += $(PERL_BUILT) +perlextdir = $(PERL_EXT_LIB) +perlext_SCRIPTS = perl5/jellyfish.pm +perlext_LTLIBRARIES = perl5/jellyfish.la +perl5_jellyfish_la_SOURCES = perl5/swig_wrap.cpp $(SWIG_SRC) +perl5_jellyfish_la_CPPFLAGS = $(PERL_EXT_CPPFLAGS) -I$(PERL_EXT_INC) -I$(top_srcdir)/include +perl5_jellyfish_la_LDFLAGS = -module $(PERL_EXT_LDFLAGS) +perl5_jellyfish_la_LIBADD = ../libjellyfish-2.0.la +CLEANFILES +=
$(PERL_BUILT) perl5/jellyfish.pm +perl5/jellyfish.pm: perl5/swig_wrap.cpp +EXTRA_DIST += perl5/jellyfish.pm +endif diff --git a/swig/Readme.md b/swig/Readme.md index ee297af..ae8868a 100644 --- a/swig/Readme.md +++ b/swig/Readme.md @@ -28,39 +28,29 @@ The [swig](http://www.swig.org/) software package must be installed. All the testing is done with version 3.x. Version 2.x MAY work, but is not tested. -Python ------- +Configure +--------- -The following was tested with Python version 3.3.3. To install: +To compile the bindings, use, according to taste, some of the following switches with configure: ```Shell -python setup.py build -sudo python setup.py install +./configure --enable-swig --enable-python-binding --enable-ruby-binding --enable-perl-binding ``` -Ruby ----- - -The following was tested with Ruby version 1.9.3. To install: +In addition, each of the `--enable-*-binding` switches can take a path in which to install the binding. This allows installing without root privileges. For example: ```Shell -ruby extconf.rb +./configure --prefix=`pwd`/inst --enable-swig --enable-python-binding=`pwd`/inst/python make -sudo make install +make install ``` -Perl ----- +will install `jellyfish` in `./inst/bin` and the python files in `./inst/python`. Then, one needs to add `$(pwd)/inst/python` to `PYTHONPATH` to use the binding. Similarly with ruby and `RUBYLIB`, perl and `PERL5LIB`. -The following was tested with Perl version 5.18.1. The Perl headers -may not compile properly with recent version of g++. It compiles -properly with g++ version 4.4. Hence, you may need to pass something -like 'CC=g++-4.4' to the 'make' command for it to compile. +The swig bindings were tested with Python 3.3.3, Ruby 1.9.3 and Perl 5.18.1. The Perl headers may not compile properly with recent versions of g++. It compiles properly with g++ version 4.4. Hence, you may need to pass the path to `g++` version 4.4 to the configure command line.
For example: ```Shell -perl Makefile.PL -make -sudo make install +./configure --enable-swig --enable-perl-binding CXX=g++-4.4 ``` Examples diff --git a/swig/perl5/t/test_mer_file.t b/swig/perl5/t/test_mer_file.t index 2403545..a4db65b 100644 --- a/swig/perl5/t/test_mer_file.t +++ b/swig/perl5/t/test_mer_file.t @@ -7,11 +7,11 @@ my $data = shift(@ARGV); # Compare histo { - my $rf = jellyfish::ReadMerFile->new($data . "/sequence.jf"); + my $rf = jellyfish::ReadMerFile->new($data . "/swig_perl.jf"); my @histo; $histo[$rf->count]++ while($rf->next_mer); - open(my $io, "<", $data . "/sequence.histo"); + open(my $io, "<", $data . "/swig_perl.histo"); my @jf_histo; while(<$io>) { my ($freq, $count) = split; @@ -23,8 +23,8 @@ my $data = shift(@ARGV); # Compare dump { - my $rf = jellyfish::ReadMerFile->new($data . "/sequence.jf"); - my $equal = open(my $io, "<", $data . "/sequence.dump"); + my $rf = jellyfish::ReadMerFile->new($data . "/swig_perl.jf"); + my $equal = open(my $io, "<", $data . "/swig_perl.dump"); while(<$io>) { my ($mer, $count) = split; $equal &&= $rf->next_mer; @@ -38,8 +38,8 @@ my $data = shift(@ARGV); # Query { - my $rf = jellyfish::ReadMerFile->new($data . "/sequence.jf"); - my $qf = jellyfish::QueryMerFile->new($data . "/sequence.jf"); + my $rf = jellyfish::ReadMerFile->new($data . "/swig_perl.jf"); + my $qf = jellyfish::QueryMerFile->new($data . 
"/swig_perl.jf"); my $good = 1; while(my ($mer, $count) = $rf->each) { $good &&= $count == $qf->get($mer) or diff --git a/swig/python/__init__.py b/swig/python/__init__.py new file mode 100644 index 0000000..afb0598 --- /dev/null +++ b/swig/python/__init__.py @@ -0,0 +1 @@ +from jellyfish import * diff --git a/swig/python/test_mer_file.py b/swig/python/test_mer_file.py index 34a8cf6..1a1cc6c 100644 --- a/swig/python/test_mer_file.py +++ b/swig/python/test_mer_file.py @@ -6,7 +6,7 @@ from collections import Counter class TestMerFile(unittest.TestCase): def setUp(self): - self.mf = jellyfish.ReadMerFile(os.path.join(data, "sequence.jf")) + self.mf = jellyfish.ReadMerFile(os.path.join(data, "swig_python.jf")) def test_histo(self): histo = Counter() @@ -14,14 +14,14 @@ class TestMerFile(unittest.TestCase): histo[self.mf.count()] += 1 jf_histo = Counter() - with open(os.path.join(data, "sequence.histo")) as f: + with open(os.path.join(data, "swig_python.histo")) as f: for line in f: num, count = [int(n) for n in line.split()] self.assertEqual(count, histo[num]) def test_dump(self): good = True - with open(os.path.join(data, "sequence.dump")) as f: + with open(os.path.join(data, "swig_python.dump")) as f: for line in f: good = good and self.mf.next_mer() if not good: break @@ -32,7 +32,7 @@ class TestMerFile(unittest.TestCase): def test_iter(self): good = True - with open(os.path.join(data, "sequence.dump")) as f: + with open(os.path.join(data, "swig_python.dump")) as f: for mer, count in self.mf: line = f.readline() good = good and line @@ -46,7 +46,7 @@ class TestMerFile(unittest.TestCase): def test_query(self): good = True - qf = jellyfish.QueryMerFile(os.path.join(data, "sequence.jf")) + qf = jellyfish.QueryMerFile(os.path.join(data, "swig_python.jf")) for mer, count in self.mf: good = good and count == qf[mer] if not good: break diff --git a/swig/ruby/test_mer_file.rb b/swig/ruby/test_mer_file.rb index 92dc60c..1a80ca4 100644 --- a/swig/ruby/test_mer_file.rb +++ 
b/swig/ruby/test_mer_file.rb @@ -5,7 +5,7 @@ $data = ARGV.shift class TestMerFile < MiniTest::Unit::TestCase def setup - @mf = Jellyfish::ReadMerFile.new(File.join($data, "sequence.jf")) + @mf = Jellyfish::ReadMerFile.new(File.join($data, "swig_ruby.jf")) end def test_histo @@ -13,7 +13,7 @@ class TestMerFile < MiniTest::Unit::TestCase histo[@mf.count] = (histo[@mf.count] || 0) + 1 while @mf.next_mer jf_histo = [] - open(File.join($data, "sequence.histo")) { |f| + open(File.join($data, "swig_ruby.histo")) { |f| f.lines.each { |l| freq, count = l.split.map {|x| x.to_i } jf_histo[freq] = count @@ -24,7 +24,7 @@ class TestMerFile < MiniTest::Unit::TestCase end def test_each - open(File.join($data, "sequence.dump")) { |f| + open(File.join($data, "swig_ruby.dump")) { |f| @mf.each { |m, c| l = f.readline assert l @@ -37,7 +37,7 @@ class TestMerFile < MiniTest::Unit::TestCase end def test_dump - open(File.join($data, "sequence.dump")) { |f| + open(File.join($data, "swig_ruby.dump")) { |f| f.lines.each { |l| mer, count = l.split assert @mf.next_mer @@ -49,7 +49,7 @@ class TestMerFile < MiniTest::Unit::TestCase end def test_query - query = Jellyfish::QueryMerFile.new(File.join($data, "sequence.jf")) + query = Jellyfish::QueryMerFile.new(File.join($data, "swig_ruby.jf")) @mf.each { |m, c| assert_equal c, query[m] } diff --git a/swig/string_mers.i b/swig/string_mers.i new file mode 100644 index 0000000..9a1f49e --- /dev/null +++ b/swig/string_mers.i @@ -0,0 +1,6 @@ +/****************************************/ +/* Iterator of all the mers in a string */ +/****************************************/ +%{ + +%} diff --git a/tests/compat.sh.in b/tests/compat.sh.in index 29bf812..37bca5a 100644 --- a/tests/compat.sh.in +++ b/tests/compat.sh.in @@ -5,11 +5,19 @@ pref=$(basename $0 .sh) DIR=../bin JF="$DIR/jellyfish" [ -n "$VALGRIND" ] && JF="valgrind $JF" +SRCDIR=@abs_top_srcdir@ +BUILDDIR=@abs_top_builddir@ check () { cut -d\ -f 2 $1 | xargs @MD5@ | sort -k2,2 | diff -w $DIFFFLAGS $1 
- } +ENABLE_RUBY_BINDING="@RUBY_EXT_LIB@" +RUBY="@RUBY@" +ENABLE_PYTHON_BINDING="@PYTHON@" +PYTHON="@PYTHON@" +ENABLE_PERL_BINDING="@PERL_EXT_LIB@" +PERL="@PERL@" if [ -n "$DEBUG" ]; then set -x; diff --git a/tests/swig_perl.sh b/tests/swig_perl.sh new file mode 100644 index 0000000..306541e --- /dev/null +++ b/tests/swig_perl.sh @@ -0,0 +1,17 @@ +#! /bin/sh + +cd tests +. ./compat.sh +[ -z "$ENABLE_PERL_BINDING" ] && exit 77 + +LOADPATH="$BUILDDIR/swig/perl5" +K=$($PERL -e 'print(int(rand(16)) + 6)') +I=$($PERL -e 'print(int(rand(5)))') +$JF count -m $K -s 10M -t $nCPUs -C -o ${pref}.jf seq1m_$I.fa +$JF dump -c ${pref}.jf > ${pref}.dump +$JF histo ${pref}.jf > ${pref}.histo + +for i in test_mer_file.t test_hash_counter.t; do + echo Test $i + $PERL "-I$LOADPATH/.libs" "-I$LOADPATH" "$SRCDIR/swig/perl5/t/$i" . +done diff --git a/tests/swig_python.sh b/tests/swig_python.sh new file mode 100644 index 0000000..e70bba5 --- /dev/null +++ b/tests/swig_python.sh @@ -0,0 +1,17 @@ +#! /bin/sh + +cd tests +. ./compat.sh +[ -z "$ENABLE_PYTHON_BINDING" ] && exit 77 + +export PYTHONPATH="$BUILDDIR/swig/python/.libs:$BUILDDIR/swig/python${PYTHONPATH+:$PYTHONPATH}" +K=$($PYTHON -c 'import random; print(random.randint(6, 20))') +I=$($PYTHON -c 'import random; print(random.randint(0, 4))') +$JF count -m $K -s 10M -t $nCPUs -C -o ${pref}.jf seq1m_$I.fa +$JF dump -c ${pref}.jf > ${pref}.dump +$JF histo ${pref}.jf > ${pref}.histo + +for i in test_mer_file.py test_hash_counter.py; do + echo Test $i + $PYTHON "$SRCDIR/swig/python/$i" . +done diff --git a/tests/swig_ruby.sh b/tests/swig_ruby.sh new file mode 100644 index 0000000..0644fcc --- /dev/null +++ b/tests/swig_ruby.sh @@ -0,0 +1,19 @@ +#! /bin/sh + +cd tests +. 
./compat.sh +[ -z "$ENABLE_RUBY_BINDING" ] && exit 77 + +LOADPATH="$BUILDDIR/swig/ruby/.libs" +K=$($RUBY -e 'print(rand(15) + 6)') +I=$($RUBY -e 'print(rand(5))') +$JF count -m $K -s 10M -t $nCPUs -C -o ${pref}.jf seq1m_$I.fa +$JF dump -c ${pref}.jf > ${pref}.dump +$JF histo ${pref}.jf > ${pref}.histo + + + +for i in test_mer_file.rb test_hash_counter.rb; do + echo Test $i + $RUBY "-I$LOADPATH" "$SRCDIR/swig/ruby/$i" . +done diff --git a/unit_tests/test_mer_overlap_sequence_parser.cc b/unit_tests/test_mer_overlap_sequence_parser.cc index 26cfc32..c104ac4 100644 --- a/unit_tests/test_mer_overlap_sequence_parser.cc +++ b/unit_tests/test_mer_overlap_sequence_parser.cc @@ -47,6 +47,9 @@ TEST(MerOverlapSequenceParser, OneSmallSequence) { parser_type::job j2(parser); EXPECT_TRUE(j2.is_empty()); + + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)1, parser.nb_reads()); } string generate_sequences(std::ostream& os, int a, int b, int nb, bool fastq = false) { @@ -102,6 +105,9 @@ TEST(MerOverlapSequenceParser, ManySmallSequences) { offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res.size(), offset + mer_len - 1); + + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)nb_reads, parser.nb_reads()); } TEST(MerOverlapSequenceParser, BigSequences) { @@ -147,6 +153,9 @@ TEST(MerOverlapSequenceParser, BigSequences) { parser_type::job j2(parser); EXPECT_TRUE(j2.is_empty()); + + EXPECT_EQ((size_t)2, parser.nb_files()); + EXPECT_EQ((size_t)6, parser.nb_reads()); } TEST(MerOverlapSequenceParser, Fastq) { @@ -173,5 +182,8 @@ TEST(MerOverlapSequenceParser, Fastq) { offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res.size(), offset + mer_len - 1); + + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)nb_reads, parser.nb_reads()); } } // namespace { diff --git a/unit_tests/test_whole_sequence_parser.cc b/unit_tests/test_whole_sequence_parser.cc index 587f422..ddd2fc3 100644 --- a/unit_tests/test_whole_sequence_parser.cc +++ 
b/unit_tests/test_whole_sequence_parser.cc @@ -64,6 +64,8 @@ TEST(SequenceParser, Fasta) { parser_type::job j(parser); EXPECT_TRUE(j.is_empty()); } + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)2, parser.nb_reads()); } TEST(SequenceParser, Fastq) { @@ -116,6 +118,9 @@ TEST(SequenceParser, Fastq) { parser_type::job j(parser); EXPECT_TRUE(j.is_empty()); } + + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)2, parser.nb_reads()); } TEST(SequenceParser, FastaMany) { @@ -156,6 +161,8 @@ TEST(SequenceParser, FastaMany) { } } EXPECT_EQ(nb_sequences, got_sequences); + EXPECT_EQ((size_t)1, parser.nb_files()); + EXPECT_EQ((size_t)nb_sequences, parser.nb_reads()); } } -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/jellyfish.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
