This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch upstream in repository fastaq_tmp.
commit ea86704d47a615da7ce242c28aadac5d32a8410c Author: Jorge Soares <[email protected]> Date: Tue Nov 18 16:17:44 2014 +0000 Imported Upstream version 1.6.0 --- debian/changelog | 5 - debian/compat | 1 - debian/control | 174 ----- debian/copyright | 22 - debian/fastaq.manpages | 1 - ...ay-import-statements-for-manpage-creation.patch | 737 --------------------- debian/patches/series | 1 - debian/rules | 25 - debian/source/format | 1 - debian/upstream/metadata | 12 - debian/usage_to_man | 99 --- debian/watch | 3 - fastaq/tasks.py | 31 +- ...sequences_test_fastaq_to_quasr_primers.expected | 2 - .../data/sequences_test_fastaq_to_quasr_primers.fa | 4 - fastaq/tests/data/tasks_test_sequence_trim_1.fa | 24 +- .../data/tasks_test_sequence_trim_1.trimmed.fa | 14 +- fastaq/tests/data/tasks_test_sequence_trim_2.fa | 24 +- .../data/tasks_test_sequence_trim_2.trimmed.fa | 14 +- fastaq/tests/data/tasks_test_sequences_to_trim.fa | 8 +- fastaq/tests/tasks_test.py | 11 +- scripts/fastaq_sequence_trim | 4 +- scripts/fastaq_to_quasr_primers_file | 12 - setup.py | 2 +- 24 files changed, 76 insertions(+), 1155 deletions(-) diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index 3e78fa5..0000000 --- a/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -fastaq (1.5.0-1) unstable; urgency=medium - - * Initial release (Closes: #766321) - - -- Jorge Soares <[email protected]> Thu, 23 Oct 2014 20:23:54 +0200 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec63514..0000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index 97f4773..0000000 --- a/debian/control +++ /dev/null @@ -1,174 +0,0 @@ -Source: fastaq -Maintainer: Debian Med Packaging Team <[email protected]> -Uploaders: Andreas Tille <[email protected]>, - Jorge Soares <[email protected]> -Section: science -Priority: optional -Build-Depends: debhelper (>= 9), - python3, - python3-setuptools, - python3-numpy, - python3-nose, - samtools, - help2man -Standards-Version: 3.9.6 -Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/fastaq.git -Homepage: https://github.com/sanger-pathogens/Fastaq - -Package: fastaq -Architecture: all -Depends: ${python3:Depends}, - ${misc:Depends} -Description: FASTA and FASTQ file manipulation tools - A collection of scripts that perform useful and common - fasta/q manipulation tasks. - . - All scripts automatically detect whether the input is - a FASTA or FASTQ file. - . - Input and output files can be gzipped. - . - fastaq_capillary_to_pairs - - Given a fasta/q file of capillary reads, - makes an interleaved file of read pairs - . - fastaq_chunker - - Splits a multi fasta/q file into separate files. - Splits sequences into chunks of a fixed size. - . - fastaq_count_sequences - - Counts the number of sequences in a fasta/q file - . - fastaq_deinterleave - - Deinterleaves fasta/q file, so that reads are written - alternately between two output files - . - fastaq_enumerate_names - - Renames sequences in a file, calling them 1,2,3... - . - fastaq_expand_nucleotides - - Makes all combinations of sequences in input file - by using all possibilities of redundant bases. - e.g. ART could be AAT or AGT. - . - fastaq_extend_gaps - - Extends the length of all gaps (and trims the start/end - of sequences) in a fasta/q file. - . - fastaq_fasta_to_fastq - - Given a fasta and qual file, makes a fastq file. - . - fastaq_filter - - Filters a fasta/q file by sequence length and/or - by name matching a regular expression. - . - fastaq_get_ids - - Gets IDs from each sequence in a fasta or fastq file. - . - fastaq_get_seq_flanking_gaps - - Gets the sequences either side of gaps in a fasta/q file. - . - fastaq_insert_or_delete_bases - - Deletes or inserts bases at given position(s) - from a fasta/q file. - . - fastaq_interleave - - Interleaves two fasta/q files, so that reads are written - alternately first/second in output file. - . - fastaq_long_read_simulate - - Simulates long reads from a fasta/q file. Can optionally - make insertions into the reads, like pacbio does. - . - fastaq_make_random_contigs - - Makes a multi-fasta file of random sequences, - all of the same length. Each base has equal chance of - being A,C,G or T - . - fastaq_merge - - Converts multi fasta/q file to single sequence file, - preserving original order of sequences. - . - fastaq_replace_bases - - Replaces all occurences of one letter with another in - a fasta/q file. - . - fastaq_reverse_complement - - Reverse complements all sequences in a fasta/q file - . - fastaq_scaffolds_to_contigs - - Creates a file of contigs from a file of scaffolds - i.e. - breaks at every gap in the input. - . - fastaq_search_for_seq - - Searches for an exact match on a given string and its - reverese complement, in every sequences of a fasta/q file. - Case insensitive. Guaranteed to find all hits. - . - fastaq_sequence_trim - - Trims sequences off the start of all sequences in a pair - of fasta/q files, whenever there is a perfect match. - Only keeps a read pair if both reads of the pair are at - least a minimum length after any trimming. - . - fastaq_split_by_base_count - - Splits a multi fasta/q file into separate files. - Does not split sequences. Puts up to max_bases - into each split file. The exception is that any - sequence longer than max_bases is put into its own file. - . - fastaq_strip_illumina_suffix - - Strips /1 or /2 off the end of every read name - in a fasta/q file. - . - fastaq_to_fake_qual - - Makes fake quality scores file from a fasta/q file. - . - fastaq_to_fasta - - Converts sequence file to FASTA format. - . - fastaq_to_mira_xml - - Creates an xml file from a fasta/q file of reads, - for use with Mira assembler. - . - fastaq_to_orfs_gff - - Writes a GFF file of open reading frames from a fasta/q file - . - fastaq_to_perfect_reads - - Makes perfect paired end fastq reads from a fasta/q file, - with insert sizes sampled from a normal distribution. - Read orientation is innies. Output is an interleaved fastq file. - . - fastaq_to_quasr_primers_file - - Converts a fasta/q file to QUASR primers format: - just the sequence on each line and its reverse complement, - tab separated. - . - fastaq_to_random_subset - - Takes a random subset of reads from a fasta/q file and optionally - the corresponding read from a mates file. - Ouptut is interleaved if mates file given. - . - fastaq_to_tiling_bam - - Takes a fasta/q file. Makes a BAM file containing perfect - (unpaired) reads tiling the whole genome. - . - fastaq_to_unique_by_id - - Removes duplicate sequences from a fasta/q file, - based on their names. If the same name is found - more than once, then the longest sequence is kept. - Order of sequences is preserved in output. - . - fastaq_translate - - Translates all sequences in a fasta or fastq file. - Output is always fasta format - . - fastaq_trim_ends - - Trims set number of bases off each sequence in a fasta/q file - . - fastaq_trim_Ns_at_end - - Trims any Ns off each sequence in a fasta/q file. - Does nothing to gaps in the middle, just trims the ends - . - A developer API is also provided by this package. - There are plenty of examples in tasks.py diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index 7b2546c..0000000 --- a/debian/copyright +++ /dev/null @@ -1,22 +0,0 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: Fastaq -Source: https://github.com/sanger-pathogens/Fastaq - -Files: * -Copyright: © 2012-2013 Martin Hunt <[email protected]> -License: GPL-3+ - This package is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - . - This package is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - . - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/> - . - On Debian systems, the complete text of the GNU General - Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". diff --git a/debian/fastaq.manpages b/debian/fastaq.manpages deleted file mode 100644 index d2c65e3..0000000 --- a/debian/fastaq.manpages +++ /dev/null @@ -1 +0,0 @@ -debian/man/* \ No newline at end of file diff --git a/debian/patches/delay-import-statements-for-manpage-creation.patch b/debian/patches/delay-import-statements-for-manpage-creation.patch deleted file mode 100644 index b3120ce..0000000 --- a/debian/patches/delay-import-statements-for-manpage-creation.patch +++ /dev/null @@ -1,737 +0,0 @@ -Description: Delay import of Fastaq modules by the python executables - Man pages for this package are being automatically created with through the - help2man wrapper called usage_to_man. help2man calls the python executables - with the -h option and converts the usage into a man page. - . - The first step done by all the executables is the import of the modules deployed - by this package. Since the package is not installed in the system at build time, - the man pages would never be properly created. - . - This patch solves this problem by importing the modules in this package after - the argument parsing code. - . - Upstream prefered to keep the code as it is for styling reasons, which is - perfectly reasonable - . - fastaq (1.5.0-1) UNRELEASED; urgency=low - . - * Initial release (Closes: #1234) -Author: DMPT <[email protected]> ---- a/scripts/fastaq_capillary_to_pairs -+++ b/scripts/fastaq_capillary_to_pairs -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.capillary_to_pairs(options.infile, options.outprefix) ---- a/scripts/fastaq_chunker -+++ b/scripts/fastaq_chunker -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences', -@@ -12,6 +11,10 @@ - parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size') - parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.split_by_fixed_size( - options.infile, - options.outprefix, ---- a/scripts/fastaq_count_sequences -+++ b/scripts/fastaq_count_sequences -@@ -1,11 +1,14 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Counts the number of sequences in a fasta/q file', - usage = '%(prog)s <fasta/q in>') - parser.add_argument('infile', help='Name of input fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - print(tasks.count_sequences(options.infile)) ---- a/scripts/fastaq_deinterleave -+++ b/scripts/fastaq_deinterleave -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files', -@@ -11,4 +10,8 @@ - parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads') - parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out) ---- a/scripts/fastaq_enumerate_names -+++ b/scripts/fastaq_enumerate_names -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Renames sequences in a file, calling them 1,2,3... etc', -@@ -12,6 +11,10 @@ - parser.add_argument('infile', help='Name of fasta/q file to be read') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.enumerate_names(options.infile, - options.outfile, - start_index=options.start_index, ---- a/scripts/fastaq_expand_nucleotides -+++ b/scripts/fastaq_expand_nucleotides -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids', -@@ -9,6 +8,10 @@ - parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip') - parser.add_argument('outfile', help='Name of output file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.expand_nucleotides( - options.infile, - options.outfile, ---- a/scripts/fastaq_extend_gaps -+++ b/scripts/fastaq_extend_gaps -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost', -@@ -10,4 +9,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.extend_gaps(options.infile, options.outfile, options.trim_number) ---- a/scripts/fastaq_fasta_to_fastq -+++ b/scripts/fastaq_fasta_to_fastq -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Given a fasta and qual file, makes a fastq file', -@@ -10,4 +9,8 @@ - parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in') - parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile) ---- a/scripts/fastaq_filter -+++ b/scripts/fastaq_filter -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression', -@@ -14,6 +13,10 @@ - parser.add_argument('infile', help='Name of fasta/q file to be filtered') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.filter(options.infile, - options.outfile, - minlength=options.min_length, ---- a/scripts/fastaq_get_ids -+++ b/scripts/fastaq_get_ids -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Gets IDs from each sequence in a fasta or fastq file', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.get_ids(options.infile, options.outfile) ---- a/scripts/fastaq_get_seq_flanking_gaps -+++ b/scripts/fastaq_get_seq_flanking_gaps -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Gets the sequences either side of gaps in a fasta/q file', -@@ -11,4 +10,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right) ---- a/scripts/fastaq_insert_or_delete_bases -+++ b/scripts/fastaq_insert_or_delete_bases -@@ -1,9 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --import sys --import random --from fastaq import sequences, utils, intervals - - parser = argparse.ArgumentParser( - description = 'Deletes or inserts bases at given position(s) from a fasta/q file', -@@ -16,6 +13,11 @@ - parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step') - options = parser.parse_args() - -+ -+import sys -+import random -+from fastaq import sequences, utils, intervals -+ - test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]] - - if sum(test_ops) != 1: ---- a/scripts/fastaq_interleave -+++ b/scripts/fastaq_interleave -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file', -@@ -10,4 +9,8 @@ - parser.add_argument('infile_2', help='Name of second input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.interleave(options.infile_1, options.infile_2, options.outfile) ---- a/scripts/fastaq_long_read_simulate -+++ b/scripts/fastaq_long_read_simulate -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).', -@@ -16,7 +15,6 @@ - parser.add_argument('--fixed_read_length', type=int, help='Length of each read. Only applies if method is tile or uniform. [%(default)s]', default=20000, metavar='INT') - parser.add_argument('--coverage', type=float, help='Read coverage. Only applies if method is gamma or uniform. [%(default)s]', default=2, metavar='FLOAT') - -- - tiling_group = parser.add_argument_group('tiling options') - tiling_group.add_argument('--tile_step', type=int, help='Distance between start of each read [%(default)s]', default=10000, metavar='INT') - -@@ -29,8 +27,11 @@ - ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT') - ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT') - -- - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.make_long_reads( - options.infile, - options.outfile, ---- a/scripts/fastaq_make_random_contigs -+++ b/scripts/fastaq_make_random_contigs -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T', -@@ -14,6 +13,10 @@ - parser.add_argument('length', type=int, help='Length of each contig') - parser.add_argument('outfile', help='Name of output file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.make_random_contigs( - options.contigs, - options.length, ---- a/scripts/fastaq_merge -+++ b/scripts/fastaq_merge -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences', -@@ -10,6 +9,10 @@ - parser.add_argument('outfile', help='Name of output file') - parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.merge_to_one_seq( - options.infile, - options.outfile, ---- a/scripts/fastaq_replace_bases -+++ b/scripts/fastaq_replace_bases -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Replaces all occurences of one letter with another in a fasta/q file', -@@ -11,4 +10,8 @@ - parser.add_argument('old', help='Base to be replaced') - parser.add_argument('new', help='Replace with this letter') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.replace_bases(options.infile, options.outfile, options.old, options.new) ---- a/scripts/fastaq_reverse_complement -+++ b/scripts/fastaq_reverse_complement -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Reverse complements all sequences in a fasta/q file', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.reverse_complement(options.infile, options.outfile) ---- a/scripts/fastaq_scaffolds_to_contigs -+++ b/scripts/fastaq_scaffolds_to_contigs -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input', -@@ -10,4 +9,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output contigs file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs) ---- a/scripts/fastaq_search_for_seq -+++ b/scripts/fastaq_search_for_seq -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits', -@@ -10,4 +9,8 @@ - parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand') - parser.add_argument('search_string', help='String to search for in the sequences') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.search_for_seq(options.infile, options.outfile, options.search_string) ---- a/scripts/fastaq_sequence_trim -+++ b/scripts/fastaq_sequence_trim -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming', -@@ -13,6 +12,10 @@ - parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2') - parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.sequence_trim( - options.infile_1, - options.infile_2, ---- a/scripts/fastaq_split_by_base_count -+++ b/scripts/fastaq_split_by_base_count -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.', -@@ -10,6 +9,9 @@ - parser.add_argument('outprefix', help='Name of output fasta/q file') - parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases') - parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT') -- - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs) ---- a/scripts/fastaq_strip_illumina_suffix -+++ b/scripts/fastaq_strip_illumina_suffix -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Strips /1 or /2 off the end of every read name in a fasta/q file', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.strip_illumina_suffix(options.infile, options.outfile) ---- a/scripts/fastaq_to_fake_qual -+++ b/scripts/fastaq_to_fake_qual -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Makes fake quality scores file from a fasta/q file', -@@ -10,6 +9,10 @@ - parser.add_argument('outfile', help='Name of output file') - parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40) - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.fastaq_to_fake_qual( - options.infile, - options.outfile, ---- a/scripts/fastaq_to_fasta -+++ b/scripts/fastaq_to_fasta -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Converts sequence file to FASTA format', -@@ -11,6 +10,10 @@ - parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60) - parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.to_fasta( - options.infile, - options.outfile, ---- a/scripts/fastaq_to_mira_xml -+++ b/scripts/fastaq_to_mira_xml -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('xml_out', help='Name of output xml file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.fastaq_to_mira_xml(options.infile, options.xml_out) ---- a/scripts/fastaq_to_orfs_gff -+++ b/scripts/fastaq_to_orfs_gff -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Writes a GFF file of open reading frames from a fasta/q file', -@@ -10,4 +9,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('gff_out', help='Name of output gff file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length) ---- a/scripts/fastaq_to_perfect_reads -+++ b/scripts/fastaq_to_perfect_reads -@@ -1,10 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --import random --from math import floor, ceil --from fastaq import sequences, utils --import sys - - parser = argparse.ArgumentParser( - description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.', -@@ -20,6 +16,12 @@ - parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT') - options = parser.parse_args() - -+ -+import random -+from math import floor, ceil -+from fastaq import sequences, utils -+import sys -+ - random.seed(a=options.seed) - - seq_reader = sequences.file_reader(options.infile) ---- a/scripts/fastaq_to_quasr_primers_file -+++ b/scripts/fastaq_to_quasr_primers_file -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.to_quasr_primers(options.infile, options.outfile) ---- a/scripts/fastaq_to_random_subset -+++ b/scripts/fastaq_to_random_subset -@@ -1,9 +1,6 @@ - #!/usr/bin/env python3 - --import sys - import argparse --import random --from fastaq import sequences, utils - - parser = argparse.ArgumentParser( - description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' + -@@ -15,6 +12,11 @@ - parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT') - options = parser.parse_args() - -+ -+import sys -+import random -+from fastaq import sequences, utils -+ - seq_reader = sequences.file_reader(options.infile) - fout = utils.open_file_write(options.outfile) - ---- a/scripts/fastaq_to_tiling_bam -+++ b/scripts/fastaq_to_tiling_bam -@@ -1,9 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --import sys --import os --from fastaq import sequences, utils - - parser = argparse.ArgumentParser( - description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome', -@@ -17,6 +14,11 @@ - parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42') - options = parser.parse_args() - -+ -+import sys -+import os -+from fastaq import sequences, utils -+ - # make a header first - we need to add the @RG line to the default header made by samtools - tmp_empty_file = options.outfile + '.tmp.empty' - f = utils.open_file_write(tmp_empty_file) ---- a/scripts/fastaq_to_unique_by_id -+++ b/scripts/fastaq_to_unique_by_id -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.to_unique_by_id(options.infile, options.outfile) ---- a/scripts/fastaq_translate -+++ b/scripts/fastaq_translate -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format', -@@ -10,4 +9,8 @@ - parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q') - parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.translate(options.infile, options.outfile, frame=options.frame) ---- a/scripts/fastaq_trim_Ns_at_end -+++ b/scripts/fastaq_trim_Ns_at_end -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends', -@@ -9,4 +8,8 @@ - parser.add_argument('infile', help='Name of input fasta/q file') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.trim_Ns_at_end(options.infile, options.outfile) ---- a/scripts/fastaq_trim_ends -+++ b/scripts/fastaq_trim_ends -@@ -1,7 +1,6 @@ - #!/usr/bin/env python3 - - import argparse --from fastaq import tasks - - parser = argparse.ArgumentParser( - description = 'Trims set number of bases off each sequence in a fasta/q file', -@@ -11,4 +10,8 @@ - parser.add_argument('end_trim', type=int, help='Number of bases to trim off end') - parser.add_argument('outfile', help='Name of output fasta/q file') - options = parser.parse_args() -+ -+ -+from fastaq import tasks -+ - tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim) diff --git a/debian/patches/series b/debian/patches/series deleted file mode 100644 index dfa3826..0000000 --- a/debian/patches/series +++ /dev/null @@ -1 +0,0 @@ -delay-import-statements-for-manpage-creation.patch diff --git a/debian/rules b/debian/rules deleted file mode 100755 index 58f2a1b..0000000 --- a/debian/rules +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/make -f - -export DH_VERBOSE := 1 -export PYBUILD_NAME=fastaq - -mandir := $(CURDIR)/debian/man -debfolder := $(CURDIR)/debian - -%: - dh $@ --with python3 --buildsystem=pybuild - -override_dh_auto_build: - dh_python3 - dh_auto_build - mkdir $(CURDIR)/doc - cd $(CURDIR)/doc - -override_dh_auto_clean: - rm -rf build .pybuild - rm -rf $(mandir) - -override_dh_installman: - mkdir -p $(mandir) - $(debfolder)/usage_to_man - dh_installman -- \ No newline at end of file diff --git a/debian/source/format b/debian/source/format deleted file mode 100644 index 46ebe02..0000000 --- a/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (quilt) \ No newline at end of file diff --git a/debian/upstream/metadata b/debian/upstream/metadata deleted file mode 100644 index d8b5812..0000000 --- a/debian/upstream/metadata +++ /dev/null @@ -1,12 +0,0 @@ -Reference: - Author: - Title: - Journal: - Year: - Volume: - Number: - Pages: - DOI: - PMID: - URL: - eprint: diff --git a/debian/usage_to_man b/debian/usage_to_man deleted file mode 100755 index 32f28b8..0000000 --- a/debian/usage_to_man +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -#Converts Fastaq python scripts usage into man pages. -#The man pages are placed in the man folder of the main Fastaq directory - -createManPages(); - -sub createManPages { - - my $source= 'scripts'; - my $destination= 'debian/man'; - my $app_name = 'Fastaq'; - - - unless ( -d $destination ) { - system(mkdir $destination); - } - - my @files; - - push(@files,`ls $source/fastaq_*`); - - if ( scalar @files > 0 ) { - - print "Creating manpages\n"; - for my $file ( @files ) { - $file =~ s/\n$//; - - my $filename = $file; - $filename =~ s/$source\///; - - my $uc_filename = uc($filename); - my $man_file = $filename; - - $man_file = $destination . '/' . $man_file . '.1'; - - open (my $man_fh, ">", $man_file); - - my $grep_string = $filename . ': error: too few arguments'; - - my $cmd = "help2man -m $filename -n $filename --no-discard-stderr $file | sed 's/usage://gi'"; - my @output; - push(@output, `$cmd`); - - for my $line (@output) { - $line =~ s/\n$//; - - } - - for (my $i = 0; $i < scalar @output; $i++) { - my $output_line = $output[$i]; - - if ($output_line =~ m/^\.TH/) { - $output_line =~ s/\s+/ /g; - $output_line =~ s/(\.TH) ("\d+") ("[a-zA-Z0-9_ ]*") ("[a-zA-Z0-9_<>\[\]\/\.\(\), ]*") ("[a-zA-Z0-9_]*")/$1 $uc_filename $2 $3 "$app_name" "Fastaq executables"/; - } - - $output_line =~ s/ \\- $filename/$filename/; - - if ( $output_line =~ m/^.PP/ && $output[$i + 1] =~ m/^$filename\:/ ) { - $output_line = $output[$i + 1] = ''; - } - - if ($output_line =~ m/^\.SH "SEE ALSO"/) { - last; - } - print $man_fh "$output_line\n"; - } - - writeAuthorAndCopyright($man_fh,$filename); - close($man_fh); - } - print "Manpage creation complete\n"; - } -} - -sub writeAuthorAndCopyright { - - my ($man_fh,$filename) = @_; - - my $author_blurb = <<END_OF_AUTHOR_BLURB; -.SH "AUTHOR" -.sp -$filename was originally written by Martin Hunt (mh12\@sanger.ac.uk) -END_OF_AUTHOR_BLURB - - print $man_fh "$author_blurb\n"; - - my $copyright_blurb = <<'END_OF_C_BLURB'; -.SH "COPYING" -.sp -Wellcome Trust Sanger Institute Copyright \(co 2013 Wellcome Trust Sanger Institute This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version\&. -END_OF_C_BLURB - - print $man_fh "$copyright_blurb\n"; - -} diff --git a/debian/watch b/debian/watch deleted file mode 100644 index 46c1516..0000000 --- a/debian/watch +++ /dev/null @@ -1,3 +0,0 @@ -version=3 -https://github.com/sanger-pathogens/fastaq/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz) - diff --git a/fastaq/tasks.py b/fastaq/tasks.py index 068a640..1a7d378 100644 --- a/fastaq/tasks.py +++ b/fastaq/tasks.py @@ -467,10 +467,16 @@ def search_for_seq(infile, outfile, search_string): utils.close(fout) -def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50): - trim_seqs = {} - file_to_dict(to_trim_file, trim_seqs) - trim_seqs = [x.seq for x in trim_seqs.values()] +def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50, check_revcomp=False): + to_trim_seqs = {} + file_to_dict(to_trim_file, to_trim_seqs) + trim_seqs = [x.seq for x in to_trim_seqs.values()] + if check_revcomp: + for seq in to_trim_seqs.values(): + seq.revcomp() + trim_seqs_revcomp = [x.seq for x in to_trim_seqs.values()] + else: + trim_seqs_revcomp = [] seq_reader_1 = sequences.file_reader(infile_1) seq_reader_2 = sequences.file_reader(infile_2) @@ -490,6 +496,11 @@ def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_le seq.trim(len(trim_seq),0) break + for trim_seq in trim_seqs_revcomp: + if seq.seq.endswith(trim_seq): + seq.trim(0,len(trim_seq)) + break + if len(seq_1) >= min_length and len(seq_2) >= min_length: print(seq_1, file=f_out_1) print(seq_2, file=f_out_2) @@ -679,18 +690,6 @@ def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False sequences.Fasta.line_length = original_line_length -def to_quasr_primers(infile, outfile): - seq_reader = sequences.file_reader(infile) - f_out = utils.open_file_write(outfile) - - for seq in seq_reader: - seq2 = copy.copy(seq) - seq2.revcomp() - print(seq.seq, seq2.seq, sep='\t', file=f_out) - - utils.close(f_out) - - def to_fasta_union(infile, outfile, seqname='union'): seq_reader = sequences.file_reader(infile) new_seq = [] diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected deleted file mode 100644 index 88ce837..0000000 --- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected +++ /dev/null @@ -1,2 +0,0 @@ -ACGT ACGT -AG CT diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa deleted file mode 100644 index be7c130..0000000 --- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa +++ /dev/null @@ -1,4 +0,0 @@ ->1 -ACGT ->2 -AG diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.fa index 28f665b..ac2ff83 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_1.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_1.fa @@ -1,12 +1,24 @@ >1/1 -TRIM1GCTCGAGCT +1234567890 >2/1 -TRIM1AGCTAGCTAG +AACG123456789 >3/1 -CGCTAGCTAG +1234567890 >4/1 -TRIM2AGCTAGCTAG +AACG1234567890 >5/1 -AGCTAGCTAG +1234567890 >6/1 -TRIM4AGCTAGCTAG +AACG1234567890 +>7/1 +123456789AGGC +>8/1 +123456789 +>9/1 +1234567890AGGC +>10/1 +AACG123456789CGTT +>11/1 +AACG1234567890CGTT +>12/1 +AACG1234567890CGTT diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa index 0bebad8..0512244 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa @@ -1,8 +1,12 @@ ->3/1 -CGCTAGCTAG +>1/1 +1234567890 >4/1 -AGCTAGCTAG +1234567890 >5/1 -AGCTAGCTAG +1234567890 >6/1 -AGCTAGCTAG +1234567890 +>9/1 +1234567890 +>12/1 +1234567890 diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.fa index 7514250..cf3e872 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_2.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_2.fa @@ -1,12 +1,24 @@ >1/2 -TRIM1ACGTACGTAC +1234567890 >2/2 -TRIM2ACGTAGTGA +1234567890 >3/2 -ACGCTGCAGTCAGTCAGTAT +AACG123456789 >4/2 -TRIM3CGATCGATCG +1234567890 >5/2 -TRIM3CGATCGATCG +AACG1234567890 >6/2 -CGATCGATCG +GCCT1234567890 +>7/2 +1234567890 +>8/2 +123456789AGGC +>9/2 +1234567890CGTT +>10/2 +AACG1234567890CGTT +>11/2 +AACG123456789CGTT +>12/2 +AACG1234567890CGTT diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa index ec80f40..432f60a 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa @@ -1,8 +1,12 @@ ->3/2 -ACGCTGCAGTCAGTCAGTAT +>1/2 +1234567890 >4/2 -CGATCGATCG +1234567890 >5/2 -CGATCGATCG +1234567890 >6/2 -CGATCGATCG +1234567890 +>9/2 +1234567890 +>12/2 +1234567890 diff --git a/fastaq/tests/data/tasks_test_sequences_to_trim.fa b/fastaq/tests/data/tasks_test_sequences_to_trim.fa index 395eaaa..cd2aa28 100644 --- a/fastaq/tests/data/tasks_test_sequences_to_trim.fa +++ b/fastaq/tests/data/tasks_test_sequences_to_trim.fa @@ -1,8 +1,4 @@ >1 -TRIM1 +AACG >2 -TRIM2 ->3 -TRIM3 ->4 -TRIM4 +GCCT diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py index 36ebfba..7528815 100644 --- a/fastaq/tests/tasks_test.py +++ b/fastaq/tests/tasks_test.py @@ -291,7 +291,7 @@ class TestSequenceTrim(unittest.TestCase): to_trim = os.path.join(data_dir, 'tasks_test_sequences_to_trim.fa') expected1 = os.path.join(data_dir, 'tasks_test_sequence_trim_1.trimmed.fa') expected2 = os.path.join(data_dir, 'tasks_test_sequence_trim_2.trimmed.fa') - tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10) + tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10, check_revcomp=True) self.assertTrue(filecmp.cmp(expected1, tmp1)) self.assertTrue(filecmp.cmp(expected2, tmp2)) os.unlink(tmp1) @@ -478,15 +478,6 @@ class TestStripIlluminaSuffix(unittest.TestCase): os.unlink(tmpfile) -class TestToQuasrPrimers(unittest.TestCase): - def test_to_quasr_primers(self): - '''Check that fasta file gets converted to QUASR sequence file''' - tmpfile = 'tmp.primers' - tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile) - self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile)) - os.unlink(tmpfile) - - class TestToFasta(unittest.TestCase): def test_to_fasta(self): '''Test to_fasta''' diff --git a/scripts/fastaq_sequence_trim b/scripts/fastaq_sequence_trim index 50a4f34..7021c6c 100755 --- a/scripts/fastaq_sequence_trim +++ b/scripts/fastaq_sequence_trim @@ -7,6 +7,7 @@ parser = argparse.ArgumentParser( description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming', usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>') parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT') +parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming') parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in') parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in') parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1') @@ -19,5 +20,6 @@ tasks.sequence_trim( options.outfile_1, options.outfile_2, options.trim_seqs, - min_length=options.min_length + min_length=options.min_length, + check_revcomp=options.revcomp ) diff --git a/scripts/fastaq_to_quasr_primers_file b/scripts/fastaq_to_quasr_primers_file deleted file mode 100755 index 8e5bf7c..0000000 --- a/scripts/fastaq_to_quasr_primers_file +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -from fastaq import tasks - -parser = argparse.ArgumentParser( - description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated', - usage = '%(prog)s <fasta/q in> <outfile>') -parser.add_argument('infile', help='Name of input fasta/q file') -parser.add_argument('outfile', help='Name of output file') -options = parser.parse_args() -tasks.to_quasr_primers(options.infile, options.outfile) diff --git a/setup.py b/setup.py index 3064862..5506ba9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ def read(fname): setup( name='Fastaq', - version='1.5.0', + version='1.6.0', description='Scripts to manipulate FASTA and FASTQ files, plus API for developers', long_description=read('README.md'), packages = find_packages(), -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq_tmp.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
