This is an automated email from the git hooks/post-receive script. jssoares-guest pushed a commit to branch master in repository fastaq.
commit 35b6b76c8a48c9e8702763abee5fef92c9dbc718 Author: Jorge Soares <[email protected]> Date: Tue Nov 18 16:16:56 2014 +0000 new upstream --- fastaq/tasks.py | 31 +++++++++++----------- ...sequences_test_fastaq_to_quasr_primers.expected | 2 -- .../data/sequences_test_fastaq_to_quasr_primers.fa | 4 --- fastaq/tests/data/tasks_test_sequence_trim_1.fa | 24 ++++++++++++----- .../data/tasks_test_sequence_trim_1.trimmed.fa | 14 ++++++---- fastaq/tests/data/tasks_test_sequence_trim_2.fa | 24 ++++++++++++----- .../data/tasks_test_sequence_trim_2.trimmed.fa | 14 ++++++---- fastaq/tests/data/tasks_test_sequences_to_trim.fa | 8 ++---- fastaq/tests/tasks_test.py | 11 +------- scripts/fastaq_sequence_trim | 4 ++- scripts/fastaq_to_quasr_primers_file | 12 --------- setup.py | 2 +- 12 files changed, 76 insertions(+), 74 deletions(-) diff --git a/fastaq/tasks.py b/fastaq/tasks.py index 068a640..1a7d378 100644 --- a/fastaq/tasks.py +++ b/fastaq/tasks.py @@ -467,10 +467,16 @@ def search_for_seq(infile, outfile, search_string): utils.close(fout) -def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50): - trim_seqs = {} - file_to_dict(to_trim_file, trim_seqs) - trim_seqs = [x.seq for x in trim_seqs.values()] +def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50, check_revcomp=False): + to_trim_seqs = {} + file_to_dict(to_trim_file, to_trim_seqs) + trim_seqs = [x.seq for x in to_trim_seqs.values()] + if check_revcomp: + for seq in to_trim_seqs.values(): + seq.revcomp() + trim_seqs_revcomp = [x.seq for x in to_trim_seqs.values()] + else: + trim_seqs_revcomp = [] seq_reader_1 = sequences.file_reader(infile_1) seq_reader_2 = sequences.file_reader(infile_2) @@ -490,6 +496,11 @@ def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_le seq.trim(len(trim_seq),0) break + for trim_seq in trim_seqs_revcomp: + if seq.seq.endswith(trim_seq): + seq.trim(0,len(trim_seq)) + break + if len(seq_1) >= min_length and len(seq_2) >= min_length: print(seq_1, file=f_out_1) print(seq_2, file=f_out_2) @@ -679,18 +690,6 @@ def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False sequences.Fasta.line_length = original_line_length -def to_quasr_primers(infile, outfile): - seq_reader = sequences.file_reader(infile) - f_out = utils.open_file_write(outfile) - - for seq in seq_reader: - seq2 = copy.copy(seq) - seq2.revcomp() - print(seq.seq, seq2.seq, sep='\t', file=f_out) - - utils.close(f_out) - - def to_fasta_union(infile, outfile, seqname='union'): seq_reader = sequences.file_reader(infile) new_seq = [] diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected deleted file mode 100644 index 88ce837..0000000 --- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected +++ /dev/null @@ -1,2 +0,0 @@ -ACGT ACGT -AG CT diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa deleted file mode 100644 index be7c130..0000000 --- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa +++ /dev/null @@ -1,4 +0,0 @@ ->1 -ACGT ->2 -AG diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.fa index 28f665b..ac2ff83 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_1.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_1.fa @@ -1,12 +1,24 @@ >1/1 -TRIM1GCTCGAGCT +1234567890 >2/1 -TRIM1AGCTAGCTAG +AACG123456789 >3/1 -CGCTAGCTAG +1234567890 >4/1 -TRIM2AGCTAGCTAG +AACG1234567890 >5/1 -AGCTAGCTAG +1234567890 >6/1 -TRIM4AGCTAGCTAG +AACG1234567890 +>7/1 +123456789AGGC +>8/1 +123456789 +>9/1 +1234567890AGGC +>10/1 +AACG123456789CGTT +>11/1 +AACG1234567890CGTT +>12/1 +AACG1234567890CGTT diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa index 0bebad8..0512244 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa @@ -1,8 +1,12 @@ ->3/1 -CGCTAGCTAG +>1/1 +1234567890 >4/1 -AGCTAGCTAG +1234567890 >5/1 -AGCTAGCTAG +1234567890 >6/1 -AGCTAGCTAG +1234567890 +>9/1 +1234567890 +>12/1 +1234567890 diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.fa index 7514250..cf3e872 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_2.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_2.fa @@ -1,12 +1,24 @@ >1/2 -TRIM1ACGTACGTAC +1234567890 >2/2 -TRIM2ACGTAGTGA +1234567890 >3/2 -ACGCTGCAGTCAGTCAGTAT +AACG123456789 >4/2 -TRIM3CGATCGATCG +1234567890 >5/2 -TRIM3CGATCGATCG +AACG1234567890 >6/2 -CGATCGATCG +GCCT1234567890 +>7/2 +1234567890 +>8/2 +123456789AGGC +>9/2 +1234567890CGTT +>10/2 +AACG1234567890CGTT +>11/2 +AACG123456789CGTT +>12/2 +AACG1234567890CGTT diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa index ec80f40..432f60a 100644 --- a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa +++ b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa @@ -1,8 +1,12 @@ ->3/2 -ACGCTGCAGTCAGTCAGTAT +>1/2 +1234567890 >4/2 -CGATCGATCG +1234567890 >5/2 -CGATCGATCG +1234567890 >6/2 -CGATCGATCG +1234567890 +>9/2 +1234567890 +>12/2 +1234567890 diff --git a/fastaq/tests/data/tasks_test_sequences_to_trim.fa b/fastaq/tests/data/tasks_test_sequences_to_trim.fa index 395eaaa..cd2aa28 100644 --- a/fastaq/tests/data/tasks_test_sequences_to_trim.fa +++ b/fastaq/tests/data/tasks_test_sequences_to_trim.fa @@ -1,8 +1,4 @@ >1 -TRIM1 +AACG >2 -TRIM2 ->3 -TRIM3 ->4 -TRIM4 +GCCT diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py index 36ebfba..7528815 100644 --- a/fastaq/tests/tasks_test.py +++ b/fastaq/tests/tasks_test.py @@ -291,7 +291,7 @@ class TestSequenceTrim(unittest.TestCase): to_trim = os.path.join(data_dir, 'tasks_test_sequences_to_trim.fa') expected1 = os.path.join(data_dir, 'tasks_test_sequence_trim_1.trimmed.fa') expected2 = os.path.join(data_dir, 'tasks_test_sequence_trim_2.trimmed.fa') - tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10) + tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10, check_revcomp=True) self.assertTrue(filecmp.cmp(expected1, tmp1)) self.assertTrue(filecmp.cmp(expected2, tmp2)) os.unlink(tmp1) @@ -478,15 +478,6 @@ class TestStripIlluminaSuffix(unittest.TestCase): os.unlink(tmpfile) -class TestToQuasrPrimers(unittest.TestCase): - def test_to_quasr_primers(self): - '''Check that fasta file gets converted to QUASR sequence file''' - tmpfile = 'tmp.primers' - tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile) - self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile)) - os.unlink(tmpfile) - - class TestToFasta(unittest.TestCase): def test_to_fasta(self): '''Test to_fasta''' diff --git a/scripts/fastaq_sequence_trim b/scripts/fastaq_sequence_trim index 50a4f34..7021c6c 100755 --- a/scripts/fastaq_sequence_trim +++ b/scripts/fastaq_sequence_trim @@ -7,6 +7,7 @@ parser = argparse.ArgumentParser( description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming', usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>') parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT') +parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming') parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in') parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in') parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1') @@ -19,5 +20,6 @@ tasks.sequence_trim( options.outfile_1, options.outfile_2, options.trim_seqs, - min_length=options.min_length + min_length=options.min_length, + check_revcomp=options.revcomp ) diff --git a/scripts/fastaq_to_quasr_primers_file b/scripts/fastaq_to_quasr_primers_file deleted file mode 100755 index 8e5bf7c..0000000 --- a/scripts/fastaq_to_quasr_primers_file +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -from fastaq import tasks - -parser = argparse.ArgumentParser( - description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated', - usage = '%(prog)s <fasta/q in> <outfile>') -parser.add_argument('infile', help='Name of input fasta/q file') -parser.add_argument('outfile', help='Name of output file') -options = parser.parse_args() -tasks.to_quasr_primers(options.infile, options.outfile) diff --git a/setup.py b/setup.py index 3064862..5506ba9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ def read(fname): setup( name='Fastaq', - version='1.5.0', + version='1.6.0', description='Scripts to manipulate FASTA and FASTQ files, plus API for developers', long_description=read('README.md'), packages = find_packages(), -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
