This is an automated email from the git hooks/post-receive script. satta pushed a commit to branch master in repository ariba.
commit 713f261d25cc84c5020ad5ed7ff100bfb8d18225 Author: Sascha Steinbiss <[email protected]> Date: Mon Dec 5 09:58:15 2016 +0000 New upstream version 2.5.1+ds --- ariba/ref_seq_chooser.py | 19 ++++++++++++------- ariba/tests/cluster_test.py | 4 ++-- .../data/ref_seq_chooser_test_flanking.all_refs.fa | 20 ++++++++++++++++++++ .../ref_seq_chooser_test_flanking.cluster_refs.fa | 10 ++++++++++ .../data/ref_seq_chooser_test_flanking.contigs.fa | 22 ++++++++++++++++++++++ ...f_seq_chooser_test_flanking.expected_contigs.fa | 11 +++++++++++ ariba/tests/ref_seq_chooser_test.py | 14 ++++++++++++++ setup.py | 2 +- 8 files changed, 92 insertions(+), 10 deletions(-) diff --git a/ariba/ref_seq_chooser.py b/ariba/ref_seq_chooser.py index 3565c9b..5c9eb69 100644 --- a/ariba/ref_seq_chooser.py +++ b/ariba/ref_seq_chooser.py @@ -93,7 +93,7 @@ class RefSeqChooser: @classmethod - def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False): + def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False, check_flanking=False): if use_qry_length: qry_length_percent1 = hit1.hit_length_qry / hit1.qry_length qry_length_percent2 = hit2.hit_length_qry / hit2.qry_length @@ -107,6 +107,11 @@ class RefSeqChooser: elif hit1.percent_identity != hit2.percent_identity: return hit1 if hit1.percent_identity > hit2.percent_identity else hit2 else: + if check_flanking: + flank1 = min(min(hit1.qry_start, hit1.qry_end), hit1.qry_length - 1 - max(hit1.qry_start, hit1.qry_end)) + flank2 = min(min(hit2.qry_start, hit2.qry_end), hit2.qry_length - 1 - max(hit2.qry_start, hit2.qry_end)) + if flank1 != flank2: + return hit1 if flank1 > flank2 else hit2 l1, c1 = RefSeqChooser._l_and_c_from_contig_name(hit1.qry_name) l2, c2 = RefSeqChooser._l_and_c_from_contig_name(hit2.qry_name) if l1 != l2: @@ -116,20 +121,20 @@ class RefSeqChooser: @classmethod - def _choose_best_nucmer_match(cls, matches, use_qry_length=False): + def _choose_best_nucmer_match(cls, matches, use_qry_length=False, check_flanking=False): best_match = None for ref_name in matches: for hit in matches[ref_name]: if best_match is None: best_match = hit else: - best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length) + best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length, check_flanking=check_flanking) return best_match @classmethod - def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length): + def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length, check_flanking): tmpdir = tempfile.mkdtemp(prefix='tmp.closest_nucmer_match.', dir=os.getcwd()) coords_file = os.path.join(tmpdir, 'nucmer_vs_cluster_refs.coords') pymummer.nucmer.Runner( @@ -147,13 +152,13 @@ class RefSeqChooser: if len(nucmer_matches) == 0: return None, {} else: - best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length) + best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length, check_flanking=check_flanking) return best_hit, nucmer_matches def run(self): print('Looking for closest match from sequences within cluster', file=self.log_fh) - best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False) + best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False, True) if best_hit_from_cluster is None: return @@ -166,7 +171,7 @@ class RefSeqChooser: RefSeqChooser._make_matching_contig_pieces_fasta(self.assembly_fasta_in, pieces_coords, pieces_fasta_file) print('Checking for a better match to a ref sequence outside the cluster', file=self.log_fh) - best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True) + best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True, False) shutil.rmtree(tmpdir) self.closest_ref_from_all_refs = best_hit_from_all_seqs.ref_name if self.closest_ref_from_all_refs is None: diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py index 3e18ee4..754c7a5 100644 --- a/ariba/tests/cluster_test.py +++ b/ariba/tests/cluster_test.py @@ -277,7 +277,7 @@ class TestCluster(unittest.TestCase): c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080) c.run() expected = [ - 'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene' + 'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l6.c30.ctg.1\t362\t27.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene' ] self.assertEqual(expected, c.report_lines) shutil.rmtree(tmpdir) @@ -490,7 +490,7 @@ class TestCluster(unittest.TestCase): c.run() expected = [ - 'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c30.ctg.1\t807\t22.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1' + 'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c17.ctg.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1' ] self.assertEqual(expected, c.report_lines) shutil.rmtree(tmpdir) diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa new file mode 100644 index 0000000..7a29467 --- /dev/null +++ b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa @@ -0,0 +1,20 @@ +>ref1 +GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG +AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG +ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA +TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG +AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC +ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA +ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA +TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC +CAGATCTCAATCTGCAGCTA +>ref2 +CACGCGTCGTGGCCAACCACGCGTTCGTTGGCAGATGCCTTTACGATCACTACCCAAAAT +AAAGAGCAGTGTGTGTATGTTACCTAACTACGTAGTAAGCGCTAGAGTAGGCAGTGGCCT +AAGTGACACCTGTTCCGTGTTGCCCTGGCAGCAGCACACCGCATTCTAAGGACCGTCGCG +TCGTATTCTTCCAGCTAAATCACCCTAAGTGCTATAATTTGGAGGAGTGAAGAGTTTGAT +GCCAAGCTGACGTCAGGCGGGGATTGCCATTGATCTTGGCTCTCAGCCAGAGAAAGTACA +TAACAGGAAAATTCAGCCCTTGGGTCTGTGCTCAACGATGGTTTGGAGACTCCTAGAATA +ATAGCACCTCAGGGACCTTTTCCTAGGAACTGTCCACGGTCGCCACGACTGGAGCTGAAA +TTTAGTACACAGAGCACCGCCTGTAGATTGCTCCTCGGTCCGGCTGTCTATAGACCGTCA +CAGAATTCTAGAGCAACCGT diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa new file mode 100644 index 0000000..1e81f7c --- /dev/null +++ b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa @@ -0,0 +1,10 @@ +>ref1 +GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG +AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG +ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA +TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG +AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC +ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA +ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA +TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC +CAGATCTCAATCTGCAGCTA diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa new file mode 100644 index 0000000..814f1cd --- /dev/null +++ b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa @@ -0,0 +1,22 @@ +>cluster.l15.c17.ctg.1 +ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC +GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG +AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG +ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA +TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG +AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC +ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA +ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA +TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC +CAGATCTCAATCTGTACCTA +>cluster.l6.c4.ctg.1 +ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC +GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG +AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG +ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA +TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG +AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC +ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA +ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA +TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC +CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa new file mode 100644 index 0000000..36413a4 --- /dev/null +++ b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa @@ -0,0 +1,11 @@ +>cluster.l6.c4.ctg.1 +ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC +GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG +AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG +ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA +TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG +AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC +ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA +ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA +TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC +CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC diff --git a/ariba/tests/ref_seq_chooser_test.py b/ariba/tests/ref_seq_chooser_test.py index 9cb39b3..00e6fbe 100644 --- a/ariba/tests/ref_seq_chooser_test.py +++ b/ariba/tests/ref_seq_chooser_test.py @@ -96,3 +96,17 @@ class TestRefSeqChooser(unittest.TestCase): self.assertTrue(os.path.exists(tmp_out)) os.unlink(tmp_out) + + def test_run_flanking_different(self): + '''Test full run where amount of flanking seq varies''' + all_ref_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.all_refs.fa') + cluster_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.cluster_refs.fa') + contig_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.contigs.fa') + expected_fa = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.expected_contigs.fa') + tmp_out = 'tmp.ref_seq_chooser_test_flanking.fa' + refchooser = ref_seq_chooser.RefSeqChooser(cluster_fasta, all_ref_fasta, contig_fasta, tmp_out, sys.stdout) + refchooser.run() + self.assertEqual('ref1', refchooser.closest_ref_from_all_refs) + self.assertTrue(refchooser.closest_ref_is_in_cluster) + self.assertTrue(filecmp.cmp(expected_fa, tmp_out, shallow=False)) + os.unlink(tmp_out) diff --git a/setup.py b/setup.py index 25c8331..a3e6fa1 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ vcfcall_mod = Extension( setup( ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod], name='ariba', - version='2.5.0', + version='2.5.1', description='ARIBA: Antibiotic Resistance Identification By Assembly', packages = find_packages(), package_data={'ariba': ['test_run_data/*']}, -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
