This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository last-align.
commit efc61168e70f34c6119d92b9cf49414286aeeee5 Author: Andreas Tille <ti...@debian.org> Date: Tue Oct 18 13:42:51 2016 +0200 New upstream version 759 --- ChangeLog.txt | 25 +++++++++++++++- doc/lastal.html | 3 +- doc/lastal.txt | 3 +- doc/lastdb.html | 8 ++--- doc/lastdb.txt | 8 ++--- scripts/last-dotplot | 2 +- src/LastalArguments.cc | 3 -- src/gaplessPssmXdrop.cc | 39 ++++++++++++++++++++++++ src/gaplessPssmXdrop.hh | 7 +++++ src/gaplessTwoQualityXdrop.cc | 46 +++++++++++++++++++++++++++++ src/gaplessTwoQualityXdrop.hh | 11 +++++++ src/gaplessXdrop.cc | 40 +++++++++++++++++++++++++ src/gaplessXdrop.hh | 15 ++++++++++ src/lastal.cc | 69 ++++++++++++++++++++++++++++--------------- src/version.hh | 2 +- 15 files changed, 241 insertions(+), 40 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index d49aba7..00416a1 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,3 +1,26 @@ +2016-09-29 Martin C. Frith <Martin C. Frith> + + * test/last-test.out, test/last-test.sh: + Added (incomplete) tests for gapless overlap alignment. + [19b58d988059] [tip] + + * src/LastalArguments.cc, src/gaplessPssmXdrop.cc, + src/gaplessPssmXdrop.hh, src/gaplessTwoQualityXdrop.cc, + src/gaplessTwoQualityXdrop.hh, src/gaplessXdrop.cc, + src/gaplessXdrop.hh, src/lastal.cc: + Enabled gapless overlap alignment. + [8f2bf0e299a1] + + * src/lastal.cc: + Refactoring. + [64ac7ea995de] + +2016-09-16 Martin C. Frith <Martin C. Frith> + + * doc/lastal.txt, doc/lastdb.txt, scripts/last-dotplot: + Fixed last-dotplot axis labels for breaking change in Pillow 3.0. + [a0074597cb5d] + 2016-09-01 Martin C. Frith <Martin C. Frith> * doc/last-map-probs.txt, doc/last-pair-probs.txt, doc/last- @@ -5,7 +28,7 @@ /last-bisulfite-paired.sh, examples/last-bisulfite.sh, test/bs100.maf, test/maf-convert-test.out: Replaced score thresholds with significance thresholds. - [d48e9d4a7268] [tip] + [d48e9d4a7268] * doc/last-split.txt, src/split/last-split-main.cc, src/split/last- split.cc, test/last-split-test.out: diff --git a/doc/lastal.html b/doc/lastal.html index 0dea52a..d58d90a 100644 --- a/doc/lastal.html +++ b/doc/lastal.html @@ -567,7 +567,8 @@ letters spanned by the match.</td></tr> <tr><td class="option-group"> <kbd><span class="option">-k <var>STEP</var></span></kbd></td> <td>Look for initial matches starting only at every STEP-th position -in the query. This makes lastal faster but less sensitive.</td></tr> +in each query (positions 0, STEP, 2×STEP, etc). This makes +lastal faster but less sensitive.</td></tr> <tr><td class="option-group"> <kbd><span class="option">-W <var>SIZE</var></span></kbd></td> <td>Look for initial matches starting only at query positions that diff --git a/doc/lastal.txt b/doc/lastal.txt index a8f6b32..ce62516 100644 --- a/doc/lastal.txt +++ b/doc/lastal.txt @@ -224,7 +224,8 @@ Initial-match options -k STEP Look for initial matches starting only at every STEP-th position - in the query. This makes lastal faster but less sensitive. + in each query (positions 0, STEP, 2×STEP, etc). This makes + lastal faster but less sensitive. -W SIZE Look for initial matches starting only at query positions that diff --git a/doc/lastdb.html b/doc/lastdb.html index c5dfc0b..7fc9a57 100644 --- a/doc/lastdb.html +++ b/doc/lastdb.html @@ -414,10 +414,10 @@ lastal command line.</p> <tr><td class="option-group"> <kbd><span class="option">-w <var>STEP</var></span></kbd></td> <td>Allow initial matches to start only at every STEP-th position in -each of the sequences given to lastdb. This reduces the memory -usage of lastdb and lastal, and it makes lastdb faster. Its -effect on the speed and sensitivity of lastal is not entirely -clear.</td></tr> +each of the sequences given to lastdb (positions 0, STEP, +2×STEP, etc). This reduces the memory usage of lastdb and +lastal, and it makes lastdb faster. Its effect on the speed and +sensitivity of lastal is not entirely clear.</td></tr> <tr><td class="option-group"> <kbd><span class="option">-W <var>SIZE</var></span></kbd></td> <td><p class="first">Allow initial matches to start only at positions that are diff --git a/doc/lastdb.txt b/doc/lastdb.txt index b0485fb..caba336 100644 --- a/doc/lastdb.txt +++ b/doc/lastdb.txt @@ -78,10 +78,10 @@ Advanced Options -w STEP Allow initial matches to start only at every STEP-th position in - each of the sequences given to lastdb. This reduces the memory - usage of lastdb and lastal, and it makes lastdb faster. Its - effect on the speed and sensitivity of lastal is not entirely - clear. + each of the sequences given to lastdb (positions 0, STEP, + 2×STEP, etc). This reduces the memory usage of lastdb and + lastal, and it makes lastdb faster. Its effect on the speed and + sensitivity of lastal is not entirely clear. -W SIZE Allow initial matches to start only at positions that are diff --git a/scripts/last-dotplot b/scripts/last-dotplot index f8948e8..52becbf 100755 --- a/scripts/last-dotplot +++ b/scripts/last-dotplot @@ -350,7 +350,7 @@ def lastDotplot(opts, args): font, image_mode, opts) axis2 = get_axis_image(seq_names2, name_sizes2, seq_starts2, seq_pix2, font, image_mode, opts) - axis2 = axis2.rotate(270) + axis2 = axis2.rotate(270, expand=1) im.paste(axis1, (0, 0)) im.paste(axis2, (0, 0)) diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc index a4f71f2..f9b2dbf 100644 --- a/src/LastalArguments.cc +++ b/src/LastalArguments.cc @@ -354,9 +354,6 @@ LAST home page: http://last.cbrc.jp/\n\ if( isTranslated() && isQueryStrandMatrix ) ERR( "can't combine option -F with option -S 1" ); - if( globality == 1 && outputType == 1 ) - ERR( "can't combine option -T 1 with option -j 1" ); - if( isGreedy && outputType > 3 ) ERR( "can't combine option -M with option -j > 3" ); diff --git a/src/gaplessPssmXdrop.cc b/src/gaplessPssmXdrop.cc index b59ab83..e71dd4b 100644 --- a/src/gaplessPssmXdrop.cc +++ b/src/gaplessPssmXdrop.cc @@ -71,6 +71,45 @@ bool isOptimalGaplessPssmXdrop(const uchar *seq, return true; } +int gaplessPssmXdropOverlap(const uchar *seq, + const ScoreMatrixRow *pssm, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength) { + int minScore = 0; + int maxScore = 0; + int score = 0; + + const uchar *rs = seq; + const ScoreMatrixRow *rp = pssm; + while (true) { + --rs; --rp; + int s = (*rp)[*rs]; + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + else if (score < minScore) minScore = score; + } + + maxScore = score - minScore; + + const uchar *fs = seq; + const ScoreMatrixRow *fp = pssm; + while (true) { + int s = (*fp)[*fs]; + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + ++fs; ++fp; + } + + reverseLength = seq - (rs + 1); + forwardLength = fs - seq; + return score; +} + int gaplessPssmAlignmentScore(const uchar *seq, const uchar *seqEnd, const ScoreMatrixRow *pssm) { diff --git a/src/gaplessPssmXdrop.hh b/src/gaplessPssmXdrop.hh index ebbcea2..53fd6e3 100644 --- a/src/gaplessPssmXdrop.hh +++ b/src/gaplessPssmXdrop.hh @@ -10,6 +10,7 @@ #define GAPLESS_PSSM_XDROP_HH #include "ScoreMatrixRow.hh" +#include <stddef.h> namespace cbrc { @@ -36,6 +37,12 @@ bool isOptimalGaplessPssmXdrop(const uchar *seq, const ScoreMatrixRow *pssm, int maxScoreDrop); +int gaplessPssmXdropOverlap(const uchar *seq, + const ScoreMatrixRow *pssm, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength); + int gaplessPssmAlignmentScore(const uchar *seq, const uchar *seqEnd, const ScoreMatrixRow *pssm); diff --git a/src/gaplessTwoQualityXdrop.cc b/src/gaplessTwoQualityXdrop.cc index c089dd3..4989d41 100644 --- a/src/gaplessTwoQualityXdrop.cc +++ b/src/gaplessTwoQualityXdrop.cc @@ -87,6 +87,52 @@ bool isOptimalGaplessTwoQualityXdrop(const uchar *seq1, return true; } +int gaplessTwoQualityXdropOverlap(const uchar *seq1, + const uchar *qual1, + const uchar *seq2, + const uchar *qual2, + const TwoQualityScoreMatrix &m, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength) { + int minScore = 0; + int maxScore = 0; + int score = 0; + + const uchar *rs1 = seq1; + const uchar *rq1 = qual1; + const uchar *rs2 = seq2; + const uchar *rq2 = qual2; + while (true) { + --rs1; --rq1; --rs2; --rq2; + int s = m(*rs1, *rs2, *rq1, *rq2); + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + else if (score < minScore) minScore = score; + } + + maxScore = score - minScore; + + const uchar *fs1 = seq1; + const uchar *fq1 = qual1; + const uchar *fs2 = seq2; + const uchar *fq2 = qual2; + while (true) { + int s = m(*fs1, *fs2, *fq1, *fq2); + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + ++fs1; ++fq1; ++fs2; ++fq2; + } + + reverseLength = seq1 - (rs1 + 1); + forwardLength = fs1 - seq1; + return score; +} + int gaplessTwoQualityAlignmentScore(const uchar *seq1, const uchar *seq1end, const uchar *qual1, diff --git a/src/gaplessTwoQualityXdrop.hh b/src/gaplessTwoQualityXdrop.hh index a3e4606..afd5274 100644 --- a/src/gaplessTwoQualityXdrop.hh +++ b/src/gaplessTwoQualityXdrop.hh @@ -9,6 +9,8 @@ #ifndef GAPLESS_TWO_QUALITY_XDROP_HH #define GAPLESS_TWO_QUALITY_XDROP_HH +#include <stddef.h> + namespace cbrc { typedef unsigned char uchar; @@ -51,6 +53,15 @@ bool isOptimalGaplessTwoQualityXdrop(const uchar *seq1, const TwoQualityScoreMatrix &m, int maxScoreDrop); +int gaplessTwoQualityXdropOverlap(const uchar *seq1, + const uchar *qual1, + const uchar *seq2, + const uchar *qual2, + const TwoQualityScoreMatrix &m, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength); + int gaplessTwoQualityAlignmentScore(const uchar *seq1, const uchar *seq1end, const uchar *qual1, diff --git a/src/gaplessXdrop.cc b/src/gaplessXdrop.cc index 0129a7d..09bdf17 100644 --- a/src/gaplessXdrop.cc +++ b/src/gaplessXdrop.cc @@ -76,6 +76,46 @@ bool isOptimalGaplessXdrop(const uchar *seq1, return true; } +int gaplessXdropOverlap(const uchar *seq1, + const uchar *seq2, + const ScoreMatrixRow *scorer, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength) { + int minScore = 0; + int maxScore = 0; + int score = 0; + + const uchar *r1 = seq1; + const uchar *r2 = seq2; + while (true) { + --r1; --r2; + int s = scorer[*r1][*r2]; + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + else if (score < minScore) minScore = score; + } + + maxScore = score - minScore; + + const uchar *f1 = seq1; + const uchar *f2 = seq2; + while (true) { + int s = scorer[*f1][*f2]; + if (s <= -INF) break; + score += s; + if (score > maxScore) maxScore = score; + else if (score < maxScore - maxScoreDrop) return -INF; + ++f1; ++f2; + } + + reverseLength = seq1 - (r1 + 1); + forwardLength = f1 - seq1; + return score; +} + int gaplessAlignmentScore(const uchar *seq1, const uchar *seq1end, const uchar *seq2, diff --git a/src/gaplessXdrop.hh b/src/gaplessXdrop.hh index e213d3b..7ef7df5 100644 --- a/src/gaplessXdrop.hh +++ b/src/gaplessXdrop.hh @@ -6,6 +6,7 @@ #define GAPLESS_XDROP_HH #include "ScoreMatrixRow.hh" +#include <stddef.h> namespace cbrc { @@ -54,6 +55,20 @@ bool isOptimalGaplessXdrop(const uchar *seq1, const ScoreMatrixRow *scorer, int maxScoreDrop); +// Returns the score, and sets the reverse and forward extension +// lengths, for a gapless "overlap" alignment starting at (seq1, +// seq2). "Overlap" means that the alignment must extend, in each +// direction, until it hits a score <= -INF (presumably from a +// sentinel indicating a sequence end). If the alignment would have +// any region with score < -maxScoreDrop, -INF is returned and the +// extension lengths are not set. +int gaplessXdropOverlap(const uchar *seq1, + const uchar *seq2, + const ScoreMatrixRow *scorer, + int maxScoreDrop, + size_t &reverseLength, + size_t &forwardLength); + // Calculate the score of the gapless alignment starting at (seq1, // seq2) and ending at seq1end. int gaplessAlignmentScore(const uchar *seq1, diff --git a/src/lastal.cc b/src/lastal.cc index 80f3886..3219c3e 100644 --- a/src/lastal.cc +++ b/src/lastal.cc @@ -464,6 +464,12 @@ struct Dispatcher{ (e == Phase::gapped ) ? args.maxDropGapped : args.maxDropFinal ), z( t ? 2 : p ? 1 : 0 ){} + int gaplessOverlap( indexT x, indexT y, size_t &rev, size_t &fwd ) const{ + if( z==0 ) return gaplessXdropOverlap( a+x, b+y, m, d, rev, fwd ); + if( z==1 ) return gaplessPssmXdropOverlap( a+x, p+y, d, rev, fwd ); + return gaplessTwoQualityXdropOverlap( a+x, i+x, b+y, j+y, t, d, rev, fwd ); + } + int forwardGaplessScore( indexT x, indexT y ) const{ if( z==0 ) return forwardGaplessXdropScore( a+x, b+y, m, d ); if( z==1 ) return forwardGaplessPssmXdropScore( a+x, p+y, d ); @@ -523,9 +529,18 @@ static void writeAlignment(LastAligner &aligner, const Alignment &aln, printAndDelete(a.text); } +static void writeSegmentPair(LastAligner &aligner, const SegmentPair &s, + size_t queryNum, char strand, + const uchar* querySeq) { + Alignment a; + a.fromSegmentPair(s); + writeAlignment(aligner, a, queryNum, strand, querySeq); +} + // Find query matches to the suffix array, and do gapless extensions void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns, size_t queryNum, char strand, const uchar* querySeq ){ + const bool isOverlap = (args.globality && args.outputType == 1); Dispatcher dis( Phase::gapless, aligner, queryNum, strand, querySeq ); DiagonalTable dt; // record already-covered positions on each diagonal countT matchCount = 0, gaplessExtensionCount = 0, gaplessAlignmentCount = 0; @@ -563,37 +578,43 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns, args.maxGaplessAlignmentsPerQueryPosition ) break; indexT j = *beg; // coordinate in the reference sequence - if( dt.isCovered( i, j ) ) continue; - - int fs = dis.forwardGaplessScore( j, i ); - int rs = dis.reverseGaplessScore( j, i ); - int score = fs + rs; ++gaplessExtensionCount; - // Tried checking the score after isOptimal & addEndpoint, but - // the number of extensions decreased by < 10%, and it was - // slower overall. - if( score < minScoreGapless ) continue; - - indexT tEnd = dis.forwardGaplessEnd( j, i, fs ); - indexT tBeg = dis.reverseGaplessEnd( j, i, rs ); - indexT qBeg = i - (j - tBeg); - if( !dis.isOptimalGapless( tBeg, tEnd, qBeg ) ) continue; - SegmentPair sp( tBeg, qBeg, tEnd - tBeg, score ); - - if( args.outputType == 1 ){ // we just want gapless alignments - Alignment aln; - aln.fromSegmentPair(sp); - writeAlignment( aligner, aln, queryNum, strand, querySeq ); - } - else{ - gaplessAlns.add(sp); // add the gapless alignment to the pot + if( isOverlap ){ + size_t revLen, fwdLen; + int score = dis.gaplessOverlap( j, i, revLen, fwdLen ); + if( score < minScoreGapless ) continue; + SegmentPair sp( j - revLen, i - revLen, revLen + fwdLen, score ); + dt.addEndpoint( sp.end2(), sp.end1() ); + writeSegmentPair( aligner, sp, queryNum, strand, querySeq ); + }else{ + int fs = dis.forwardGaplessScore( j, i ); + int rs = dis.reverseGaplessScore( j, i ); + int score = fs + rs; + + // Tried checking the score after isOptimal & addEndpoint, + // but the number of extensions decreased by < 10%, and it + // was slower overall. + if( score < minScoreGapless ) continue; + + indexT tEnd = dis.forwardGaplessEnd( j, i, fs ); + indexT tBeg = dis.reverseGaplessEnd( j, i, rs ); + indexT qBeg = i - (j - tBeg); + if( !dis.isOptimalGapless( tBeg, tEnd, qBeg ) ) continue; + SegmentPair sp( tBeg, qBeg, tEnd - tBeg, score ); + dt.addEndpoint( sp.end2(), sp.end1() ); + + if( args.outputType == 1 ){ // we just want gapless alignments + writeSegmentPair( aligner, sp, queryNum, strand, querySeq ); + } + else{ + gaplessAlns.add(sp); // add the gapless alignment to the pot + } } ++gaplessAlignmentsPerQueryPosition; ++gaplessAlignmentCount; - dt.addEndpoint( sp.end2(), sp.end1() ); } } } diff --git a/src/version.hh b/src/version.hh index 7f9046d..406a230 100644 --- a/src/version.hh +++ b/src/version.hh @@ -1 +1 @@ -"755" +"759" -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git _______________________________________________ debian-med-commit mailing list debian-med-commit@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit