This is an automated email from the git hooks/post-receive script. malex-guest pushed a commit to branch master in repository gmap.
commit 3e968d2ee29a6e79037cc23cf34a802ed412f022 Author: Alexandre Mestiashvili <[email protected]> Date: Thu Dec 21 14:49:39 2017 +0100 New upstream version 2017-11-15 --- ChangeLog | 17 ++++++ VERSION | 2 +- configure | 24 ++++---- src/dynprog_end.c | 21 +++++-- src/outbuffer.c | 8 ++- src/stage1hr.c | 6 +- src/stage3.c | 178 ++++++++++++++++++++++++++++++++---------------------- 7 files changed, 162 insertions(+), 94 deletions(-) diff --git a/ChangeLog b/ChangeLog index 43227ec..4e99c79 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2017-11-16 twu + + * VERSION, public-2017-09-05, src: Updated version number + + * stage3.c: Merged revision 211227 from trunk to fix issue with trimming of + chimeras not being re-extended back to the breakpoint, and to fix issue + with CIGAR strings from shortgaps comps being treated differently from + indel comps. + + * stage1hr.c: Appending greedy results to subs + + * dynprog_end.c: Merged revision 211220 from trunk to handle the case where + rev_goffset is negative + + * outbuffer.c: Merged revision 211219 from trunk to fix fatal bug when + trying to write SAM headers to OUTPUT_NONE split output, which is now NULL + 2017-10-30 twu * stage3.c: Merged revision 210882 from trunk to fix trim_end_indel diff --git a/VERSION b/VERSION index 75cebdc..535447a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2017-10-30 \ No newline at end of file +2017-11-15 \ No newline at end of file diff --git a/configure b/configure index ad2a907..941ec1c 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for gmap 2017-10-30. +# Generated by GNU Autoconf 2.69 for gmap 2017-11-15. # # Report bugs to <Thomas Wu <[email protected]>>. # @@ -590,8 +590,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='gmap' PACKAGE_TARNAME='gmap' -PACKAGE_VERSION='2017-10-30' -PACKAGE_STRING='gmap 2017-10-30' +PACKAGE_VERSION='2017-11-15' +PACKAGE_STRING='gmap 2017-11-15' PACKAGE_BUGREPORT='Thomas Wu <[email protected]>' PACKAGE_URL='' @@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures gmap 2017-10-30 to adapt to many kinds of systems. +\`configure' configures gmap 2017-11-15 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1440,7 +1440,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of gmap 2017-10-30:";; + short | recursive ) echo "Configuration of gmap 2017-11-15:";; esac cat <<\_ACEOF @@ -1577,7 +1577,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -gmap configure 2017-10-30 +gmap configure 2017-11-15 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by gmap $as_me 2017-10-30, which was +It was created by gmap $as_me 2017-11-15, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5 $as_echo_n "checking package version... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-10-30" >&5 -$as_echo "2017-10-30" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-11-15" >&5 +$as_echo "2017-11-15" >&6; } ### Read defaults @@ -4401,7 +4401,7 @@ fi # Define the identity of the package. PACKAGE='gmap' - VERSION='2017-10-30' + VERSION='2017-11-15' cat >>confdefs.h <<_ACEOF @@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by gmap $as_me 2017-10-30, which was +This file was extended by gmap $as_me 2017-11-15, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20044,7 +20044,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -gmap config.status 2017-10-30 +gmap config.status 2017-11-15 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/src/dynprog_end.c b/src/dynprog_end.c index bf1740f..ed034e4 100644 --- a/src/dynprog_end.c +++ b/src/dynprog_end.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_end.c 205259 2017-04-12 23:55:04Z twu $"; +static char rcsid[] = "$Id: dynprog_end.c 211225 2017-11-16 03:47:12Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -1299,7 +1299,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma debug6( printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'); - printf("Aligning 5' end gap with endalign %d\n",endalign); + printf("Aligning 5' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset); ); mismatchtype = ENDQ; @@ -1327,7 +1327,13 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma debug6(printf("rlength %d is too long. Chopping to %d\n",rlength,dynprog->max_rlength)); rlength = dynprog->max_rlength; } - if (glength <= 0) { + if (rev_goffset < 0) { + /* No room to extend more 5' */ + debug6(printf("rev_goffset %d < 0, so returning NULL\n",rev_goffset)); + *nmatches = *nmismatches = *nopens = *nindels = 0; + *finalscore = 0; + return (List_T) NULL; + } else if (glength <= 0) { /* Needed to avoid abort by Matrix16_alloc */ debug6(printf("glength %d <= 0, so returning NULL\n",glength)); *nmatches = *nmismatches = *nopens = *nindels = 0; @@ -1342,6 +1348,10 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma debug6(printf("At query offset %d-%d, %.*s\n",rev_roffset-rlength+1,rev_roffset,rlength,&(rev_rsequence[-rlength+1]))); + /* Situation is handled above */ + /* assert(rev_goffset >= 0); */ + + #ifdef EXTRACT_GENOMICSEG debug6(printf("At genomic offset %d-%d, %.*s\n", rev_goffset-glength+1,rev_goffset,glength,&(rev_gsequence[-glength+1]))); @@ -1899,7 +1909,7 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma debug6( printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'); - printf("Aligning 3' end gap with endalign = %d\n",endalign); + printf("Aligning 3' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset); ); mismatchtype = ENDQ; @@ -1942,7 +1952,8 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma #ifdef EXTRACT_GENOMICSEG debug6(printf("At genomic offset %d-%d, %.*s\n",goffset,goffset+glength-1,glength,gsequence)); #endif - + /* Need to assert here, because if rlength is 0, then goffset doesn't matter */ + assert(goffset >= 0); gsequence = (char *) MALLOCA((glength+1) * sizeof(char)); gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char)); diff --git a/src/outbuffer.c b/src/outbuffer.c index ddc2d14..2839fc4 100644 --- a/src/outbuffer.c +++ b/src/outbuffer.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: outbuffer.c 210517 2017-10-12 22:41:05Z twu $"; +static char rcsid[] = "$Id: outbuffer.c 211224 2017-11-16 03:46:32Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -147,6 +147,12 @@ print_file_headers ( FILE *output #endif ) { + + if (output == NULL) { + /* Possible since we are no longer creating a file for OUTPUT_NONE */ + return; + } + #ifdef GSNAP if (output_sam_p == true && sam_headers_p == true) { SAM_header_print_HD(output,nworkers,orderedp); diff --git a/src/stage1hr.c b/src/stage1hr.c index 9768290..adaef9c 100644 --- a/src/stage1hr.c +++ b/src/stage1hr.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage1hr.c 209609 2017-09-01 21:57:26Z twu $"; +static char rcsid[] = "$Id: stage1hr.c 211226 2017-11-16 03:49:07Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -21713,8 +21713,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c debug(printf("After pairing one mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == true) { - *hits5 = subs5; - *hits3 = subs3; + *hits5 = List_append(greedy5,subs5); /* Was subs5 */ + *hits3 = List_append(greedy3,subs3); /* Was subs3 */ hitpairs = Stage3pair_remove_circular_alias(hitpairs); #if 0 hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true); diff --git a/src/stage3.c b/src/stage3.c index 3aac474..64010f7 100644 --- a/src/stage3.c +++ b/src/stage3.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage3.c 210883 2017-10-30 21:42:56Z twu $"; +static char rcsid[] = "$Id: stage3.c 211229 2017-11-16 03:58:47Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -189,13 +189,6 @@ static const Except_T coordinate_error = {"Coordinate error"}; #define debug1(x) #endif -/* Chimeras */ -#ifdef DEBUG2 -#define debug2(x) x -#else -#define debug2(x) -#endif - /* path_trim */ #ifdef DEBUG3 #define debug3(x) x @@ -512,9 +505,9 @@ Stage3_straintype (T this) { int Stage3_goodness (T this) { - debug2(printf("Overall goodness:\n")); - debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n", - this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness)); + debug(printf("Overall goodness:\n")); + debug(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n", + this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness)); return this->goodness; } @@ -738,23 +731,23 @@ Stage3_chimeric_goodness (int *matches1, int *matches2, T part1, T part2, int br ncanonical2, nsemicanonical2, nnoncanonical2; querystart = Pair_querypos(&(part1->pairarray[0])); - debug2(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint)); + debug10(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint)); Pair_fracidentity_bounded(&(*matches1),&unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, &ncanonical1,&nsemicanonical1,&nnoncanonical1, part1->pairarray,part1->npairs,part1->cdna_direction, querystart,breakpoint); goodness1 = (*matches1) + MISMATCH*mismatches1 + QOPEN*qopens1 + QINDEL*qindels1 + TOPEN*topens1 + TINDEL*tindels1; - debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n", + debug10(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n", *matches1,mismatches1,qopens1,qindels1,topens1,tindels1,goodness1)); queryend = Pair_querypos(&(part2->pairarray[part2->npairs-1])); - debug2(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1)); + debug10(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1)); Pair_fracidentity_bounded(&(*matches2),&unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, &ncanonical2,&nsemicanonical2,&nnoncanonical2, part2->pairarray,part2->npairs,part2->cdna_direction, breakpoint,queryend); goodness2 = (*matches2) + MISMATCH*mismatches2 + QOPEN*qopens2 + QINDEL*qindels2 + TOPEN*topens2 + TINDEL*tindels2; - debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n", + debug10(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n", *matches2,mismatches2,qopens2,qindels2,topens2,tindels2,goodness2)); return goodness1 + goodness2; @@ -3743,7 +3736,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length, } exon = (List_T) NULL; - while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP) { + while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) { pairptr = pairs; pairs = Pairpool_pop(pairs,&pair); #ifdef WASTE @@ -3753,7 +3746,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length, #endif } - while (pairs != NULL && pair->gapp == false && ((Pair_T) pairs->first)->comp == INDEL_COMP) { + while (pairs != NULL && pair->gapp == false && (((Pair_T) pairs->first)->comp == INDEL_COMP || ((Pair_T) pairs->first)->comp == SHORTGAP_COMP)) { pairptr = pairs; pairs = Pairpool_pop(pairs,&pair); #ifdef WASTE @@ -3773,7 +3766,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length, } else { p = exon; nindels = 1; - while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { + while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) { p = List_next(p); nindels++; } @@ -3850,6 +3843,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length, chroffset,chrhigh,watsonp,jump_late_p,pairpool, extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true); debug(printf("CONTINUOUS AT 5 (trim_end5_indels)?\n")); + debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n")); debug(Pair_dump_list(continuous_gappairs_medialgap,true)); debug3(printf("continuous finalscore %d\n",finalscore)); @@ -3893,7 +3887,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs, List_T path, exon, pairptr, p; Pair_T pair, splice = NULL, gappair; int max_nmatches = 0, max_nmismatches; - int nmatches = 0, nmismatches /* = -1 because of the gap */, i; + int nmatches = 0, nmismatches /* = -1 because of the gap */; int max_score, score; /* bool nearindelp = false; */ double medial_prob; @@ -3930,7 +3924,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs, } exon = (List_T) NULL; - while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP*/) { + while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP */) { pairptr = pairs; pairs = Pairpool_pop(pairs,&pair); #ifdef WASTE @@ -4085,6 +4079,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs, chroffset,chrhigh,watsonp,jump_late_p,pairpool, extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true); debug(printf("CONTINUOUS AT 5 (trim_end5_exons)?\n")); + debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n")); debug(Pair_dump_list(continuous_gappairs_medialgap,true)); debug3(printf("continuous finalscore %d\n",finalscore)); @@ -4151,7 +4146,7 @@ trim_end3_indels (List_T path, int ambig_end_length, List_T pairs, exon, pairptr, p; Pair_T pair, medial; int max_nmatches = 0, max_nmismatches; - int nmatches = 0, nmismatches /* = -1 because of the gap */, i; + int nmatches = 0, nmismatches /* = -1 because of the gap */; int max_score, score; bool nearindelp = false; int nindels; @@ -4177,7 +4172,7 @@ trim_end3_indels (List_T path, int ambig_end_length, } exon = (List_T) NULL; - while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP) { + while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) { pairptr = path; path = Pairpool_pop(path,&pair); #ifdef WASTE @@ -4187,7 +4182,7 @@ trim_end3_indels (List_T path, int ambig_end_length, #endif } - while (path != NULL && pair->gapp == false && ((Pair_T) path->first)->comp == INDEL_COMP) { + while (path != NULL && pair->gapp == false && (((Pair_T) path->first)->comp == INDEL_COMP || ((Pair_T) path->first)->comp == SHORTGAP_COMP)) { pairptr = path; path = Pairpool_pop(path,&pair); #ifdef WASTE @@ -4207,7 +4202,7 @@ trim_end3_indels (List_T path, int ambig_end_length, } else { p = exon; nindels = 1; - while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { + while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) { p = List_next(p); nindels++; } @@ -4285,6 +4280,7 @@ trim_end3_indels (List_T path, int ambig_end_length, chroffset,chrhigh,watsonp,jump_late_p,pairpool, extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true); debug(printf("CONTINUOUS AT 3 (trim_end3_indels)?\n")); + debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n")); debug(Pair_dump_list(continuous_gappairs_medialgap,true)); debug3(printf("continuous finalscore %d\n",finalscore)); @@ -4519,6 +4515,7 @@ trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path, chroffset,chrhigh,watsonp,jump_late_p,pairpool, extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true); debug(printf("CONTINUOUS AT 3 (trim_end3_exons)?\n")); + debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n")); debug(Pair_dump_list(continuous_gappairs_medialgap,true)); debug3(printf("continuous finalscore %d\n",finalscore)); @@ -9688,8 +9685,9 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor, Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high, char *queryseq_ptr, char *queryuc_ptr, int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool, - Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign) { - List_T continuous_gappairs_distalgap = NULL, peeled_pairs; + Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign, + bool forcep) { + List_T continuous_gappairs_distalgap = NULL, peeled_pairs = NULL; int queryjump, genomejump; int querydp5, querydp3_distalgap; Chrpos_T genomedp3_distalgap; @@ -9758,7 +9756,8 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor, continuous_gappairs_distalgap = Dynprog_end5_gap(&(*dynprogindex_minor),&(*finalscore), &nmatches,&nmismatches,&nopens,&nindels,dynprog, &(queryseq_ptr[querydp3_distalgap]),&(queryuc_ptr[querydp3_distalgap]), - queryjump,genomejump,querydp3_distalgap,genomedp3_distalgap, + /*rlength*/queryjump,/*glength*/genomejump, + /*rev_roffset*/querydp3_distalgap,/*rev_goffset*/genomedp3_distalgap, chroffset,chrhigh,watsonp,jump_late_p,pairpool, extraband_end,defect_rate,endalign,/*require_pos_score_p*/false); *ambig_end_length = 0; @@ -9768,7 +9767,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor, debug(printf(" finalscore: %d\n",*finalscore)); if (continuous_gappairs_distalgap == NULL) { - return (List_T) NULL; + return peeled_pairs; } else { firstpair = List_head(continuous_gappairs_distalgap); if (0 && firstpair->querypos != querydp3_distalgap) { @@ -9776,11 +9775,14 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor, /* Must have an indel between the gappairs and the rest of the read */ debug(printf("Detected indel between gappairs %d and the rest of the read %d\n", firstpair->querypos,querydp3_distalgap)); - return (List_T) NULL; + return peeled_pairs; + } else if (forcep == true) { + /* For example, needed for extending middles of chimeras */ + return continuous_gappairs_distalgap; } else if (*finalscore <= 0) { *knownsplicep = false; - return (List_T) NULL; + return peeled_pairs; } else { return continuous_gappairs_distalgap; } @@ -9910,8 +9912,8 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore, char *queryseq_ptr, char *queryuc_ptr, int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback, - double defect_rate, Endalign_T endalign) { - List_T continuous_gappairs_distalgap = NULL, peeled_path; + double defect_rate, Endalign_T endalign, bool forcep) { + List_T continuous_gappairs_distalgap = NULL, peeled_path = NULL; int queryjump, genomejump; int querydp5_distalgap, querydp3; Chrpos_T genomedp5_distalgap; @@ -9986,7 +9988,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore, debug(printf(" finalscore: %d\n",*finalscore)); if (continuous_gappairs_distalgap == NULL) { - return (List_T) NULL; + return peeled_path; } else { continuous_gappairs_distalgap = List_reverse(continuous_gappairs_distalgap); firstpair = List_head(continuous_gappairs_distalgap); @@ -9995,11 +9997,14 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore, /* Must have an indel between the gappairs and the rest of the read */ debug(printf("Detected indel between gappairs %d and the rest of the read %d\n", firstpair->querypos,querydp5_distalgap)); - return (List_T) NULL; + return peeled_path; + } else if (forcep == true) { + /* For example, needed for extending middle of chimeras */ + return continuous_gappairs_distalgap; } else if (*finalscore <= 0) { *knownsplicep = false; - return (List_T) NULL; + return peeled_path; } else { return continuous_gappairs_distalgap; } @@ -10852,26 +10857,30 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa chrend = (chrhigh - chroffset) - genomedp5; } - assert(chrend >= chrstart); - - debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n", - chrstart,chrend,watsonp)); - gappairs = Stage2_compute_one( + /* chrend can be equal to chrstart - 1 if peelback fails on both ends */ + assert(chrend + 1 >= chrstart); + if (chrend < chrstart) { + gappairs = (List_T) NULL; + } else { + debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n", + chrstart,chrend,watsonp)); + gappairs = Stage2_compute_one( #ifdef PMAP - &(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]), - /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3, + &(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]), + /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3, #else - &(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]), - /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5, + &(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]), + /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5, #endif - chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand, + chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand, - oligoindices_minor,pairpool,diagpool,cellpool, - /*localp should be false*/true,/*skip_repetitive_p*/false, - /*favor_right_p*/false,/*debug_graphic_p*/false); - - debug14(printf("Internal stage2 result:\n")); - debug14(Pair_dump_list(gappairs,true)); + oligoindices_minor,pairpool,diagpool,cellpool, + /*localp should be false*/true,/*skip_repetitive_p*/false, + /*favor_right_p*/false,/*debug_graphic_p*/false); + + debug14(printf("Internal stage2 result:\n")); + debug14(Pair_dump_list(gappairs,true)); + } if (gappairs == NULL) { pairs = Pairpool_transfer(pairs,peeled_pairs); @@ -11028,7 +11037,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi char *queryseq_ptr, char *queryuc_ptr, int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback, double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogL, - bool extendp, Endalign_T endalign) { + bool extendp, Endalign_T endalign, bool forcep) { List_T gappairs; Pair_T leftpair; /* int genomejump */ @@ -11093,7 +11102,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high, queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,maxpeelback, - defect_rate,endalign); + defect_rate,endalign,forcep); } else { /* Looks like we aren't calling this anymore */ abort(); @@ -11139,7 +11148,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb char *queryseq_ptr, char *queryuc_ptr, int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback, double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogR, - bool extendp, Endalign_T endalign) { + bool extendp, Endalign_T endalign, bool forcep) { List_T gappairs; Pair_T rightpair; int queryjump, leftquerypos; @@ -11200,7 +11209,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high, queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p,pairpool,dynprogR, - maxpeelback,defect_rate,endalign); + maxpeelback,defect_rate,endalign,forcep); } else { /* Looks like we aren't calling this anymore */ abort(); @@ -13364,8 +13373,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogR, - /*extendp*/true,/*endalign*/QUERYEND_GAP); - + /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false); /* Necessary to insert gaps and assign gap types (fills in cDNA @@ -13412,7 +13420,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true, - /*endalign*/BEST_LOCAL); + /*endalign*/BEST_LOCAL,/*forcep*/false); debug3(printf("AFTER 5' REBUILD\n")); debug3(Pair_dump_list(pairs,true)); } @@ -13432,7 +13440,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true, - /*endalign*/QUERYEND_INDELS); + /*endalign*/QUERYEND_INDELS,/*forcep*/false); #endif } @@ -13446,7 +13454,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogR, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false); debug(Pair_dump_list(pairs,true)); debug(printf("End of path_compute_end5\n")); @@ -13514,7 +13522,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL, - /*extendp*/true,/*endalign*/QUERYEND_GAP); + /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false); /* Necessary to insert gaps and assign gap types (fills in cDNA insertions, so they don't get trimmed), in case an insertion was @@ -13561,7 +13569,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true, - /*endalign*/BEST_LOCAL); + /*endalign*/BEST_LOCAL,/*forcep*/false); debug3(printf("AFTER 3' REBUILD\n")); debug3(Pair_dump_list(path,true)); } @@ -13580,7 +13588,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true, - /*endalign*/QUERYEND_NOGAPS); + /*endalign*/QUERYEND_NOGAPS,/*forcep*/false); #endif } @@ -13594,7 +13602,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do queryseq_ptr,queryuc_ptr, cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false); debug(Pair_dump_list(path,true)); debug(printf("End of path_compute_end3\n")); @@ -15024,7 +15032,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3, queryseq_ptr,queryuc_ptr, *cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogR, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false); pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh, queryseq_ptr,queryuc_ptr,querylength,*cdna_direction,watsonp, jump_late_p,pairpool,defect_rate); @@ -15049,7 +15057,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3, queryseq_ptr,queryuc_ptr, *cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false); path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh, queryseq_ptr,queryuc_ptr,querylength, *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); @@ -15954,13 +15962,31 @@ Stage3_merge_chimera (T this_left, T this_right, Chrpos_T genomedp5, genomedp3; bool protectedp; + debug10(printf("Entered stage3_merge_chimera with minpos1 %d, maxpos1 %d, minpos2 %d, maxpos2 %d\n", + minpos1,maxpos1,minpos2,maxpos2)); orig_left_pairs = Pairpool_copy(this_left->pairs,pairpool); orig_right_pairs = Pairpool_copy(this_right->pairs,pairpool); +#ifdef DEBUG10 + printf("ORIG LEFT PAIRS:\n"); + Pair_dump_list(orig_left_pairs,true); + printf("ORIG RIGHT PAIRS:\n"); + Pair_dump_list(orig_right_pairs,true); +#endif + + this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1); this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2); +#ifdef DEBUG10 + printf("CLIPPED LEFT PAIRS:\n"); + Pair_dump_list(this_left->pairs,true); + printf("CLIPPED RIGHT PAIRS:\n"); + Pair_dump_list(this_right->pairs,true); +#endif + + if (this_left->pairs == NULL && this_right->pairs == NULL) { this_left->pairs = orig_left_pairs; this_right->pairs = orig_right_pairs; @@ -15983,15 +16009,17 @@ Stage3_merge_chimera (T this_left, T this_right, } else { path = List_reverse(this_left->pairs); - /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/ + /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/ endpair = (Pair_T) path->first; querydp5 = endpair->querypos + 1; genomedp5 = endpair->genomepos + 1; protectedp = false; path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,path,&querydp5,&genomedp5, - maxpeelback,/*stop_at_indels_p*/false); + maxpeelback,/*stop_at_indels_p*/true); path = clean_path_end3_gap_indels(path); + /* Have to use forcep == true, because we cannot put back the + pairs removed from peel_leftward and clean_path_end3_gap_indels */ path = build_path_end3(&knownsplicep,&ambig_end_length_3,&ambig_splicetype,&ambig_prob_3, &chop_exon_p,&dynprogindex_minor,path, this_left->chroffset,this_left->chrhigh,/*querylength*/maxpos1+1, @@ -16000,12 +16028,14 @@ Stage3_merge_chimera (T this_left, T this_right, this_left->cdna_direction,this_left->watsonp, /*jump_late_p*/this_left->watsonp ? false : true, maxpeelback,/*defect_rate*/0.0,pairpool,dynprogL, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true); this_left->pairs = List_reverse(path); this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1); + debug10(printf("CLEANED LEFT PAIRS:\n")); + debug10(Pair_dump_list(this_left->pairs,true)); - /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/ + /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/ pairs = this_right->pairs; endpair = (Pair_T) pairs->first; @@ -16013,9 +16043,11 @@ Stage3_merge_chimera (T this_left, T this_right, genomedp3 = endpair->genomepos - 1; protectedp = false; pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3, - maxpeelback,/*stop_at_indels_p*/false); + maxpeelback,/*stop_at_indels_p*/true); pairs = clean_pairs_end5_gap_indels(pairs); + /* Have to use forcep == true, because we cannot put back the + pairs removed from peel_rightward and clean_pairs_end5_gap_indels */ pairs = build_pairs_end5(&knownsplicep,&ambig_end_length_5,&ambig_splicetype,&ambig_prob_5, &chop_exon_p,&dynprogindex_minor,pairs, this_right->chroffset,this_right->chrhigh, @@ -16024,8 +16056,10 @@ Stage3_merge_chimera (T this_left, T this_right, this_right->cdna_direction,this_right->watsonp, /*jump_late_p*/this_right->watsonp ? false : true, maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR, - /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); + /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true); this_right->pairs = Pair_clip_bounded_list(pairs,minpos2,maxpos2); + debug10(printf("CLEANED RIGHT PAIRS:\n")); + debug10(Pair_dump_list(this_right->pairs,true)); if (this_left->pairs == NULL || this_right->pairs == NULL) { this_left->pairs = orig_left_pairs; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
