This is an automated email from the git hooks/post-receive script. malex-guest pushed a commit to branch master in repository gmap.
commit 1250ec2ea5db7b724dda8c3d03b04b81500909f2 Author: Alexandre Mestiashvili <[email protected]> Date: Mon Aug 8 10:41:03 2016 +0200 Imported Upstream version 2016-07-11.v4 --- ChangeLog | 34 +++++++++ src/atoiindex.c | 10 +-- src/comp.h | 2 +- src/inbuffer.c | 10 ++- src/pair.c | 2 +- src/pairpool.c | 2 +- src/sarray-read.c | 5 +- src/shortread.c | 173 +++++++++++++++++++++++++++++++-------------- src/stage3.c | 5 +- src/stage3hr.c | 23 ++++-- util/gtf_genes.pl.in | 12 ++-- util/gtf_introns.pl.in | 12 ++-- util/gtf_splicesites.pl.in | 6 +- 13 files changed, 208 insertions(+), 88 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6abfd3f..d5366eb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +2016-08-02 twu + + * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in, util: Merged + revision 195495 from trunk to use preference order of desired keys, rather + than the text + + * comp.h, pair.c, pairpool.c, src, stage3.c, stage3hr.c: Merging revisions + 195548 through 195550 from trunk to revert recent changes to extraexon + comp, addition of dual breaks, and restoring final procedure in + Stage3pair_optimal_score + + * sarray-read.c: Limiting suffix array procedure by nmisses_allowed again + + * inbuffer.c: Fixed problem where --force-single-end terminated when file + had reads that were a multiple of --input-buffer-size + + * shortread.c: Changed debugging statements to write to stderr + + * atoiindex.c: Merged revision 194848 from trunk to fix calculation of oligo + using new block algorithm + + * stage3.c: Merged revision 195487 from trunk to use shortgap comp instead + of extraexon comp for representing dual breaks + + * comp.h, pair.c, pairpool.c: Merged revision 195484 from trunk to use + shortgap comp instead of extraexon comp for representing dual breaks + + * shortread.c: Merged revision 195483 from trunk to fix issues in reading + multiple pairs of files from command line + +2016-07-18 twu + + * filestring.c: Implemented right-justified strings + 2016-07-12 twu * 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h, diff --git a/src/atoiindex.c b/src/atoiindex.c index 17e62cf..793d1f7 100644 --- a/src/atoiindex.c +++ b/src/atoiindex.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: atoiindex.c 184156 2016-02-12 18:30:44Z twu $"; +static char rcsid[] = "$Id: atoiindex.c 195489 2016-08-02 00:16:53Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -691,7 +691,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename, block_start = oldoffsets[ii]; block_end = oldoffsets[ii+1]; #ifdef WORDS_BIGENDIAN - reduced = Atoi_reduce_ag(oligoi) & mask; + reduced = Atoi_reduce_ag(oligoi + ii) & mask; offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm); if (coord_values_8p == true) { for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) { @@ -719,7 +719,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename, } #else if (block_end > block_start) { - reduced = Atoi_reduce_ag(oligoi) & mask; + reduced = Atoi_reduce_ag(oligoi + ii) & mask; offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm); if (coord_values_8p == true) { for (j = block_start; j < block_end; j++) { @@ -888,7 +888,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename, block_start = oldoffsets[ii]; block_end = oldoffsets[ii+1]; #ifdef WORDS_BIGENDIAN - reduced = Atoi_reduce_tc(oligoi) & mask; + reduced = Atoi_reduce_tc(oligoi + ii) & mask; offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm); if (coord_values_8p == true) { for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) { @@ -916,7 +916,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename, } #else if (block_end > block_start) { - reduced = Atoi_reduce_tc(oligoi) & mask; + reduced = Atoi_reduce_tc(oligoi + ii) & mask; offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm); if (coord_values_8p == true) { for (j = block_start; j < block_end; j++) { diff --git a/src/comp.h b/src/comp.h index 4c1e2cc..e3aa9ef 100644 --- a/src/comp.h +++ b/src/comp.h @@ -1,4 +1,4 @@ -/* $Id: comp.h 40271 2011-05-28 02:29:18Z twu $ */ +/* $Id: comp.h 195553 2016-08-02 17:32:39Z twu $ */ #ifndef COMP_INCLUDED #define COMP_INCLUDED diff --git a/src/inbuffer.c b/src/inbuffer.c index 93ee3cf..064d817 100644 --- a/src/inbuffer.c +++ b/src/inbuffer.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: inbuffer.c 179288 2015-11-20 00:45:00Z twu $"; +static char rcsid[] = "$Id: inbuffer.c 195493 2016-08-02 02:01:39Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -712,6 +712,7 @@ fill_buffer (T this) { bool skipp; int nchars1 = 0, nchars2 = 0; /* Returned only because MPI master needs it. Doesn't need to be saved as a field in Inbuffer_T. */ + /* fprintf(stderr,"Entered fill_buffer\n"); */ while (nread < this->nspaces && (queryseq1 = Shortread_read(&this->nextchar,&nchars1,&nchars2,&queryseq2, &this->input,&this->input2, @@ -750,6 +751,7 @@ fill_buffer (T this) { } this->inputid++; } + /* fprintf(stderr,"Read %d reads\n",nread); */ this->nleft = nread; this->ptr = 0; @@ -827,11 +829,13 @@ Inbuffer_get_request (Sequence_T *pairalign_segment, T this) request = this->buffer[this->ptr++]; this->nleft -= 1; +#if 0 } else if (this->nextchar == EOF) { - /* ? Causes stall at end */ - /* Already know it is pointless to fill buffer */ + /* Causes --force-single-end to fail when reads in a file are a multiple of nspaces */ + /* Want to call fill_buffer to find out if the input is exhausted */ Outbuffer_add_nread(this->outbuffer,/*nread*/0); request = NULL; +#endif } else { #ifdef USE_MPI diff --git a/src/pair.c b/src/pair.c index 6085751..e0a0d5c 100644 --- a/src/pair.c +++ b/src/pair.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: pair.c 193885 2016-07-12 03:21:37Z twu $"; +static char rcsid[] = "$Id: pair.c 195553 2016-08-02 17:32:39Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif diff --git a/src/pairpool.c b/src/pairpool.c index adc6af8..aa61a0a 100644 --- a/src/pairpool.c +++ b/src/pairpool.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: pairpool.c 184430 2016-02-17 19:56:26Z twu $"; +static char rcsid[] = "$Id: pairpool.c 195553 2016-08-02 17:32:39Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif diff --git a/src/sarray-read.c b/src/sarray-read.c index de751cf..bc01892 100644 --- a/src/sarray-read.c +++ b/src/sarray-read.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: sarray-read.c 193899 2016-07-12 04:41:34Z twu $"; +static char rcsid[] = "$Id: sarray-read.c 195552 2016-08-02 17:31:06Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -8201,8 +8201,11 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand)); if (nmisses_allowed < 0) { nmisses_allowed = 0; +#if 0 } else { + /* It is possible that this makes GSNAP too slow */ nmisses_allowed = querylength; +#endif } *found_score = querylength; diff --git a/src/shortread.c b/src/shortread.c index 0ac4f4b..66d6389 100644 --- a/src/shortread.c +++ b/src/shortread.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: shortread.c 193894 2016-07-12 04:09:51Z twu $"; +static char rcsid[] = "$Id: shortread.c 195492 2016-08-02 02:00:54Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -414,6 +414,7 @@ Shortread_input_init (int *nchars, FILE *fp) { int c; bool okayp = false; + debugf(fprintf(stderr,"Calling Shortread_input_init on %p\n",fp)); Header[0] = '\0'; while (okayp == false && (c = fgetc(fp)) != EOF) { @@ -445,6 +446,7 @@ Shortread_input_init_filecontents (char **filecontents) { int c; bool okayp = false; + debugf(fprintf(stderr,"Calling Shortread_input_init_filecontents on %p\n",filecontents)); Header[0] = '\0'; while (okayp == false && (c = *(*filecontents)++) != EOF && c != '\0') { @@ -473,6 +475,7 @@ Shortread_input_init_gzip (gzFile fp) { int c; bool okayp = false; + debugf(fprintf(stderr,"Calling Shortread_input_init_gzip on %p\n",fp)); Header[0] = '\0'; while (okayp == false && (c = gzgetc(fp)) != EOF) { @@ -500,6 +503,7 @@ Shortread_input_init_bzip2 (Bzip2_T fp) { int c; bool okayp = false; + debugf(fprintf(stderr,"Calling Shortread_input_init_bzip2 on %p\n",fp)); Header[0] = '\0'; while (okayp == false && (c = bzgetc(fp)) != EOF) { @@ -2893,10 +2897,13 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse } else { if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false && (*input2 = FOPEN_READ_TEXT((*files)[0])) != NULL) { - debugf(fprintf(stderr,"Master opening input file 1\n")); + debugf(fprintf(stderr,"Master opening input file 2\n")); (*files) += 1; (*nfiles) -= 1; - nextchar2 = '\0'; + if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) { + fclose(*input2); + *input2 = NULL; + } } if (*input2 != NULL) { @@ -3073,12 +3080,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2, (*nfiles) -= 1; *nextchar = '\0'; return (T) NULL; +#if 0 } else { - debugf(fprintf(stderr,"Slave opening input file 1\n")); + debugf(fprintf(stderr,"Slave opening input file 2\n")); *input2 = NULL; (*files) += 1; (*nfiles) -= 1; *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */ +#endif } #else if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) { @@ -3087,12 +3096,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2, (*nfiles) -= 1; *nextchar = EOF; return (T) NULL; +#if 0 } else { debugf(fprintf(stderr,"Slave opening input file 2\n")); *input2 = NULL; (*files) += 1; (*nfiles) -= 1; nextchar2 = '\0'; +#endif } #endif @@ -3200,7 +3211,16 @@ read_fasta_filecontents (int *nextchar, T *queryseq2, #endif (*files) += 1; (*nfiles) -= 1; - nextchar2 = '\0'; + if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) { +#ifdef USE_MPI_FILE_INPUT + debugf(fprintf(stderr,"Slave closing input 2 using MPI_File_close\n")); + MPI_File_close(&(*input2)); +#else + debugf(fprintf(stderr,"Slave closing input 2 using fclose\n")); + fclose(*input2); +#endif + *input2 = NULL; + } } if (*filecontents2 != NULL) { @@ -3329,10 +3349,12 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2, queryseq1 = *queryseq2 = (T) NULL; if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */ if (*input1 != NULL) { + debugf(fprintf(stderr,"Master closing input 1 using gzclose\n")); gzclose(*input1); *input1 = NULL; } if (*input2 != NULL) { + debugf(fprintf(stderr,"Master closing input 2 using gzclose\n")); gzclose(*input2); *input2 = NULL; } @@ -3349,6 +3371,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2, *nextchar = EOF; return (T) NULL; } else { + debugf(fprintf(stderr,"Master opening input file 1\n")); #ifdef HAVE_ZLIB_GZBUFFER gzbuffer(*input1,GZBUFFER_SIZE); #endif @@ -3361,8 +3384,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2, } else { while (*nfiles > 0 && (*input1 = gzopen((*files)[0],"rb")) == NULL) { fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]); - (*files)++; - (*nfiles)--; + (*files) += 1; + (*nfiles) -= 1; } if (*input1 == NULL) { *nextchar = EOF; @@ -3371,8 +3394,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2, #ifdef HAVE_ZLIB_GZBUFFER gzbuffer(*input1,GZBUFFER_SIZE); #endif - (*files)++; - (*nfiles)--; + (*files) += 1; + (*nfiles) -= 1; *nextchar = '\0'; } } @@ -3480,7 +3503,10 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2, #endif (*files) += 1; (*nfiles) -= 1; - nextchar2 = '\0'; + if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) { + gzclose(*input2); + *input2 = NULL; + } } if (*input2 != NULL) { @@ -3650,10 +3676,12 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2, queryseq1 = *queryseq2 = (T) NULL; if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */ if (*input1 != NULL) { + debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n")); Bzip2_free(&(*input1)); *input1 = NULL; } if (*input2 != NULL) { + debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n")); Bzip2_free(&(*input2)); *input2 = NULL; } @@ -3670,6 +3698,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2, *nextchar = EOF; return (T) NULL; } else { + debugf(fprintf(stderr,"Master opening input file 1\n")); *input2 = NULL; (*files) += 1; (*nfiles) -= 1; @@ -3679,15 +3708,15 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2, } else { while (*nfiles > 0 && (*input1 = Bzip2_new((*files)[0])) == NULL) { fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]); - (*files)++; - (*nfiles)--; + (*files) += 1; + (*nfiles) -= 1; } if (*input1 == NULL) { *nextchar = EOF; return (T) NULL; } else { - (*files)++; - (*nfiles)--; + (*files) += 1; + (*nfiles) -= 1; *nextchar = '\0'; } } @@ -3792,7 +3821,10 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2, (*input2 = Bzip2_new((*files)[0])) != NULL) { (*files) += 1; (*nfiles) -= 1; - nextchar2 = '\0'; + if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) { + Bzip2_free(&(*input2)); + *input2 = NULL; + } } if (*input2 != NULL) { @@ -4004,6 +4036,9 @@ Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryse (*files) += 2; (*nfiles) -= 2; *nextchar = '\0'; + if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) { + return (T) NULL; + } } } } @@ -4221,6 +4256,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2, (*files) += 2; (*nfiles) -= 2; *nextchar = '\0'; + if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') { + return (T) NULL; + } } #else while (*nfiles > 0 && @@ -4239,6 +4277,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2, (*files) += 2; (*nfiles) -= 2; *nextchar = '\0'; + if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') { + return (T) NULL; + } } #endif } @@ -4377,10 +4418,12 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2, queryseq1 = *queryseq2 = (T) NULL; if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */ if (*input1 != NULL) { + debugf(fprintf(stderr,"Master closing input 1 using gzclose\n")); gzclose(*input1); *input1 = NULL; } if (*input2 != NULL) { + debugf(fprintf(stderr,"Master closing input 2 using gzclose\n")); gzclose(*input2); *input2 = NULL; } @@ -4391,44 +4434,50 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2, } else if (*nfiles == 1 || force_single_end_p == true) { if ((*input1 = gzopen((*files)[0],"rb")) == NULL) { - fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]); - exit(9); + fprintf(stderr,"Cannot open gzipped file %s => skipping.\n",(*files)[0]); + (*files) += 1; + (*nfiles) -= 1; + *nextchar = EOF; + return (T) NULL; } else { + debugf(fprintf(stderr,"Master opening input file 1\n")); #ifdef HAVE_ZLIB_GZBUFFER gzbuffer(*input1,GZBUFFER_SIZE); #endif + *input2 = NULL; + (*files) += 1; + (*nfiles) -= 1; + *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */ } - *input2 = NULL; - (*files) += 1; - (*nfiles) -= 1; - *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */ } else { - if ((*input1 = gzopen((*files)[0],"rb")) == NULL) { - fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]); - exit(9); - } else { -#ifdef HAVE_ZLIB_GZBUFFER - gzbuffer(*input1,GZBUFFER_SIZE); -#endif + while (*nfiles > 0 && + ((*input1 = gzopen((*files)[0],"rb")) == NULL || + (*input2 = gzopen((*files)[1],"rb")) == NULL)) { + fprintf(stderr,"Can't open file %s or %s => skipping both.\n", + (*files)[0],(*files)[1]); + (*files) += 2; + (*nfiles) -= 2; } - - if ((*input2 = gzopen((*files)[1],"rb")) == NULL) { - fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[1]); - exit(9); + if (*input1 == NULL) { + *nextchar = EOF; + return (T) NULL; } else { + debugf(fprintf(stderr,"Master opening input files 1 and 2\n")); #ifdef HAVE_ZLIB_GZBUFFER + gzbuffer(*input1,GZBUFFER_SIZE); gzbuffer(*input2,GZBUFFER_SIZE); #endif + (*files) += 2; + (*nfiles) -= 2; + *nextchar = '\0'; + if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) { + return (T) NULL; + } } - - (*files) += 2; - (*nfiles) -= 2; - *nextchar = '\0'; } } - if (*nextchar == '\0') { if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) { return (T) NULL; @@ -4593,10 +4642,12 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2, queryseq1 = *queryseq2 = (T) NULL; if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */ if (*input1 != NULL) { + debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n")); Bzip2_free(&(*input1)); *input1 = NULL; } if (*input2 != NULL) { + debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n")); Bzip2_free(&(*input2)); *input2 = NULL; } @@ -4607,28 +4658,40 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2, } else if (*nfiles == 1 || force_single_end_p == true) { if ((*input1 = Bzip2_new((*files)[0])) == NULL) { - fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]); - exit(9); + fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]); + (*files) += 1; + (*nfiles) -= 1; + *nextchar = EOF; + return (T) NULL; + } else { + debugf(fprintf(stderr,"Master opening input file 1\n")); + *input2 = NULL; + (*files) += 1; + (*nfiles) -= 1; + *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */ } - *input2 = NULL; - (*files) += 1; - (*nfiles) -= 1; - *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */ - + } else { - if ((*input1 = Bzip2_new((*files)[0])) == NULL) { - fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]); - exit(9); + while (*nfiles > 0 && + ((*input1 = Bzip2_new((*files)[0])) == NULL || + (*input2 = Bzip2_new((*files)[1])) == NULL)) { + fprintf(stderr,"Can't open file %s or %s => skipping both.\n", + (*files)[0],(*files)[1]); + (*files) += 2; + (*nfiles) -= 2; } - - if ((*input2 = Bzip2_new((*files)[1])) == NULL) { - fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[1]); - exit(9); + if (*input1 == NULL) { + *nextchar = EOF; + return (T) NULL; + } else { + debugf(fprintf(stderr,"Master opening input files 1 and 2\n")); + (*files) += 2; + (*nfiles) -= 2; + *nextchar = '\0'; + if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) { + return (T) NULL; + } } - - (*files) += 2; - (*nfiles) -= 2; - *nextchar = '\0'; } } diff --git a/src/stage3.c b/src/stage3.c index dabb211..b13c3ab 100644 --- a/src/stage3.c +++ b/src/stage3.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage3.c 193899 2016-07-12 04:41:34Z twu $"; +static char rcsid[] = "$Id: stage3.c 195553 2016-08-02 17:32:39Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -8581,7 +8581,8 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa lastpair = (Pair_T) gappairs->first; firstpair = (Pair_T) List_last_value(gappairs); debug14(printf("gappairs goes from %d to %d\n",firstpair->querypos,lastpair->querypos)); - if (firstpair->querypos == querydp5 && lastpair->querypos == querydp3) { + if (1 || (firstpair->querypos == querydp5 && lastpair->querypos == querydp3)) { + /* Note: We have to take this branch, otherwise we get unexpected comp errors */ /* fprintf(stderr,"%d..%d .. %d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */ debug14(printf(" => entire query sequence bridged or not, but taking it regardless\n")); pairs = Pairpool_transfer(pairs,gappairs); diff --git a/src/stage3hr.c b/src/stage3hr.c index 0f617cf..006780a 100644 --- a/src/stage3hr.c +++ b/src/stage3hr.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $"; +static char rcsid[] = "$Id: stage3hr.c 195553 2016-08-02 17:32:39Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -66,6 +66,7 @@ static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $"; #define TERMINAL_COMPUTE_MINLENGTH 40 #define SCORE_INDELS_EVENTRIM 1 /* Needed to compare genomic positions with and without indels */ #define EVENTRIM_BADINTRON_PENALTY 2 +#define DO_FINAL 1 #define OUTERLENGTH_SLOP 100 @@ -16488,7 +16489,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int n; int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3; int best_nsegments, best_nsegments_5, best_nsegments_3; - /* int max_nmatches = 0; */ + int max_nmatches = 0; #ifdef USE_OPTIMAL_SCORE_BINGO int minscore_bingo = querylength5 + querylength3; #endif @@ -16842,8 +16843,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, } } else { - /* Final: Previously based purely on nmatches. However, this leads to indelbreaks and bad introns. */ -#if 0 +#ifdef DO_FINAL + /* Final: based on nmatches. ? leads to indelbreaks and bad introns. But skipping this leads to nearly-identical alignments */ max_nmatches = 0; for (p = hitpairlist; p != NULL; p = p->rest) { hitpair = (Stage3pair_T) p->first; @@ -16858,6 +16859,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, max_nmatches = hitpair->nmatches; } } + + cutoff_level = max_nmatches - subopt_levels; + debug6(printf("cutoff level %d = max_nmatches %d - subopt %d\n",cutoff_level,max_nmatches,subopt_levels)); + #else for (p = hitpairlist; p != NULL; p = p->rest) { hitpair = (Stage3pair_T) p->first; @@ -16901,6 +16906,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, hitpair,hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score)); optimal = List_push(optimal,hitpair); +#ifdef DO_FINAL + } else if (hitpair->nmatches < cutoff_level) { + debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches %d < cutoff_level %d (finalp %d)\n", + hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset, + hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset, + hitpair->nmatches,cutoff_level,finalp)); + *eliminatedp = true; + Stage3pair_free(&hitpair); +#else } else if (hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim > cutoff_level) { debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with scores %d+%d > cutoff_level %d (finalp %d)\n", hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset, @@ -16908,6 +16922,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level,finalp)); *eliminatedp = true; Stage3pair_free(&hitpair); +#endif } else { debug6(printf("Final: Keeping hit pair %p with scores %d+%d (vs cutoff_level %d)\n", diff --git a/util/gtf_genes.pl.in b/util/gtf_genes.pl.in index 44e88b9..3ec4f0b 100644 --- a/util/gtf_genes.pl.in +++ b/util/gtf_genes.pl.in @@ -94,9 +94,9 @@ sub get_info { my $info = shift @_; my @desired_keys = @_; - foreach $item (@ {$info}) { - ($key,$value) = $item =~ /(\S+) (.+)/; - foreach $desired_key (@desired_keys) { + foreach $desired_key (@desired_keys) { + foreach $item (@ {$info}) { + ($key,$value) = $item =~ /(\S+) (.+)/; if ($key eq $desired_key) { return $value; } @@ -112,9 +112,9 @@ sub get_info_optional { my $info = shift @_; my @desired_keys = @_; - foreach $item (@ {$info}) { - ($key,$value) = $item =~ /(\S+) (.+)/; - foreach $desired_key (@desired_keys) { + foreach $desired_key (@desired_keys) { + foreach $item (@ {$info}) { + ($key,$value) = $item =~ /(\S+) (.+)/; if ($key eq $desired_key) { return $value; } diff --git a/util/gtf_introns.pl.in b/util/gtf_introns.pl.in index ae3c364..232dc40 100755 --- a/util/gtf_introns.pl.in +++ b/util/gtf_introns.pl.in @@ -177,9 +177,9 @@ sub get_info { my $info = shift @_; my @desired_keys = @_; - foreach $item (@ {$info}) { - ($key,$value) = $item =~ /(\S+) (.+)/; - foreach $desired_key (@desired_keys) { + foreach $desired_key (@desired_keys) { + foreach $item (@ {$info}) { + ($key,$value) = $item =~ /(\S+) (.+)/; if ($key eq $desired_key) { return $value; } @@ -195,9 +195,9 @@ sub get_info_optional { my $info = shift @_; my @desired_keys = @_; - foreach $item (@ {$info}) { - ($key,$value) = $item =~ /(\S+) (.+)/; - foreach $desired_key (@desired_keys) { + foreach $desired_key (@desired_keys) { + foreach $item (@ {$info}) { + ($key,$value) = $item =~ /(\S+) (.+)/; if ($key eq $desired_key) { return $value; } diff --git a/util/gtf_splicesites.pl.in b/util/gtf_splicesites.pl.in index e018f0e..d296a9c 100755 --- a/util/gtf_splicesites.pl.in +++ b/util/gtf_splicesites.pl.in @@ -177,9 +177,9 @@ sub get_info { my $info = shift @_; my @desired_keys = @_; - foreach $item (@ {$info}) { - ($key,$value) = $item =~ /(\S+) (.+)/; - foreach $desired_key (@desired_keys) { + foreach $desired_key (@desired_keys) { + foreach $item (@ {$info}) { + ($key,$value) = $item =~ /(\S+) (.+)/; if ($key eq $desired_key) { return $value; } -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
