This is an automated email from the git hooks/post-receive script. malex-guest pushed a commit to annotated tag upstream/2017-09-05 in repository gmap.
commit cc7268574a17a7b06974584350930c42eb5cc1ed Author: Alexandre Mestiashvili <[email protected]> Date: Mon Sep 11 13:32:54 2017 +0200 New upstream version 2017-09-05 --- ChangeLog | 20 ++++ VERSION | 2 +- configure | 24 ++-- src/get-genome.c | 88 ++++++++++++++- src/gmapindex.c | 32 +++--- src/sarray-search.c | 307 +++++++++++++++++++++++++++------------------------- src/stage1hr.c | 6 +- 7 files changed, 294 insertions(+), 185 deletions(-) diff --git a/ChangeLog b/ChangeLog index 646dd5e..a5e0526 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2017-09-05 twu + + * sarray-search.c: Fixed bug resulting from check of common diagonal over + circular origin + +2017-09-01 twu + + * index.html: Updated for latest version + + * stage1hr.c: Fixed criterion for looking for spliceends with nmismatches + less than max_splice_mismatches on each end + + * sarray-search.c: Checking right and left diagonals for collinearity with + middle diagonal in query coordinates + + * gmapindex.c: Casting all Univcoord_T lengths to UINT4 for suffix array + procedures + + * get-genome.c: Added option --gsequence to print exons and introns + 2017-08-15 twu * stage3hr.c: Changed checks on circularalias to circularpos diff --git a/VERSION b/VERSION index c710ef3..a6b5878 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2017-08-15 \ No newline at end of file +2017-09-05 \ No newline at end of file diff --git a/configure b/configure index ec2f52c..d0d7d18 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for gmap 2017-08-15. +# Generated by GNU Autoconf 2.69 for gmap 2017-09-05. # # Report bugs to <Thomas Wu <[email protected]>>. # @@ -590,8 +590,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='gmap' PACKAGE_TARNAME='gmap' -PACKAGE_VERSION='2017-08-15' -PACKAGE_STRING='gmap 2017-08-15' +PACKAGE_VERSION='2017-09-05' +PACKAGE_STRING='gmap 2017-09-05' PACKAGE_BUGREPORT='Thomas Wu <[email protected]>' PACKAGE_URL='' @@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures gmap 2017-08-15 to adapt to many kinds of systems. +\`configure' configures gmap 2017-09-05 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1440,7 +1440,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of gmap 2017-08-15:";; + short | recursive ) echo "Configuration of gmap 2017-09-05:";; esac cat <<\_ACEOF @@ -1577,7 +1577,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -gmap configure 2017-08-15 +gmap configure 2017-09-05 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by gmap $as_me 2017-08-15, which was +It was created by gmap $as_me 2017-09-05, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5 $as_echo_n "checking package version... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-08-15" >&5 -$as_echo "2017-08-15" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-09-05" >&5 +$as_echo "2017-09-05" >&6; } ### Read defaults @@ -4401,7 +4401,7 @@ fi # Define the identity of the package. PACKAGE='gmap' - VERSION='2017-08-15' + VERSION='2017-09-05' cat >>confdefs.h <<_ACEOF @@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by gmap $as_me 2017-08-15, which was +This file was extended by gmap $as_me 2017-09-05, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20044,7 +20044,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -gmap config.status 2017-08-15 +gmap config.status 2017-09-05 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/src/get-genome.c b/src/get-genome.c index 560fda6..1a751b2 100644 --- a/src/get-genome.c +++ b/src/get-genome.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: get-genome.c 207146 2017-06-10 00:20:34Z twu $"; +static char rcsid[] = "$Id: get-genome.c 209604 2017-09-01 21:51:43Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -62,6 +62,7 @@ static char *map_iitfile = NULL; static int nflanking = 0; static bool exonsp = false; static bool sequencep = false; +static bool genome_sequence_p = false; /* introns plus exons */ static bool uniquep = false; static bool force_label_p = false; @@ -104,6 +105,7 @@ static struct option long_options[] = { {"flanking", required_argument, 0, 'u'}, /* nflanking */ {"exons", no_argument, 0, 'E'}, /* exonsp */ {"sequence", no_argument, 0, 'S'}, /* sequencep */ + {"gsequence", no_argument, 0, 0}, /* genome_sequence_p */ {"nunique", no_argument, 0, 0}, /* uniquep */ {"exact", no_argument, 0, 0}, /* exactp */ {"signed", no_argument, 0, 's'}, /* signedp */ @@ -171,7 +173,8 @@ External map file options\n\ -M, --mapdir=directory Map directory\n\ -m, --map=iitfile Map file. If argument is '?' (with the quotes),\n\ this lists available map files.\n\ - -S, --sequence For a gene map file, prints the sequence\n\ + -S, --sequence For a gene map file, prints the coding sequence\n\ + --gsequence For a gene map file, prints the gene sequence (exons plus introns), one per line\n\ -E, --exons For a gene map file, prints the sequence, one exon per line\n\ --nunique For a gene map file, also prints the number of unique positions\n\ -k, --ranks Prints levels for non-overlapping printing of map hits\n\ @@ -971,6 +974,76 @@ genemap_print_sequence (char *annot, Univcoord_T chroffset, Genome_T genome) { static void +genemap_print_gsequence (char *annot, Univcoord_T chroffset, Genome_T genome) { + char *p; + char *gbuffer; + Chrpos_T exonstart, exonend, exonlow, exonhigh, exonlength, intronlength, + prev_exonlow = 0, prev_exonhigh = 0; + + /* Print header */ + p = annot; + while (*p != '\0' && *p != '\n') { + putchar(*p); + p++; + } + if (*p == '\n') p++; + printf("\n"); + + while (*p != '\0') { + if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) { + fprintf(stderr,"Can't parse exon coordinates in %s\n",p); + abort(); + } else { + if (exonstart <= exonend) { + exonlow = exonstart; + exonhigh = exonend; + if (prev_exonhigh > 0) { + intronlength = exonlow - prev_exonhigh - 1; + gbuffer = (char *) CALLOC(intronlength+1,sizeof(char)); + Genome_fill_buffer_simple(genome,/*left*/chroffset-1U+(prev_exonhigh+1), + intronlength,gbuffer); + printf("%s\n",gbuffer); + FREE(gbuffer); + } + exonlength = exonhigh - exonlow + 1; + gbuffer = (char *) CALLOC(exonlength+1,sizeof(char)); + Genome_fill_buffer_simple(genome,/*left*/chroffset-1U+exonlow,exonlength,gbuffer); + printf("%s\n",gbuffer); + FREE(gbuffer); + + } else { + exonlow = exonend; + exonhigh = exonstart; + if (prev_exonlow > 0) { + intronlength = prev_exonlow - 1 - exonhigh; + gbuffer = (char *) CALLOC(intronlength+1,sizeof(char)); + Genome_fill_buffer_simple(genome,/*left*/chroffset-1U+(exonhigh+1), + intronlength,gbuffer); + make_complement_inplace(gbuffer,intronlength); + printf("%s\n",gbuffer); + FREE(gbuffer); + } + exonlength = exonhigh - exonlow + 1; + gbuffer = (char *) CALLOC(exonlength+1,sizeof(char)); + Genome_fill_buffer_simple(genome,/*left*/chroffset-1U+exonlow,exonlength,gbuffer); + make_complement_inplace(gbuffer,exonlength); + printf("%s\n",gbuffer); + FREE(gbuffer); + } + + prev_exonlow = exonlow; + prev_exonhigh = exonhigh; + } + + while (*p != '\0' && *p != '\n') p++; + if (*p == '\n') p++; + } + + return; +} + + +static void genemap_print_unique (char *annot, Intlist_T unique_positions, Intlist_T unique_splicep, bool print_geneline_p) { char *p; Chrpos_T exonstart, exonend; @@ -1028,7 +1101,7 @@ print_interval (char *divstring, int index, IIT_T iit, int ndivs, Univ_IIT_T chr Univcoord_T chroffset; Intlist_T unique_positions, unique_splicep; - if (exonsp == true || sequencep == true || uniquep == true) { + if (exonsp == true || sequencep == true || genome_sequence_p == true || uniquep == true) { label = IIT_label(iit,index,&allocp); printf(">%s ",label); if (allocp == true) { @@ -1058,6 +1131,8 @@ print_interval (char *divstring, int index, IIT_T iit, int ndivs, Univ_IIT_T chr genemap_print_exons(annotation,chroffset,genome); } else if (sequencep == true) { genemap_print_sequence(annotation,chroffset,genome); + } else if (genome_sequence_p == true) { + genemap_print_gsequence(annotation,chroffset,genome); } else if (uniquep == true) { unique_positions = IIT_unique_positions(iit,index,divno); unique_splicep = IIT_unique_splicep(iit,index,divno); @@ -1178,6 +1253,9 @@ main (int argc, char *argv[]) { print_program_usage(); exit(0); + } else if (!strcmp(long_name,"gsequence")) { + genome_sequence_p = true; + } else if (!strcmp(long_name,"nunique")) { uniquep = true; @@ -1317,7 +1395,7 @@ main (int argc, char *argv[]) { } FREE(mapdir); - if (sequencep == true || exonsp == true) { + if (sequencep == true || genome_sequence_p == true || exonsp == true) { /* User is requesting all sequences or exons in the map file */ if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,/*divread*/READ_ALL,/*divstring*/NULL, /*add_iit_p*/true)) == NULL) { @@ -1606,7 +1684,7 @@ main (int argc, char *argv[]) { exit(9); } - if (exonsp == true || sequencep == true) { + if (exonsp == true || sequencep == true || genome_sequence_p == true) { if (snps_root == NULL || print_snps_mode == 0) { genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS, uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false); diff --git a/src/gmapindex.c b/src/gmapindex.c index 15bce5e..12f3d82 100644 --- a/src/gmapindex.c +++ b/src/gmapindex.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: gmapindex.c 186742 2016-03-31 00:44:01Z twu $"; +static char rcsid[] = "$Id: gmapindex.c 209605 2017-09-01 21:52:54Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -1766,7 +1766,7 @@ main (int argc, char *argv[]) { genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS, /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false); - Sarray_write_array(sarrayfile,genomecomp,genomelength); + Sarray_write_array(sarrayfile,genomecomp,(UINT4) genomelength); /* Bucket array */ #ifdef USE_SEPARATE_BUCKETS @@ -1780,7 +1780,7 @@ main (int argc, char *argv[]) { sprintf(indexjcompfile,"%s/%s.saindexj64strm",destdir,fileroot); Sarray_write_index_separate(indexiptrsfile,indexicompfile,indexjptrsfile,indexjcompfile, - sarrayfile,genomecomp,genomelength,/*compressp*/true); + sarrayfile,genomecomp,(UINT4) genomelength,/*compressp*/true); FREE(indexjcompfile); FREE(indexjptrsfile); FREE(indexicompfile); @@ -1791,7 +1791,7 @@ main (int argc, char *argv[]) { indexijcompfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".saindex64strm")+1,sizeof(char)); sprintf(indexijcompfile,"%s/%s.saindex64strm",destdir,fileroot); Sarray_write_index_interleaved(indexijptrsfile,indexijcompfile, - sarrayfile,genomecomp,genomelength,/*compressp*/true, + sarrayfile,genomecomp,(UINT4) genomelength,/*compressp*/true, CHARTABLE); FREE(indexijcompfile); FREE(indexijptrsfile); @@ -1842,7 +1842,7 @@ main (int argc, char *argv[]) { permuted_sarray_file = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".permuted_sarray")+1,sizeof(char)); sprintf(permuted_sarray_file,"%s/%s.permuted_sarray",destdir,fileroot); #if 0 - lcp = Sarray_compute_lcp(rankfile,permuted_sarray_file,sarrayfile,n); + lcp = Sarray_compute_lcp(rankfile,permuted_sarray_file,sarrayfile,(UINT4) n); FREE(permuted_sarray_file); FREE(rankfile); @@ -1854,13 +1854,13 @@ main (int argc, char *argv[]) { lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".salcpguide1024")+1,sizeof(char)); sprintf(lcpguidefile,"%s/%s.salcpguide1024",destdir,fileroot); - lcp_bytes = Bytecoding_write_exceptions_only(lcpexcfile,lcpguidefile,lcp,genomelength,/*guide_interval*/1024); + lcp_bytes = Bytecoding_write_exceptions_only(lcpexcfile,lcpguidefile,lcp,(UINT4) genomelength,/*guide_interval*/1024); FREE(lcpguidefile); FREE(lcpexcfile); FREE(lcp); /* Use lcp_bytes, which are more memory-efficient than lcp */ #else - lcp_bytes = Sarray_compute_lcp_bytes(&lcp_exceptions,&n_lcp_exceptions,rankfile,permuted_sarray_file,sarrayfile,n); + lcp_bytes = Sarray_compute_lcp_bytes(&lcp_exceptions,&n_lcp_exceptions,rankfile,permuted_sarray_file,sarrayfile,(UINT4) n); FREE(permuted_sarray_file); FREE(rankfile); @@ -1872,7 +1872,7 @@ main (int argc, char *argv[]) { lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".salcpguide1024")+1,sizeof(char)); sprintf(lcpguidefile,"%s/%s.salcpguide1024",destdir,fileroot); Bytecoding_write_bytes(/*bytesfile*/NULL,lcpexcfile,lcpguidefile,lcp_bytes,lcp_exceptions,n_lcp_exceptions, - genomelength,/*guide_interval*/1024); + (UINT4) genomelength,/*guide_interval*/1024); FREE(lcpguidefile); FREE(lcpexcfile); #endif @@ -1891,7 +1891,7 @@ main (int argc, char *argv[]) { /* Compute discriminating chars (DC) array */ discrim_chars = Sarray_discriminating_chars(&nbytes,sarrayfile,genomecomp,lcp_bytes,lcp_guide, - lcp_exceptions,/*guide_interval*/1024,n,CHARTABLE); + lcp_exceptions,/*guide_interval*/1024,(UINT4) n,CHARTABLE); FREE(sarrayfile); Genome_free(&genomecomp); /* No need to munmap SA anymore */ @@ -1901,7 +1901,7 @@ main (int argc, char *argv[]) { #if 0 /* Compute child array (relative values) */ - child = Sarray_compute_child(lcp_bytes,lcp_guide,lcp_exceptions,n); + child = Sarray_compute_child(lcp_bytes,lcp_guide,lcp_exceptions,(UINT4) n); FREE(lcp_exceptions); FREE(lcp_guide); @@ -1913,7 +1913,7 @@ main (int argc, char *argv[]) { childguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sachildguide1024")+1,sizeof(char)); sprintf(childguidefile,"%s/%s.sachildguide1024",destdir,fileroot); Bytecoding_write_lcpchilddc(lcpchilddcfile,childexcfile,childguidefile,child, - discrim_chars,lcp_bytes,genomelength,/*guide_interval*/1024); + discrim_chars,lcp_bytes,(UINT4) genomelength,/*guide_interval*/1024); FREE(childguidefile); FREE(childexcfile); FREE(lcpchilddcfile); @@ -1921,7 +1921,7 @@ main (int argc, char *argv[]) { FREE(child); #else /* Compute child array (relative values, directly to bytes and exceptions) */ - child_bytes = Sarray_compute_child_bytes(&child_exceptions,&n_child_exceptions,lcp_bytes,lcp_guide,lcp_exceptions,n); + child_bytes = Sarray_compute_child_bytes(&child_exceptions,&n_child_exceptions,lcp_bytes,lcp_guide,lcp_exceptions,(UINT4) n); FREE(lcp_exceptions); FREE(lcp_guide); @@ -1930,7 +1930,7 @@ main (int argc, char *argv[]) { childguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sachildguide1024")+1,sizeof(char)); sprintf(childguidefile,"%s/%s.sachildguide1024",destdir,fileroot); Bytecoding_write_bytes(/*bytesfile*/NULL,childexcfile,childguidefile,child_bytes,child_exceptions,n_child_exceptions, - genomelength,/*guide_interval*/1024); + (UINT4) genomelength,/*guide_interval*/1024); FREE(childguidefile); FREE(childexcfile); FREE(child_exceptions); @@ -1938,7 +1938,7 @@ main (int argc, char *argv[]) { /* Write combined lcpchilddc file */ lcpchilddcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".salcpchilddc")+1,sizeof(char)); sprintf(lcpchilddcfile,"%s/%s.salcpchilddc",destdir,fileroot); - Bytecoding_interleave_lcpchilddc(lcpchilddcfile,child_bytes,discrim_chars,lcp_bytes,genomelength); + Bytecoding_interleave_lcpchilddc(lcpchilddcfile,child_bytes,discrim_chars,lcp_bytes,(UINT4) genomelength); FREE(lcpchilddcfile); FREE(child_bytes); #endif @@ -2014,7 +2014,7 @@ main (int argc, char *argv[]) { genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS, /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false); Sarray_write_csa(csaptrfiles,csacompfiles,sasampleqfile,sasamplesfile,saindex0file, - sarrayfile,rankfile,genomecomp,genomelength,CHARTABLE); + sarrayfile,rankfile,genomecomp,(UINT4) genomelength,CHARTABLE); FREE(genomecomp); remove(rankfile); /* Need to delete remove(rankfile) from Sarray_compute_lcp */ @@ -2096,7 +2096,7 @@ main (int argc, char *argv[]) { FREE(childexcfile); Sarray_child_uncompress(genomecomp,lcpchilddc,lcp_guide,lcp_exceptions,n_lcp_exceptions, - child_guide,child_exceptions,n_child_exceptions,SA,genomelength,start,end); + child_guide,child_exceptions,n_child_exceptions,SA,(UINT4) genomelength,start,end); FREE(child_exceptions); /* Not using shared */ FREE(child_guide); /* Not using shared */ diff --git a/src/sarray-search.c b/src/sarray-search.c index a614cf4..37b7bca 100644 --- a/src/sarray-search.c +++ b/src/sarray-search.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: sarray-search.c 209125 2017-08-15 19:33:55Z twu $"; +static char rcsid[] = "$Id: sarray-search.c 209644 2017-09-05 17:41:06Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -4341,10 +4341,15 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T right_diagonals = List_push(right_diagonals,(void *) diagonal); } else if (elt->querystart_leftward < elt->queryend_leftward) { for (j = elt->npositions - 1; j >= 0; --j) { /* Go in this order to avoid reversing list at the end */ - debug13(printf("Creating right diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", - elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); - right_diagonals = List_push(right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward, - /*univdiagonal*/elt->positions[j])); + if (elt->querystart_leftward <= (*middle_diagonal)->querystart) { + debug13(printf("Not creating right diagonal that extends left of middle diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", + elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); + } else { + debug13(printf("Creating right diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", + elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); + right_diagonals = List_push(right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward, + /*univdiagonal*/elt->positions[j])); + } } } if (elt->temporaryp == true) { @@ -4368,10 +4373,15 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T left_diagonals = List_push(left_diagonals,(void *) diagonal); } else if (elt->querystart_leftward < elt->queryend_leftward) { for (j = 0; j < elt->npositions; j++) { /* Go in this order to avoid reversing list at the end */ - debug13(printf("Creating left diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", - elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); - left_diagonals = List_push(left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward, - /*univdiagonal*/elt->positions[j])); + if (elt->queryend_leftward >= (*middle_diagonal)->queryend) { + debug13(printf("Not creating left diagonal that extends right of middle diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", + elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); + } else { + debug13(printf("Creating left diagonal: query %d..%d (leftward %d..%d), diagonal %u\n", + elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset)); + left_diagonals = List_push(left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward, + /*univdiagonal*/elt->positions[j])); + } } } if (elt->temporaryp == true) { @@ -5431,24 +5441,26 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T #endif /* SUBDIVIDE_ENDS */ + /* C5. Process left diagonals in reverse */ diagonal_path = (List_T) NULL; - /* C5. Process left diagonals in reverse */ while (common_diagonal != NULL) { - diagonal_path = List_push(diagonal_path,(void *) common_diagonal); - common_diagonal = common_diagonal->prev; - } - /* Pops off in reverse */ - for (p = diagonal_path; p != NULL; p = List_next(p)) { - diagonal = (Univdiag_T) List_head(p); if (middle_diagonal->univdiagonal > chroffset + chrlength && common_diagonal->univdiagonal < chroffset + chrlength) { debug13(printf("Cannot handle common diagonal across circular origin: query %d..%d, diagonal %u\n", diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset)); } else { + diagonal_path = List_push(diagonal_path,(void *) common_diagonal); debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n", - diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset)); - middle_path = List_push(middle_path,(void *) diagonal); + common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset)); } + common_diagonal = common_diagonal->prev; + } + + /* Pops off in reverse */ + debug13(printf("Putting common diagonals (length %d) onto middle\n",List_length(diagonal_path))); + for (p = diagonal_path; p != NULL; p = List_next(p)) { + diagonal = (Univdiag_T) List_head(p); + middle_path = List_push(middle_path,(void *) diagonal); } List_free(&diagonal_path); @@ -7483,75 +7495,75 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); /* *chrhigh += 1U; */ } - middle_path_plus[i] = find_best_path(&(right_paths_plus[i]),&right_endpoints_sense,&right_endpoints_antisense, - &right_queryends_sense,&right_queryends_antisense, - &right_ambcoords_sense,&right_ambcoords_antisense, - &right_amb_knowni_sense,&right_amb_knowni_antisense, - &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense, - &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense, - &right_amb_probsi_sense,&right_amb_probsi_antisense, - &right_amb_probsj_sense,&right_amb_probsj_antisense, - &(left_paths_plus[i]),&left_endpoints_sense,&left_endpoints_antisense, - &left_querystarts_sense,&left_querystarts_antisense, - &left_ambcoords_sense,&left_ambcoords_antisense, - &left_amb_knowni_sense,&left_amb_knowni_antisense, - &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense, - &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense, - &left_amb_probsi_sense,&left_amb_probsi_antisense, - &left_amb_probsj_sense,&left_amb_probsj_antisense, - &(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i], - querylength,query_compress_fwd,chroffset,chrlength, - /*plusp*/true,genestrand,/*nmismatches_allowed*/nmisses_allowed); - - hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i], - right_endpoints_sense,right_endpoints_antisense, - right_queryends_sense,right_queryends_antisense, - right_ambcoords_sense,right_ambcoords_antisense, - right_amb_knowni_sense,right_amb_knowni_antisense, - right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense, - right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense, - right_amb_probsi_sense,right_amb_probsi_antisense, - right_amb_probsj_sense,right_amb_probsj_antisense, - - left_endpoints_sense,left_endpoints_antisense, - left_querystarts_sense,left_querystarts_antisense, - left_ambcoords_sense,left_ambcoords_antisense, - left_amb_knowni_sense,left_amb_knowni_antisense, - left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense, - left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense, - left_amb_probsi_sense,left_amb_probsi_antisense, - left_amb_probsj_sense,left_amb_probsj_antisense, - - chrnum,chroffset,chrhigh,chrlength, - querylength,query_compress_fwd,/*plusp*/true,genestrand, - /*max_mismatches_allowed*/nmisses_allowed); + if ((middle_path_plus[i] = find_best_path(&(right_paths_plus[i]),&right_endpoints_sense,&right_endpoints_antisense, + &right_queryends_sense,&right_queryends_antisense, + &right_ambcoords_sense,&right_ambcoords_antisense, + &right_amb_knowni_sense,&right_amb_knowni_antisense, + &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense, + &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense, + &right_amb_probsi_sense,&right_amb_probsi_antisense, + &right_amb_probsj_sense,&right_amb_probsj_antisense, + &(left_paths_plus[i]),&left_endpoints_sense,&left_endpoints_antisense, + &left_querystarts_sense,&left_querystarts_antisense, + &left_ambcoords_sense,&left_ambcoords_antisense, + &left_amb_knowni_sense,&left_amb_knowni_antisense, + &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense, + &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense, + &left_amb_probsi_sense,&left_amb_probsi_antisense, + &left_amb_probsj_sense,&left_amb_probsj_antisense, + &(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i], + querylength,query_compress_fwd,chroffset,chrlength, + /*plusp*/true,genestrand,/*nmismatches_allowed*/nmisses_allowed)) != NULL) { + hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i], + right_endpoints_sense,right_endpoints_antisense, + right_queryends_sense,right_queryends_antisense, + right_ambcoords_sense,right_ambcoords_antisense, + right_amb_knowni_sense,right_amb_knowni_antisense, + right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense, + right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense, + right_amb_probsi_sense,right_amb_probsi_antisense, + right_amb_probsj_sense,right_amb_probsj_antisense, + + left_endpoints_sense,left_endpoints_antisense, + left_querystarts_sense,left_querystarts_antisense, + left_ambcoords_sense,left_ambcoords_antisense, + left_amb_knowni_sense,left_amb_knowni_antisense, + left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense, + left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense, + left_amb_probsi_sense,left_amb_probsi_antisense, + left_amb_probsj_sense,left_amb_probsj_antisense, + + chrnum,chroffset,chrhigh,chrlength, + querylength,query_compress_fwd,/*plusp*/true,genestrand, + /*max_mismatches_allowed*/nmisses_allowed); #if 0 - if (0 && completep == false) { - *sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i], - chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength, - genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, - oligoindices_minor,diagpool,cellpool); + if (0 && completep == false) { + *sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i], + chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength, + genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); } #endif - Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense); - Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense); - Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense); - Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense); - Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense); - Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense); - Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense); - Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense); + Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense); + Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense); + Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense); + Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense); + Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense); + Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense); + Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense); + Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense); - Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense); - Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense); - Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense); - Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense); - Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense); - Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense); - Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense); - Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense); + Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense); + Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense); + Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense); + Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense); + Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense); + Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense); + Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense); + Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense); + } } } @@ -7566,76 +7578,75 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); /* *chrhigh += 1U; */ } - middle_path_minus[i] = find_best_path(&(right_paths_minus[i]),&right_endpoints_sense,&right_endpoints_antisense, - &right_queryends_sense,&right_queryends_antisense, - &right_ambcoords_sense,&right_ambcoords_antisense, - &right_amb_knowni_sense,&right_amb_knowni_antisense, - &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense, - &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense, - &right_amb_probsi_sense,&right_amb_probsi_antisense, - &right_amb_probsj_sense,&right_amb_probsj_antisense, - &(left_paths_minus[i]),&left_endpoints_sense,&left_endpoints_antisense, - &left_querystarts_sense,&left_querystarts_antisense, - &left_ambcoords_sense,&left_ambcoords_antisense, - &left_amb_knowni_sense,&left_amb_knowni_antisense, - &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense, - &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense, - &left_amb_probsi_sense,&left_amb_probsi_antisense, - &left_amb_probsj_sense,&left_amb_probsj_antisense, - &(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i], - querylength,query_compress_rev,chroffset,chrlength, - /*plusp*/false,genestrand,/*nmismatches_allowed*/nmisses_allowed); - - hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i], - right_endpoints_sense,right_endpoints_antisense, - right_queryends_sense,right_queryends_antisense, - right_ambcoords_sense,right_ambcoords_antisense, - right_amb_knowni_sense,right_amb_knowni_antisense, - right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense, - right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense, - right_amb_probsi_sense,right_amb_probsi_antisense, - right_amb_probsj_sense,right_amb_probsj_antisense, - - left_endpoints_sense,left_endpoints_antisense, - left_querystarts_sense,left_querystarts_antisense, - left_ambcoords_sense,left_ambcoords_antisense, - left_amb_knowni_sense,left_amb_knowni_antisense, - left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense, - left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense, - left_amb_probsi_sense,left_amb_probsi_antisense, - left_amb_probsj_sense,left_amb_probsj_antisense, + if ((middle_path_minus[i] = find_best_path(&(right_paths_minus[i]),&right_endpoints_sense,&right_endpoints_antisense, + &right_queryends_sense,&right_queryends_antisense, + &right_ambcoords_sense,&right_ambcoords_antisense, + &right_amb_knowni_sense,&right_amb_knowni_antisense, + &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense, + &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense, + &right_amb_probsi_sense,&right_amb_probsi_antisense, + &right_amb_probsj_sense,&right_amb_probsj_antisense, + &(left_paths_minus[i]),&left_endpoints_sense,&left_endpoints_antisense, + &left_querystarts_sense,&left_querystarts_antisense, + &left_ambcoords_sense,&left_ambcoords_antisense, + &left_amb_knowni_sense,&left_amb_knowni_antisense, + &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense, + &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense, + &left_amb_probsi_sense,&left_amb_probsi_antisense, + &left_amb_probsj_sense,&left_amb_probsj_antisense, + &(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i], + querylength,query_compress_rev,chroffset,chrlength, + /*plusp*/false,genestrand,/*nmismatches_allowed*/nmisses_allowed)) != NULL) { + hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i], + right_endpoints_sense,right_endpoints_antisense, + right_queryends_sense,right_queryends_antisense, + right_ambcoords_sense,right_ambcoords_antisense, + right_amb_knowni_sense,right_amb_knowni_antisense, + right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense, + right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense, + right_amb_probsi_sense,right_amb_probsi_antisense, + right_amb_probsj_sense,right_amb_probsj_antisense, + + left_endpoints_sense,left_endpoints_antisense, + left_querystarts_sense,left_querystarts_antisense, + left_ambcoords_sense,left_ambcoords_antisense, + left_amb_knowni_sense,left_amb_knowni_antisense, + left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense, + left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense, + left_amb_probsi_sense,left_amb_probsi_antisense, + left_amb_probsj_sense,left_amb_probsj_antisense, - chrnum,chroffset,chrhigh,chrlength, - querylength,query_compress_rev,/*plusp*/false,genestrand, - /*max_mismatches_allowed*/nmisses_allowed); + chrnum,chroffset,chrhigh,chrlength, + querylength,query_compress_rev,/*plusp*/false,genestrand, + /*max_mismatches_allowed*/nmisses_allowed); #if 0 - if (0 && completep == false) { - *sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i], - chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength, - genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, - oligoindices_minor,diagpool,cellpool); - } -#endif - - Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense); - Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense); - Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense); - Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense); - Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense); - Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense); - Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense); - Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense); - - Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense); - Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense); - Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense); - Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense); - Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense); - Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense); - Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense); - Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense); - + if (0 && completep == false) { + *sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i], + chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength, + genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + } +#endif + + Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense); + Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense); + Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense); + Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense); + Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense); + Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense); + Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense); + Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense); + + Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense); + Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense); + Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense); + Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense); + Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense); + Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense); + Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense); + Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense); + } } } diff --git a/src/stage1hr.c b/src/stage1hr.c index 790be8c..9768290 100644 --- a/src/stage1hr.c +++ b/src/stage1hr.c @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage1hr.c 209122 2017-08-15 19:29:33Z twu $"; +static char rcsid[] = "$Id: stage1hr.c 209609 2017-09-01 21:57:26Z twu $"; #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -22846,7 +22846,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c distantsplicing5 = (List_T) NULL; distantsplicing3 = (List_T) NULL; - if (max_splice_mismatches_5 >= 0) { + if (nmismatches <= max_splice_mismatches_5) { debug4e(printf("Sorting splice ends\n")); startfrags_plus_5[nmismatches] = Substring_sort_siteN_halves(startfrags_plus_5[nmismatches],/*ascendingp*/true); endfrags_plus_5[nmismatches] = Substring_sort_siteN_halves(endfrags_plus_5[nmismatches],/*ascendingp*/true); @@ -22881,7 +22881,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c } } - if (max_splice_mismatches_3 >= 0) { + if (nmismatches <= max_splice_mismatches_3) { debug4e(printf("Sorting splice ends\n")); startfrags_plus_3[nmismatches] = Substring_sort_siteN_halves(startfrags_plus_3[nmismatches],/*ascendingp*/true); endfrags_plus_3[nmismatches] = Substring_sort_siteN_halves(endfrags_plus_3[nmismatches],/*ascendingp*/true); -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
