This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository clustalo.
commit bacfc626b5362497d5be8f54cb866d70cbdff095 Author: Andreas Tille <[email protected]> Date: Mon Jul 4 11:14:49 2016 +0200 Imported Upstream version 1.2.2 --- Doxyfile | 2 +- INSTALL | 64 ++++ configure | 20 +- configure.ac | 6 +- src/clustal-omega-config.h | 10 +- src/clustal-omega.c | 78 ++++- src/clustal-omega.h | 2 + src/clustal/hhalign_wrapper.c | 171 ++++++++-- src/clustal/ktuple_pair.c | 8 +- src/clustal/mbed.c | 11 +- src/clustal/pair_dist.c | 38 ++- src/clustal/pair_dist.h | 6 +- src/clustal/seq.c | 106 ++++++- src/clustal/seq.h | 8 +- src/clustal/symmatrix.c | 48 ++- src/hhalign/hash-C.h | 6 +- src/hhalign/hhalign.cpp | 12 +- src/hhalign/hhalign.h | 4 +- src/hhalign/hhalignment-C.h | 687 ++++++++++++++++++++++++---------------- src/hhalign/hhfullalignment-C.h | 3 +- src/hhalign/hhfunc-C.h | 7 +- src/hhalign/hhhalfalignment-C.h | 3 +- src/hhalign/hhhit-C.h | 7 +- src/hhalign/hhhitlist-C.h | 8 +- src/hhalign/hhhmm-C.h | 3 +- src/hhalign/util-C.h | 3 +- src/mymain.c | 22 +- src/squid/a2m.c | 9 +- src/squid/msa.c | 78 ++++- src/squid/msa.h | 7 +- src/squid/sqio.c | 21 +- src/squid/squid.h | 3 + src/squid/stockholm.c | 99 +++++- 33 files changed, 1160 insertions(+), 400 deletions(-) diff --git a/Doxyfile b/Doxyfile index 6a5c02f..0c9b5c9 100644 --- a/Doxyfile +++ b/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = "Clustal Omega" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.2.1 +PROJECT_NUMBER = 1.2.2 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/INSTALL b/INSTALL index ede75b5..b7c0645 100644 --- a/INSTALL +++ b/INSTALL @@ -282,6 +282,70 @@ not `/usr/local'. It is recommended to use the following options: ./configure --prefix=/boot/common + + On Windows do + + 1. Preparation + + 1.1. Install free MinGW64 on Windows 7. Download + mingw-w64-bin_x86_64-mingw_20111101_sezero.zip from + http://sourceforge.net/projects/mingw-w64/files/Toolchains + targetting Win64/Personal Builds/sezero_4.5_20111101/, extract + it, move mingw64 folder to C:\ and rename it to mingw. MinGW64 + provides tools to develop 64-bit Windows applications using + gcc and g++. With MinGW64, some software developed for Linux + platform can be built on Windows. + + 1.2. There is a file named pthreads-w64.zip in C:\mingw + folder. Extract it under C:\mingw folder. + + 1.3. Download MSYS-1.0.11.exe from + http://sourceforge.net/projects/mingw/files/MSYS/Base/msys-core/msys-1.0.11/ + and install it. + + 1.4. Download Clustal Omega source from + http://www.clustal.org/omega/clustal-omega-x.x.x.tar.gz (where + x.x.x is the current version) + + 1.5. Copy downloaded file to MSYS. If you installed MSYS in + C:\msys and your account is Administrator, copy it to + C:\msys\1.0\home\Administrator. You can do it using Windows + explorer. + + 1.6. Download argtable2-13.tar.gz from + http://argtable.sourceforge.net/ and copy it to MSYS. This is + required by Clustal Omega. + + 2. Configuration and Build process + + 2.1. Launch MSYS and extract argtable2 source as tar xfz + argtable2-13.tar.gz. + + 2.2. Extract Clustal Omega source as tar xfz + clustal-omega-1.2.0.tar.gz. + + 2.3. cd argtable2-13; ./configure; make; make install + + 2.4. cd ~/clustal-omega-1.2.0 + + 2.5. ./configure CFLAGS='-I/usr/local/include + -DSRE_STRICT_ANSI' LDFLAGS='-L/usr/local/lib' + + 2.6. make; make install + + 2.7. You can find clustalo.exe in /usr/local/bin folder which + is C:\msys\1.0\local/bin. + + 2.8. Following DLLs are necessary to run clustalo.exe. Put + them in the same folder where clustalo.exe + exists. C:\mingw\bin\libcc_sjlj-1.dll, + C:\mingw\bin\libgomp-1.dll, C:\mingw\bin\libstdc++-6.dll, + C:\mingw\bin\pthreadGC2-w64.dll + + (lifted from + http://www.blaststation.com/freestuff/en/howtoBuildx64ClustalO.php, + as of 2014-10-14) + Specifying the System Type ========================== diff --git a/configure b/configure index 2bc6c4e..b2a728b 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.63 for Clustal Omega 1.2.1. +# Generated by GNU Autoconf 2.63 for Clustal Omega 1.2.2. # # Report bugs to <[email protected]>. # @@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='Clustal Omega' PACKAGE_TARNAME='clustal-omega' -PACKAGE_VERSION='1.2.1' -PACKAGE_STRING='Clustal Omega 1.2.1' +PACKAGE_VERSION='1.2.2' +PACKAGE_STRING='Clustal Omega 1.2.2' PACKAGE_BUGREPORT='[email protected]' # Factoring default headers for most tests. @@ -1483,7 +1483,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Clustal Omega 1.2.1 to adapt to many kinds of systems. +\`configure' configures Clustal Omega 1.2.2 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1553,7 +1553,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Clustal Omega 1.2.1:";; + short | recursive ) echo "Configuration of Clustal Omega 1.2.2:";; esac cat <<\_ACEOF @@ -1659,7 +1659,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Clustal Omega configure 1.2.1 +Clustal Omega configure 1.2.2 generated by GNU Autoconf 2.63 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1673,7 +1673,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Clustal Omega $as_me 1.2.1, which was +It was created by Clustal Omega $as_me 1.2.2, which was generated by GNU Autoconf 2.63. Invocation command line was $ $0 $@ @@ -2492,7 +2492,7 @@ fi # Define the identity of the package. PACKAGE='clustal-omega' - VERSION='1.2.1' + VERSION='1.2.2' cat >>confdefs.h <<_ACEOF @@ -21790,7 +21790,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Clustal Omega $as_me 1.2.1, which was +This file was extended by Clustal Omega $as_me 1.2.2, which was generated by GNU Autoconf 2.63. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -21853,7 +21853,7 @@ Report bugs to <[email protected]>." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_version="\\ -Clustal Omega config.status 1.2.1 +Clustal Omega config.status 1.2.2 configured by $0, generated by GNU Autoconf 2.63, with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index d825cb8..aa10e17 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,6 @@ # configure.ac for Clustal Omega # -# RCS $Id: configure.ac 292 2014-02-28 14:26:55Z fabian $ +# RCS $Id: configure.ac 310 2016-06-13 14:11:35Z fabian $ # release @@ -26,7 +26,9 @@ #PACKAGE_CODENAME="FilumVitae" #AC_INIT([Clustal Omega], [1.2.0], [[email protected]]) #PACKAGE_CODENAME="AndreaGiacomo" -AC_INIT([Clustal Omega], [1.2.1], [[email protected]]) +#AC_INIT([Clustal Omega], [1.2.1], [[email protected]]) +#PACKAGE_CODENAME="AndreaGiacomo" +AC_INIT([Clustal Omega], [1.2.2], [[email protected]]) PACKAGE_CODENAME="AndreaGiacomo" # The AC_INIT macro can take any source file as an argument. It just diff --git a/src/clustal-omega-config.h b/src/clustal-omega-config.h index 300343a..ba86373 100644 --- a/src/clustal-omega-config.h +++ b/src/clustal-omega-config.h @@ -199,7 +199,9 @@ /* #undef MINGW */ /* No-debug Mode */ -/* #undef NDEBUG */ +#ifndef CLUSTAL_OMEGA_NDEBUG +#define CLUSTAL_OMEGA_NDEBUG /**/ +#endif /* Some strange OS */ /* #undef OTHEROS */ @@ -226,7 +228,7 @@ /* Define to the full name and version of this package. */ #ifndef CLUSTAL_OMEGA_PACKAGE_STRING -#define CLUSTAL_OMEGA_PACKAGE_STRING "Clustal Omega 1.2.1" +#define CLUSTAL_OMEGA_PACKAGE_STRING "Clustal Omega 1.2.2" #endif /* Define to the one symbol short name of this package. */ @@ -236,7 +238,7 @@ /* Define to the version of this package. */ #ifndef CLUSTAL_OMEGA_PACKAGE_VERSION -#define CLUSTAL_OMEGA_PACKAGE_VERSION "1.2.1" +#define CLUSTAL_OMEGA_PACKAGE_VERSION "1.2.2" #endif /* The size of `fpos_t', as computed by sizeof. */ @@ -299,7 +301,7 @@ /* Version number of package */ #ifndef CLUSTAL_OMEGA_VERSION -#define CLUSTAL_OMEGA_VERSION "1.2.1" +#define CLUSTAL_OMEGA_VERSION "1.2.2" #endif /* Define if using the dmalloc debugging malloc package */ diff --git a/src/clustal-omega.c b/src/clustal-omega.c index 8fff342..f6ccfd2 100644 --- a/src/clustal-omega.c +++ b/src/clustal-omega.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: clustal-omega.c 290 2013-09-20 15:18:12Z fabian $ + * RCS $Id: clustal-omega.c 304 2016-06-13 13:39:13Z fabian $ */ #ifdef HAVE_CONFIG_H @@ -207,7 +207,8 @@ SetDefaultAlnOpts(opts_t *prOpts) { prOpts->ppcHMMInput = NULL; prOpts->iHMMInputFiles = 0; - + prOpts->pcHMMBatch = NULL; /* FS, r291 -> */ + prOpts->iNumIterations = 0; prOpts->bIterationsAuto = FALSE; prOpts->iMaxGuidetreeIterations = INT_MAX; @@ -958,7 +959,7 @@ SetAutoOptions(opts_t *prOpts, int iNumSeq) { int Align(mseq_t *prMSeq, mseq_t *prMSeqProfile, - opts_t *prOpts) { + opts_t *prOpts) { /* Note DEVEL 291: at this stage pppcHMMBNames is set but ppiHMMBindex is not */ /* HMM */ @@ -980,6 +981,10 @@ Align(mseq_t *prMSeq, /* last dAlnScore for iteration */ double dLastAlnScore = -666.666; + /* HMM batch file */ + char **ppcHMMbatch = NULL; /* names of unique HMM files */ + int iHMMbatch = 0; /* number of unique HMM files */ + int i, j; /* aux */ assert(NULL != prMSeq); @@ -1027,17 +1032,57 @@ Align(mseq_t *prMSeq, } - /* Read backgrounds HMMs and store in prHMMs + /* Read backgrounds HMMs and store in prHMMs (Devel 291) * */ - if (0 < prOpts->iHMMInputFiles) { + if (NULL != prOpts->pcHMMBatch){ + int i, j, k; + + for (i = 0; i < prMSeq->nseqs; i++){ + + if (NULL != prMSeq->pppcHMMBNames[i]){ + for (j = 0; NULL != prMSeq->pppcHMMBNames[i][j]; j++){ + + for (k = 0; k < iHMMbatch; k++){ + if (0 == strcmp(ppcHMMbatch[k], prMSeq->pppcHMMBNames[i][j])){ + prMSeq->ppiHMMBindex[i][j] = k; + break; /* HMM already registered */ + } + } /* went through HMM batch files already identified */ + if (k == iHMMbatch){ + FILE *pfHMM = NULL; + if (NULL == (pfHMM = fopen(prMSeq->pppcHMMBNames[i][j], "r"))){ + prMSeq->ppiHMMBindex[i][j] = -1; + Log(&rLog, LOG_WARN, "Background HMM %s for %s (%d/%d) does not exist", + prMSeq->pppcHMMBNames[i][j], prMSeq->sqinfo[i].name, i, j); + } + else { + fclose(pfHMM); pfHMM = NULL; + ppcHMMbatch = (char **)realloc(ppcHMMbatch, (iHMMbatch+1)*sizeof(char *)); + ppcHMMbatch[iHMMbatch] = strdup(prMSeq->pppcHMMBNames[i][j]); + prMSeq->ppiHMMBindex[i][j] = k; + iHMMbatch++; + } + } + + } /* j = 0; NULL != prMSeq->pppcHMMBNames[i][j] */ + } /* NULL != prMSeq->pppcHMMBNames[i] */ + else { + /* void */ + } + } /* 0 <= i < prMSeq->nseqs */ + + } /* there was a HMM batch file */ + + + if (0 < prOpts->iHMMInputFiles) { int iHMMInfileIndex; /** * @warning old structure used to be initialised like this: * hmm_light rHMM = {0}; */ - prHMMs = (hmm_light *) CKMALLOC(prOpts->iHMMInputFiles * sizeof(hmm_light)); + prHMMs = (hmm_light *) CKMALLOC( (prOpts->iHMMInputFiles) * sizeof(hmm_light)); for (iHMMInfileIndex=0; iHMMInfileIndex<prOpts->iHMMInputFiles; iHMMInfileIndex++) { char *pcHMMInput = prOpts->ppcHMMInput[iHMMInfileIndex]; @@ -1077,6 +1122,24 @@ Align(mseq_t *prMSeq, CKFREE(prOpts->ppcHMMInput); } /* there were background HMM files */ + /** read HMMs specific to individual sequences + */ + if (iHMMbatch > 0){ + int i; + + prHMMs = (hmm_light *) realloc( prHMMs, (prOpts->iHMMInputFiles + iHMMbatch + 1) * sizeof(hmm_light)); + + for (i = 0; i < iHMMbatch; i++){ + char *pcHMMInput = ppcHMMbatch[i]; + + if (OK != readHMMWrapper(&prHMMs[i + prOpts->iHMMInputFiles], pcHMMInput)){ + Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMMInput); + return -1; + } + + } /* 0 <= i < iHMMbatch */ + + } /* there were HMM batch files */ /* If the input ("non-profile") sequences are aligned, then turn @@ -1172,6 +1235,9 @@ Align(mseq_t *prMSeq, if (prOpts->iMaxHMMIterations < 0){ Log(&rLog, LOG_VERBOSE, "iMaxHMMIterations < 0 (%d), will not perform alignment", prOpts->iMaxHMMIterations); + if (NULL != piOrderLR){ + free(piOrderLR); piOrderLR = NULL; + } return 0; } diff --git a/src/clustal-omega.h b/src/clustal-omega.h index e6360db..d0d483c 100644 --- a/src/clustal-omega.h +++ b/src/clustal-omega.h @@ -121,6 +121,8 @@ typedef struct { /** number of provided HMM input files. not really a user option but need for ppcHMMInput */ int iHMMInputFiles; + /** HMM batch-file, specify HMMs for individual sequences. FS, r291 -> */ + char *pcHMMBatch; /* Iteration */ diff --git a/src/clustal/hhalign_wrapper.c b/src/clustal/hhalign_wrapper.c index 287d90b..269fc56 100644 --- a/src/clustal/hhalign_wrapper.c +++ b/src/clustal/hhalign_wrapper.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhalign_wrapper.c 290 2013-09-20 15:18:12Z fabian $ + * RCS $Id: hhalign_wrapper.c 306 2016-06-13 13:49:04Z fabian $ */ #ifdef HAVE_CONFIG_H @@ -50,7 +50,7 @@ */ void SetDefaultHhalignPara(hhalign_para *prHhalignPara) { - prHhalignPara->iMacRamMB = 2048; /* 2048 default|give 2GB to MAC algorithm */ + prHhalignPara->iMacRamMB = 8000; /* default|give just under 8GB to MAC algorithm, FS, 2016-04-18, went from 2GB to 8GB */ prHhalignPara->bIsDna = false; /* protein mode unless we say otherwise */ prHhalignPara->bIsRna = false; prHhalignPara->pca = -UNITY; @@ -550,7 +550,8 @@ PosteriorProbabilities(mseq_t *prMSeq, hmm_light rHMMalignment, hhalign_para rHh zcError[0] = '\0'; hhalign(ppcCopy, 1, NULL, ppcRepresent, 0, NULL, - &dScore, &rHMMalignment, NULL, NULL, NULL, NULL, + &dScore, &rHMMalignment, &rHMMalignment, + NULL, NULL, NULL, NULL, rHhalignPara, &prHHscores[iS], 0/* debug */, FALSE /* Log-Level*/, zcAux, zcError); if (NULL != strstr(zcError, "Viterbi")){ @@ -682,7 +683,7 @@ PileUp(mseq_t *prMSeq, hhalign_para rHhalignPara, int iClustersize) * external HMM not used at this stage (prHMM=NULL) */ iHHret = hhalign(ppcProfilePro, iI, NULL/*pdWeightL*/, ppcProfileSeq, 1, NULL/*pdWeightR*/, - &dScore, prHMM, + &dScore, prHMM, prHMM, pcConsensPro, pcReprsntPro, pcConsensSeq, pcReprsntSeq, rHhalignPara, &rHHscores, @@ -732,7 +733,7 @@ PileUp(mseq_t *prMSeq, hhalign_para rHhalignPara, int iClustersize) prHMM = &rHMMprofile; hhalign(ppcReprsnt, 0, NULL, ppcProfileSeq, 1, NULL, - &dScore, prHMM, + &dScore, prHMM, prHMM, pcConsensPro, pcReprsntPro, pcConsensSeq, pcReprsntSeq, rHhalignPara, &rHHscores, @@ -893,7 +894,10 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, double *pdWeightsL = NULL; /* sequence weights of left profile */ double *pdWeightsR = NULL; /* sequence weights of right profile */ int iMergeNodeCounter = 0; - hmm_light *prHMM = NULL; + hmm_light *prHMM = NULL; + hmm_light *prHMMleft = NULL; + hmm_light *prHMMrght = NULL; + hmm_light *prHMMnull = NULL; bool bPrintCR = (rLog.iLogLevelEnabled<=LOG_VERBOSE) ? FALSE : TRUE; #if TIMING char zcStopwatchMsg[1024]; @@ -903,15 +907,28 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, #endif hhalign_scores rHHscores = {0}; - if (NULL != prHMMList) { +#if 0 /* DEVEL 291 */ + if (NULL != prHMMList) { /* FIXME DEVEL 291: replace this outer test with iHMMCount>0*/ if (iHMMCount>1) { Log(&rLog, LOG_WARN, "FIXME: Using only first of %u HMMs (needs implementation)", iHMMCount); } - prHMM = &(prHMMList[0]); + prHMM = &(prHMMList[0]); /* FIXME DEVEL 291: check for NULL */ } else { /* FIXME: prHMM not allowed to be NULL and therefore pseudo allocated here */ prHMM = (hmm_light *) CKCALLOC(1, sizeof(hmm_light)); } +#else + prHMMnull = (hmm_light *) CKCALLOC(1, sizeof(hmm_light)); + if ( (iHMMCount > 0) && (NULL != prHMMList) ){ + prHMM = &(prHMMList[0]); + if (iHMMCount > 1) { + Log(&rLog, LOG_WARN, "FIXME: Using only first of %u HMMs (needs implementation)", iHMMCount); + } + } + else { + prHMM = prHMMnull; /* prHMM not allowed to be NULL and therefore assigned to pseudo allocated */ + } +#endif assert(NULL!=prMSeq); @@ -1075,7 +1092,38 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, } } +#if 1 /* DEVEL 291 */ + /* + * Note: if there is a HMM-batch file, then prMSeq->ppiHMMBindex is not NULL; + * ppiLeafList[iL/iR][0] is the 'lead' sequence in a profile; + * prMSeq->ppiHMMBindex[ppiLeafList[iL][0]] are the HMMs associated with the lead sequence; + * this could be NULL if there are no HMMs associated with this particular sequence + * at the moment only 1st HMM can be used, prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0]; + * the index of this HMM can be '-1' if the specified HMM file does not exist + * Note: we only use prHMMleft/prHMMrght, even if global HMM (--hmm-in) is specified + **/ + if ( (NULL != prMSeq->ppiHMMBindex) && (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iL][0]]) && + (prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] > -1) ){ + prHMMleft = &(prHMMList[prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0]]); + } + else if (iHMMCount > 0){ + prHMMleft = prHMM; + } + else { + prHMMleft = prHMMnull; + } + if ( (NULL != prMSeq->ppiHMMBindex) && (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iR][0]]) && + (prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0] > -1) ){ + prHMMrght = &(prHMMList[prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0]]); + } + else if (iHMMCount > 0){ + prHMMrght = prHMM; + } + else { + prHMMrght = prHMMnull; + } +#endif /* align individual sequences to HMM; * - use representative sequence to get gapping @@ -1086,15 +1134,16 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, * full HMM but that does not seem to work at all * -- try harder! Fail better! */ - if ( (piLeafCount[iL] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){ + /*if ( (piLeafCount[iL] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){*/ + if (0 != prHMMleft->L){ int i, j; - pcReprsnt1 = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char)); - for (i = 0; i < prHMM->L; i++){ - pcReprsnt1[i] = prHMM->seq[prHMM->ncons][i+1]; + pcReprsnt1 = CKCALLOC(prHMMleft->L+strlen(ppcProfile1[0])+1, sizeof(char)); + for (i = 0; i < prHMMleft->L; i++){ + pcReprsnt1[i] = prHMMleft->seq[prHMMleft->ncons][i+1]; } ppcCopy1 = CKCALLOC(piLeafCount[iL], sizeof(char *)); for (j = 0; j < piLeafCount[iL]; j++){ - ppcCopy1[j] = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char)); + ppcCopy1[j] = CKCALLOC(prHMMleft->L+strlen(ppcProfile1[0])+1, sizeof(char)); for (i = 0; i < (int) strlen(ppcProfile1[0]); i++){ ppcCopy1[j][i] = ppcProfile1[j][i]; } @@ -1118,13 +1167,13 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, FS, r236 -> r237 */ int iLenA = strlen(ppcCopy1[0]); - int iLenH = prHMM->L; + int iLenH = prHMMleft->L; int iHHret = 0; if (iLenH < iLenA){ iHHret = hhalign(ppcReprsnt1, 0/* only one representative seq*/, NULL, ppcCopy1, piLeafCount[iL], pdWeightsL, - &dScore, prHMM, + &dScore, prHMMleft, prHMMleft, NULL, NULL, NULL, NULL, rHhalignPara, &rHHscores, iAux_FS++, /* DEBUG ARGUMENT */ rLog.iLogLevelEnabled, @@ -1133,7 +1182,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, else { iHHret = hhalign(ppcCopy1, piLeafCount[iL], pdWeightsL, ppcReprsnt1, 0/* only one representative seq*/, NULL, - &dScore, prHMM, + &dScore, prHMMleft, prHMMleft, NULL, NULL, NULL, NULL, rHhalignPara, &rHHscores, iAux_FS++, /* DEBUG ARGUMENT */ rLog.iLogLevelEnabled, @@ -1159,8 +1208,8 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, * it only contains a gap if all sequences of the profile * have a gap at this position */ - pcConsens1 = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char)); - for (i = 0; i < prHMM->L; i++){ + pcConsens1 = CKCALLOC(prHMMleft->L+strlen(ppcProfile1[0])+1, sizeof(char)); + for (i = 0; i < prHMMleft->L; i++){ for (j = 0, pcConsens1[i] = '-'; (j < piLeafCount[iL]) && ('-' == pcConsens1[i]); j++){ pcConsens1[i] = ppcCopy1[j][i]; } @@ -1173,16 +1222,17 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, #endif } /* ( (1 == piLeafCount[iL]) && (0 != prHMM->L) ) */ - if ( (piLeafCount[iR] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){ + /*if ( (piLeafCount[iR] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){*/ + if (0 != prHMMrght->L){ int i, j; - pcReprsnt2 = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char)); - for (i = 0; i < prHMM->L; i++){ - pcReprsnt2[i] = prHMM->seq[prHMM->ncons][i+1]; + pcReprsnt2 = CKCALLOC(prHMMrght->L+strlen(ppcProfile2[0])+1, sizeof(char)); + for (i = 0; i < prHMMrght->L; i++){ + pcReprsnt2[i] = prHMMrght->seq[prHMMrght->ncons][i+1]; } ppcCopy2 = CKCALLOC(piLeafCount[iR], sizeof(char *)); for (j = 0; j < piLeafCount[iR]; j++){ - ppcCopy2[j] = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char)); + ppcCopy2[j] = CKCALLOC(prHMMrght->L+strlen(ppcProfile2[0])+1, sizeof(char)); for (i = 0; i < (int) strlen(ppcProfile2[0]); i++){ ppcCopy2[j][i] = ppcProfile2[j][i]; } @@ -1206,13 +1256,13 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, FS, r236 -> r237 */ int iLenA = strlen(ppcCopy2[0]); - int iLenH = prHMM->L; + int iLenH = prHMMrght->L; int iHHret = 0; if (iLenH < iLenA){ iHHret = hhalign(ppcReprsnt2, 0/* only one representative seq */, NULL, ppcCopy2, piLeafCount[iR], pdWeightsR, - &dScore, prHMM, + &dScore, prHMMrght, prHMMrght, NULL, NULL, NULL, NULL, rHhalignPara, &rHHscores, iAux_FS++, /* DEBUG ARGUMENT */ rLog.iLogLevelEnabled, @@ -1221,7 +1271,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, else { iHHret = hhalign(ppcCopy2, piLeafCount[iR], pdWeightsR, ppcReprsnt2, 0/* only one representative seq */, NULL, - &dScore, prHMM, + &dScore, prHMMrght, prHMMrght, NULL, NULL, NULL, NULL, rHhalignPara, &rHHscores, iAux_FS++, /* DEBUG ARGUMENT */ rLog.iLogLevelEnabled, @@ -1247,8 +1297,8 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, * it only contains a gap if all sequences of the profile * have a gap at this position */ - pcConsens2 = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char)); - for (i = 0; i < prHMM->L; i++){ + pcConsens2 = CKCALLOC(prHMMrght->L+strlen(ppcProfile2[0])+1, sizeof(char)); + for (i = 0; i < prHMMrght->L; i++){ for (j = 0, pcConsens2[i] = '-'; (j < piLeafCount[iR]) && ('-' == pcConsens2[i]); j++){ pcConsens2[i] = ppcCopy2[j][i]; } @@ -1291,7 +1341,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, int iOldMacRam = rHhalignPara.iMacRamMB; iHHret = hhalign(ppcProfile1, piLeafCount[iL], pdWeightsL, ppcProfile2, piLeafCount[iR], pdWeightsR, - &dScore, prHMM, + &dScore, prHMMleft, prHMMrght, pcConsens1, pcReprsnt1, pcConsens2, pcReprsnt2, rHhalignPara, &rHHscores, @@ -1310,6 +1360,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, the only thing we can do (easily) is to re-run it in Viterbi mode, for this set MAC-RAM=0, set it back to its original value after 2nd try. FS, r241 -> r243 */ + if (RETURN_FROM_MAC == iHHret){ /* Note: the default way to run hhalign() is to initially select MAC by giving it all the memory it needs. MAC may fail due to overflow (repeats?). @@ -1328,7 +1379,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, iHHret = hhalign(ppcProfile1, piLeafCount[iL], pdWeightsL, ppcProfile2, piLeafCount[iR], pdWeightsR, - &dScore, prHMM, + &dScore, prHMMleft, prHMMrght, pcConsens1, pcReprsnt1, pcConsens2, pcReprsnt2, rHhalignPara, &rHHscores, @@ -1353,7 +1404,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, int iOldMacRam = rHhalignPara.iMacRamMB; iHHret = hhalign(ppcProfile2, piLeafCount[iR], pdWeightsR, ppcProfile1, piLeafCount[iL], pdWeightsL, - &dScore, prHMM, + &dScore, prHMMrght, prHMMleft, pcConsens2, pcReprsnt2, pcConsens1, pcReprsnt1, rHhalignPara, &rHHscores, @@ -1372,6 +1423,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, the only thing we can do (easily) is to re-run it in Viterbi mode, for this set MAC-RAM=0, set it back to its original value after 2nd try. FS, r241 -> r243 */ + if (RETURN_FROM_MAC == iHHret){ /* see above */ rHhalignPara.iMacRamMB = 0; @@ -1382,7 +1434,7 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, iHHret = hhalign(ppcProfile2, piLeafCount[iR], pdWeightsR, ppcProfile1, piLeafCount[iL], pdWeightsL, - &dScore, prHMM, + &dScore, prHMMrght, prHMMleft, pcConsens2, pcReprsnt2, pcConsens1, pcReprsnt1, rHhalignPara, &rHHscores, @@ -1402,7 +1454,56 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, } /* 1st invocation failed */ } /* 2nd profile was shorter than 1st */ - + + /* + * at this stage have performed alignment of 2 profiles/sequences. + * if HMM batch information had been used then have choices: + * (i) if HMM info only intended for initial alignment (of sequences) then make both HMMs stale; + * (iia) if alignment of 2 profiles/sequences where same HMM used, then retain; + * (iib) if alignment of 2 profiles/sequences where different HMMs used, then make both stale; + * (iii) some majority voting + */ +#if 0 /* always make HMM batch stale (after 1st invocation) */ + if ( (NULL != prMSeq->ppiHMMBindex) && (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iL][0]]) ){ + prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] = -1; + } + if ( (NULL != prMSeq->ppiHMMBindex) && (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iR][0]]) ){ + prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0] = -1; + } +#else /* retain HMMs if they were the same for both profiles */ + if (NULL != prMSeq->ppiHMMBindex) { + int i; + + if ( (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iL][0]]) && (NULL != prMSeq->ppiHMMBindex[ppiLeafList[iR][0]]) ){ + if ( prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] == -1){ + prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0] = -1; /* this is conservative, could do H[iL] = H[iR] */ + for (i = 0; i < piLeafCount[iR]; i++){ + prMSeq->ppiHMMBindex[ppiLeafList[iR][i]][0] = -1; + } + } + else if ( prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0] == -1){ + prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] = -1; /* this is conservative, could do H[iR] = H[iL] */ + for (i = 0; i < piLeafCount[iL]; i++){ + prMSeq->ppiHMMBindex[ppiLeafList[iL][i]][0] = -1; + } + } + else if (prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] != prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0]){ + prMSeq->ppiHMMBindex[ppiLeafList[iL][0]][0] = -1; /* this is NOT conservative, mandatory */ + for (i = 0; i < piLeafCount[iL]; i++){ + prMSeq->ppiHMMBindex[ppiLeafList[iL][i]][0] = -1; + } + prMSeq->ppiHMMBindex[ppiLeafList[iR][0]][0] = -1; /* this is NOT conservative, mandatory */ + for (i = 0; i < piLeafCount[iR]; i++){ + prMSeq->ppiHMMBindex[ppiLeafList[iR][i]][0] = -1; + } + } + else { + /* void, HMMs should be same */ + } + } + } /* there was a HMM batch */ +#endif + if (rLog.iLogLevelEnabled <= LOG_DEBUG){ int i; @@ -1513,9 +1614,13 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, TranslateUnknown2Ambiguity(prMSeq); ReAttachLeadingGaps(prMSeq, iProfProfSeparator); +#if 0 /* DEVEL 291 */ if (NULL == prHMMList){ CKFREE(prHMM); } +#else + CKFREE(prHMMnull); +#endif CKFREE(ppcProfile2); CKFREE(ppcProfile1); CKFREE(ppiLeafList[piOrderLR[DIFF_NODE*(iNodeCount-1)+PRNT_NODE]]); diff --git a/src/clustal/ktuple_pair.c b/src/clustal/ktuple_pair.c index 3ea9ca9..3e49a02 100644 --- a/src/clustal/ktuple_pair.c +++ b/src/clustal/ktuple_pair.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: ktuple_pair.c 230 2011-04-09 15:37:50Z andreas $ + * RCS $Id: ktuple_pair.c 305 2016-06-13 13:46:02Z fabian $ * * * K-Tuple code for pairwise alignment (Wilbur and Lipman, 1983; PMID @@ -649,7 +649,7 @@ KTuplePairDist(symmatrix_t *tmat, mseq_t *mseq, /* int uStepNo, uTotalStepNo; */ ktuple_param_t aln_param = default_protein_param; bool bPrintCR = (rLog.iLogLevelEnabled<=LOG_VERBOSE) ? FALSE : TRUE; - + if(prProgress == NULL) { NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO), @@ -822,7 +822,9 @@ KTuplePairDist(symmatrix_t *tmat, mseq_t *mseq, #pragma omp critical(ktuple) #if 0 { - printf("steps: %d\n", private_step_no); + int tid; + tid = omp_get_thread_num(); + printf("%s:%d: tid %d: steps %d\n", __FILE__, __LINE__, tid, private_step_no); } #endif #endif diff --git a/src/clustal/mbed.c b/src/clustal/mbed.c index 0977607..33fd2de 100644 --- a/src/clustal/mbed.c +++ b/src/clustal/mbed.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: mbed.c 283 2013-06-10 17:42:14Z fabian $ + * RCS $Id: mbed.c 300 2016-06-13 13:29:58Z fabian $ * * * Reimplementation from scratch of mBed (Blackshields et al., 2010; @@ -306,9 +306,9 @@ SeqToVec(double **ppdSeqVec, mseq_t *prMSeq, int iSeqIndex; int iSeedIndex; /* indices for restoring order */ - int *restore; + int *restore = NULL; /* sorted copy of piSeeds */ - int *piSortedSeeds; + int *piSortedSeeds = NULL; #if TIMING Stopwatch_t *stopwatch = StopwatchCreate(); @@ -446,6 +446,7 @@ SeqToVec(double **ppdSeqVec, mseq_t *prMSeq, FreeSymMatrix(&prDistmat); CKFREE(restore); + CKFREE(piSortedSeeds); #if TIMING StopwatchStop(stopwatch); StopwatchDisplay(stdout, "Total time for SeqToVec(): ", stopwatch); @@ -1267,8 +1268,8 @@ Mbed(tree_t **prMbedTree_p, mseq_t *prMSeq, const int iPairDistType, for (iI = 0; iI < prKMeansResult->iNClusters; iI++) { for (iJ=0; iJ<prKMeansResult->piNObjsPerCluster[iI]; iJ++) { int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iI][iJ]; - fprintf(pfClust, "Cluster %u: object %u has index %u (=seq %s )\t %s\n", - iI, iJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name, ppcClusterSplits[iRealIndex]); + fprintf(pfClust, "Cluster %u: object %u has index %u (=seq %s %d~len)\t %s\n", + iI, iJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name, prMSeq->sqinfo[iRealIndex].len, ppcClusterSplits[iRealIndex]); } } fclose(pfClust); pfClust = NULL; diff --git a/src/clustal/pair_dist.c b/src/clustal/pair_dist.c index 7d318b5..e6dbdc3 100644 --- a/src/clustal/pair_dist.c +++ b/src/clustal/pair_dist.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: pair_dist.c 288 2013-07-29 13:15:50Z andreas $ + * RCS $Id: pair_dist.c 301 2016-06-13 13:32:55Z fabian $ */ #ifdef HAVE_CONFIG_H @@ -35,6 +35,9 @@ #include "progress.h" #include "util.h" +/* Made iend/jend const unsigned long int (originally just int), FS, 2016-04-04 + */ + /* Up to rev 173 we had a USE_SYM_KTUPLE switch implemented here. When active * ktuple distances were computed twice for each pair and averaged. Idea was @@ -57,8 +60,8 @@ KimuraCorrection(double frac_id); static int SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq, - int istart, int iend, - int jstart, int jend, + int istart, const unsigned long int iend, + int jstart, const unsigned long int jend, bool use_KimuraCorrection, progress_t *prProgress, unsigned long int *ulStepNo, unsigned long int ulTotalStepNo); @@ -167,8 +170,8 @@ KimuraCorrection(double p) */ int SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq, - int istart, int iend, - int jstart, int jend, + int istart, const unsigned long int iend, + int jstart, const unsigned long int jend, bool use_kimura, progress_t *prProgress, unsigned long int *ulStepNo, unsigned long int ulTotalStepNo) { @@ -272,8 +275,8 @@ SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq, */ int PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPercID, - int istart, int iend, - int jstart, int jend, + int istart, const unsigned long int iend, + int jstart, const unsigned long int jend, char *fdist_in, char *fdist_out) { int uSeqIndex; @@ -315,26 +318,33 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc hence making even chunk sizes is slightly fiddlier */ ulTotalStepNo = iend*jend - iend*iend/2 + iend/2; - + /* FIXME: can get rid of iChunkStart, iChunkEnd now that we're using the arrays */ iChunkStart = iend; for(iChunk = 0; iChunk <= iNumberOfThreads; iChunk++) { iChunkEnd = iChunkStart; - if(iChunk == iNumberOfThreads - 1) + if (iChunk == iNumberOfThreads - 1){ iChunkStart = 0; - else + } + else if (iend == jend){ iChunkStart = iend - ((double)(iend - istart) * sqrt(((double)iChunk + 1.0)/(double)iNumberOfThreads)); + } + else { + iChunkStart = iend - (iend - istart) * (iChunk + 1) / (double)(iNumberOfThreads); + } iChunkStarts[iChunk] = iChunkStart; iChunkEnds[iChunk] = iChunkEnd; + /*printf("%s:%d: C=%d, ie=%d, is=%d, je=%d, js=%d, Cstart=%d, Cend=%d, diff=%d\n", + __FILE__, __LINE__, iChunk, iend, istart, jend, jstart, iChunkStart, iChunkEnd, iChunkEnd-iChunkStart);*/ } if (PAIRDIST_KTUPLE == pairdist_type) { Log(&rLog, LOG_INFO, "Calculating pairwise ktuple-distances..."); - + NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO), - "Ktuple-distance calculation progress", bPrintCR); + "Ktuple-distance calculation progress", bPrintCR); #ifdef HAVE_OPENMP #pragma omp parallel for private(iChunk) schedule(dynamic) #endif @@ -394,7 +404,7 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc } #endif /* random/proper distance calculation */ - + /* optional printing of matrix to file */ if (NULL != fdist_out) { @@ -420,7 +430,7 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc ProgressDone(prProgress); FreeProgress(&prProgress); } - + return 0; } /*** end: PairDistances() ***/ diff --git a/src/clustal/pair_dist.h b/src/clustal/pair_dist.h index d210329..e760506 100644 --- a/src/clustal/pair_dist.h +++ b/src/clustal/pair_dist.h @@ -13,7 +13,7 @@ ********************************************************************/ /* - * RCS $Id: pair_dist.h 283 2013-06-10 17:42:14Z fabian $ + * RCS $Id: pair_dist.h 302 2016-06-13 13:35:50Z fabian $ */ @@ -34,8 +34,8 @@ extern int PairDistances(symmatrix_t **distmat, mseq_t *mseq, const int pairdist_type, bool bPercID, - const int istart, const int iend, - const int jstart, const int jend, + const int istart, const unsigned long int iend, + const int jstart, const unsigned long int jend, char *fdist_in, char *fdist_out); #endif diff --git a/src/clustal/seq.c b/src/clustal/seq.c index a7e9323..27cca14 100644 --- a/src/clustal/seq.c +++ b/src/clustal/seq.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: seq.c 291 2014-02-27 18:20:54Z fabian $ + * RCS $Id: seq.c 298 2014-11-07 12:18:36Z fabian $ * * * Module for sequence/alignment IO and misc. @@ -37,7 +37,7 @@ #include "util.h" #include "log.h" #include "seq.h" - +/*#include "../../mymemmonitor.h"*/ #define ALLOW_ONLY_PROTEIN 0 // DD @@ -419,11 +419,11 @@ SeqTypeToStr(int iSeqType) int ReadSequences(mseq_t *prMSeq, char *seqfile, int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs, - int iMaxNumSeq, int iMaxSeqLen) + int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch) { SQFILE *dbfp; /* sequence file descriptor */ - char *cur_seq; - SQINFO cur_sqinfo; + char *cur_seq = NULL; + SQINFO cur_sqinfo = {0}; int iSeqIdx; /* sequence counter */ int iSeqPos; /* sequence string position counter */ @@ -462,8 +462,8 @@ ReadSequences(mseq_t *prMSeq, char *seqfile, */ while (ReadSeq(dbfp, dbfp->format, &cur_seq, - &cur_sqinfo)) { - + &cur_sqinfo)) { + if (prMSeq->nseqs+1>iMaxNumSeq) { Log(&rLog, LOG_ERROR, "Maximum number of sequences (=%d) exceeded after reading sequence '%s' from '%s'", iMaxNumSeq, cur_sqinfo.name, seqfile); @@ -489,7 +489,8 @@ ReadSequences(mseq_t *prMSeq, char *seqfile, prMSeq->sqinfo = (SQINFO *) - CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO)); + CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO)); + memset(&prMSeq->sqinfo[prMSeq->nseqs], 0, sizeof(SQINFO)); SeqinfoCopy(&prMSeq->sqinfo[prMSeq->nseqs], &cur_sqinfo); #ifdef TRACE @@ -549,7 +550,7 @@ ReadSequences(mseq_t *prMSeq, char *seqfile, prMSeq->nseqs++; - FreeSequence(cur_seq, &cur_sqinfo); + FreeSequence(cur_seq, &cur_sqinfo); } SeqfileClose(dbfp); @@ -616,6 +617,83 @@ ReadSequences(mseq_t *prMSeq, char *seqfile, Log(&rLog, LOG_INFO, "Read %d sequences (type: %s) from %s", prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype), prMSeq->filename); + prMSeq->pppcHMMBNames = NULL; + prMSeq->ppiHMMBindex = NULL; + + /* read HMM-batch file if existent */ + if (NULL != pcHMMBatch) { + + enum {MAXLINE=10000}; + FILE *pfHMMBatch = NULL; + char zcScanline[MAXLINE] = {0}; + char *pcToken = NULL; + char *pcSeqName = NULL; + int iSeq = 0; + + /* check that file exists */ + if (NULL == (pfHMMBatch = fopen(pcHMMBatch, "r"))){ + Log(&rLog, LOG_ERROR, "Failed to open HMM-batch file %s for reading", pcHMMBatch); + return -1; + } + + /* initialise names and indices */ + prMSeq->pppcHMMBNames = (char ***)CKMALLOC(prMSeq->nseqs * sizeof(char **)); + for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){ + prMSeq->pppcHMMBNames[iSeq] = NULL; + } + prMSeq->ppiHMMBindex = (int **)CKMALLOC(prMSeq->nseqs * sizeof(int *)); + for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){ + prMSeq->ppiHMMBindex[iSeq] = (int *)CKMALLOC(1 * sizeof(int)); + prMSeq->ppiHMMBindex[iSeq][0] = -1; + } + + /* read batch file line-by-line */ + while (NULL != fgets(zcScanline, MAXLINE, pfHMMBatch)){ + + pcToken = strtok(zcScanline, " \040\t\n"); + if (NULL == pcToken){ + continue; + } + else { + pcSeqName = pcToken; + } + /* identify sequence label from batch file in labels read from sequence file */ + for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){ + int iHMM = 0; + + if (0 == strcmp(pcSeqName, prMSeq->sqinfo[iSeq].name)){ + + while (NULL != (pcToken = strtok(NULL, " \040\t\n"))){ + prMSeq->pppcHMMBNames[iSeq] = (char **)CKREALLOC(prMSeq->pppcHMMBNames[iSeq], + (iHMM+2) * sizeof(char *)); + prMSeq->pppcHMMBNames[iSeq][iHMM] = CkStrdup(pcToken); + prMSeq->ppiHMMBindex[iSeq] = (int *)CKREALLOC(prMSeq->ppiHMMBindex[iSeq], + (iHMM+2) * sizeof(int)); + prMSeq->ppiHMMBindex[iSeq][iHMM] = 0; + iHMM++; + prMSeq->pppcHMMBNames[iSeq][iHMM] = NULL; + prMSeq->ppiHMMBindex[iSeq][iHMM] = 0; + } + break; + } + + } /* 0 <= iSeq < prMSeq->nseqs */ + if (iSeq >= prMSeq->nseqs) { + Log(&rLog, LOG_WARN, + "sequence %s not found in input sequences (%s), will be ignored", + pcSeqName, seqfile); + } + + } /* !EOF */ + + fclose(pfHMMBatch); pfHMMBatch = NULL; + + } /* there was a HMM batch file */ + else { + prMSeq->pppcHMMBNames = NULL; + prMSeq->ppiHMMBindex = NULL; + } /* there was no HMM batch file */ + return 0; } /*** end: ReadSequences ***/ @@ -644,6 +722,8 @@ NewMSeq(mseq_t **prMSeq) (*prMSeq)->sqinfo = NULL; (*prMSeq)->filename = NULL; (*prMSeq)->tree_order = NULL; + (*prMSeq)->pppcHMMBNames = NULL; + (*prMSeq)->ppiHMMBindex = NULL; } /*** end: NewMSeq ***/ @@ -765,6 +845,14 @@ FreeMSeq(mseq_t **mseq) CKFREE((*mseq)->tree_order); } + if (NULL != (*mseq)->pppcHMMBNames){ /* FS, r291 -> */ + for (i = 0; (*mseq)->pppcHMMBNames[i] && (i < (*mseq)->nseqs); i++){ + int iIter = 0; + for (iIter = 0; NULL != (*mseq)->pppcHMMBNames[i][iIter]; iIter++){ + CKFREE((*mseq)->pppcHMMBNames[i][iIter]); + } + } + } (*mseq)->seqtype = SEQTYPE_UNKNOWN; (*mseq)->nseqs = 0; diff --git a/src/clustal/seq.h b/src/clustal/seq.h index 17f1113..8f9fe1b 100644 --- a/src/clustal/seq.h +++ b/src/clustal/seq.h @@ -13,7 +13,7 @@ ********************************************************************/ /* - * RCS $Id: seq.h 289 2013-09-17 10:09:37Z fabian $ + * RCS $Id: seq.h 296 2014-10-07 12:15:41Z fabian $ */ #ifndef CLUSTALO_SEQ_H @@ -111,6 +111,10 @@ typedef struct { * */ SQINFO *sqinfo; + + /* HMM batch information */ + char ***pppcHMMBNames; + int **ppiHMMBindex; } mseq_t; extern void @@ -131,7 +135,7 @@ SeqTypeToStr(int seqtype); extern int ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile, int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs, - int iMaxNumSeq, int iMaxSeqLen); + int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch); extern void NewMSeq(mseq_t **mseq); diff --git a/src/clustal/symmatrix.c b/src/clustal/symmatrix.c index 350ca65..b37d883 100644 --- a/src/clustal/symmatrix.c +++ b/src/clustal/symmatrix.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: symmatrix.c 288 2013-07-29 13:15:50Z andreas $ + * RCS $Id: symmatrix.c 303 2016-06-13 13:37:47Z fabian $ * * * Functions for symmetric (square) matrices including diagonal. @@ -77,6 +77,10 @@ NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols) fprintf(stderr, "Couldn't allocate memory (%s|%s)\n", __FILE__, __FUNCTION__); return -1; + } + else { + (*symmat)->nrows = nrows; + (*symmat)->ncols = ncols; } (*symmat)->data = (double **) malloc(nrows * sizeof(double *)); @@ -87,6 +91,38 @@ NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols) *symmat = NULL; return -1; } + +#ifdef HAVE_OPENMP + /* one malloc for the full matrix data structure + assumes ncols >= nrows (asserted above) */ + double *mat_data; + int elements_to_date=0; + if ((mat_data = (double *)malloc(((nrows+1)*nrows/2 + (ncols-nrows)*nrows) * sizeof(double))) == NULL) { + fprintf(stderr, "Couldn't allocate MPI memory (%s|%s)\n", __FILE__, __FUNCTION__); + free((*symmat)->data); + free(*symmat); + *symmat = NULL; + return -1; + } + for (i=0; i<nrows; i++) { + (*symmat)->data[i] = &mat_data[elements_to_date]; + elements_to_date += ncols-i; +#ifdef TRACE + fprintf(stderr, "DEBUG(%s|%s():%d) initialising symmat with the number of the beast\n", + __FILE__, __FUNCTION__, __LINE__); +#endif + { + int j; + for (j=0; j<ncols-i; j++) { +#ifdef TRACE + (*symmat)->data[i][j] = -666.0; +#else + (*symmat)->data[i][j] = 0.0; +#endif + } + } + } +#else /* not HAVE_OPENMP */ for (i=0; i<nrows; i++) { (*symmat)->data[i] = (double *) calloc((ncols-i), sizeof(double)); if (NULL == (*symmat)->data[i]) { @@ -112,10 +148,8 @@ NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols) } #endif } +#endif /* HAVE_OPENMP */ - (*symmat)->nrows = nrows; - (*symmat)->ncols = ncols; - return 0; } /*** end: new_symmatrix ***/ @@ -242,12 +276,18 @@ SymMatrixGetValueP(double **val, symmatrix_t *symmat, void FreeSymMatrix(symmatrix_t **symmat) { +#ifndef HAVE_OPENMP int i; +#endif if (NULL != (*symmat)) { if (NULL != (*symmat)->data) { +#ifdef HAVE_OPENMP + free((*symmat)->data[0]); +#else for (i=0; i<(*symmat)->nrows; i++) { free((*symmat)->data[i]); } +#endif free((*symmat)->data); } } diff --git a/src/hhalign/hash-C.h b/src/hhalign/hash-C.h index 24b7815..3ddae4b 100644 --- a/src/hhalign/hash-C.h +++ b/src/hhalign/hash-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hash-C.h 143 2010-10-14 13:11:14Z andreas $ + * RCS $Id: hash-C.h 309 2016-06-13 14:10:11Z fabian $ */ // hash.C @@ -27,7 +27,7 @@ // * works also if maximal size is exceeded, but gets slower by a factor ~num_keys/num_slots /* - * RCS $Id: hash-C.h 143 2010-10-14 13:11:14Z andreas $ + * RCS $Id: hash-C.h 309 2016-06-13 14:10:11Z fabian $ */ #ifndef HASH @@ -59,7 +59,7 @@ using std::ofstream; #endif #include "hash.h" - +//#include "new_new.h" /* memory tracking */ ////////////////////////////////////////////////////////////////////////////// diff --git a/src/hhalign/hhalign.cpp b/src/hhalign/hhalign.cpp index 2905f39..6fdcc96 100644 --- a/src/hhalign/hhalign.cpp +++ b/src/hhalign/hhalign.cpp @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhalign.cpp 290 2013-09-20 15:18:12Z fabian $ + * RCS $Id: hhalign.cpp 309 2016-06-13 14:10:11Z fabian $ */ /* hhalign.C: @@ -677,7 +677,7 @@ ProcessArguments(int argc, char** argv) extern "C" int hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, char **ppcSecndProf, int iSecndCnt, double *pdWeightsR, - double *dScore_p, hmm_light *prHMM, + double *dScore_p, hmm_light *prHMM, hmm_light *prHMM2, char *pcPrealigned1, char *pcRepresent1, char *pcPrealigned2, char *pcRepresent2, hhalign_para rHhalignPara, hhalign_scores *rHHscores, @@ -879,7 +879,7 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, tL = strlen(ppcSecndProf[0]); } else { - tL = prHMM->L; + tL = prHMM2->L; } @@ -942,7 +942,7 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, and add pseudocounts etc. */ t.cQT = 't'; if (OK != ReadAndPrepare(INTERN_ALN_2_HMM, - ppcSecndProf, iSecndCnt, prHMM, + ppcSecndProf, iSecndCnt, prHMM2, pcPrealigned2, pcRepresent2, pdWeightsR, infile, t)) { sprintf(zcError, "%s:%s:%d: Problem Reading/Preparing t-profile (len=%d)\n", @@ -1344,7 +1344,7 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, hitlist.Reset(); while (!hitlist.End()) hitlist.ReadNext().Delete(); // Delete content of hit object - + if (strucfile && par.wstruc>0) { for (int i=0; i<q.L+2; i++){ @@ -1396,7 +1396,7 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, t.ClobberGlobal(); } hitlist.ClobberGlobal(); - + return iRetVal; } /* this is the end of hhalign() //end main */ diff --git a/src/hhalign/hhalign.h b/src/hhalign/hhalign.h index 5103de6..1942ea5 100644 --- a/src/hhalign/hhalign.h +++ b/src/hhalign/hhalign.h @@ -15,14 +15,14 @@ ********************************************************************/ /* - * RCS $Id: hhalign.h 284 2013-06-12 10:10:11Z fabian $ + * RCS $Id: hhalign.h 296 2014-10-07 12:15:41Z fabian $ */ extern int hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, char **ppcSecndProf, int iSecndCnt, double *pdWeightsR, - double *dScore_p, hmm_light *prHMM, + double *dScore_p, hmm_light *prHMM, hmm_light *prHMM2, char *pcPrealigned1, char *pcRepresent1, char *pcPrealigned2, char *pcRepresent2, hhalign_para rHhalignPara, hhalign_scores *rHHscores, diff --git a/src/hhalign/hhalignment-C.h b/src/hhalign/hhalignment-C.h index c89aa45..a9aa299 100644 --- a/src/hhalign/hhalignment-C.h +++ b/src/hhalign/hhalignment-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhalignment-C.h 236 2011-04-14 11:30:04Z fabian $ + * RCS $Id: hhalignment-C.h 309 2016-06-13 14:10:11Z fabian $ */ @@ -61,6 +61,11 @@ using std::ofstream; #include "hhhmm.h" #endif +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +//#include "new_new.h" /* memory tracking */ + enum {KEEP_NOT = 0, KEEP_CONDITIONALLY, KEEP_ALWAYS}; @@ -516,7 +521,7 @@ Alignment::Compress(const char infile[]) i--; if (L!=i && L!=/*MAXRES*/par.maxResLen-2 && !unequal_lengths) unequal_lengths=k; //sequences have different lengths L=imin(L,i); - } + } // end for (k) if (unequal_lengths) break; //Replace GAP with ENDGAP for all end gaps /* MR1 */ @@ -559,6 +564,10 @@ Alignment::Compress(const char infile[]) // Conversion to integer representation, checking for unequal lengths and initialization if (nres==NULL) nres=new(int[N_in]); +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static), private(l) +#endif for (k=0; k<N_in; k++) { if (!keep[k]) continue; @@ -582,6 +591,10 @@ Alignment::Compress(const char infile[]) for (k=0; k<N_in; k++) if (keep[k]) nl[ (int)X[k][l]]++; for (a=0; a<20; a++) if(nl[a]) naa++; if (!naa) naa=1; //naa=0 when column consists of only gaps and Xs (=ANY) +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif for (k=0; k<N_in; k++) if (keep[k] && (X[k][l]<20) ) { @@ -601,6 +614,10 @@ Alignment::Compress(const char infile[]) } // Add up percentage of gaps +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(k) +#endif for (l=1; l<=L; l++) { float res=0; @@ -725,6 +742,10 @@ Alignment::Compress(const char infile[]) } i++; this->l[i]=l; +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif for (k=0; k<N_in; k++) { if (keep[k]) @@ -1032,7 +1053,6 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in int i; // counts residues int n; // number of sequences accepted so far - // Initialize in[k] for (n=k=0; k<N_in; k++) if (keep[k]==KEEP_ALWAYS) {in[k]=2/*KEEP_ALWAYS??*/; n++;} else in[k]=0; @@ -1051,7 +1071,9 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in } // Determine number of residues nres[k]? - if ( (nres==NULL) || (sizeof(nres)<N_in*sizeof(int)) ) + // KB: memory leak as sizeof(nres) just gives the size of an int pointer + //if ( (nres==NULL) || (sizeof(nres)<N_in*sizeof(int)) ) + if (nres==NULL) { nres=new(int[N_in]); for (k=0; k<N_in; k++) // do this for ALL sequences, not only those with in[k]==1 (since in[k] may be display[k]) @@ -1131,7 +1153,7 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in if (diff>=qdiff_max) {keep[k]=KEEP_NOT; continue;} // too different from query? => reject once and for all } // printf(" qsc=%6.2f qid=%6.2f \n",qsc_sum/nres[k],100.0*(1.0-(float)(diff)/nres[k])); - } + } // end for (k) if (seqid1>seqid2) { @@ -1194,6 +1216,10 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in // printf("\n"); // Loop over all candidate sequences kk (-> k) +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(k, i, diff_min_frac, jj, j, first_kj, last_kj, cov_kj, diff_suff, diff) +#endif for (kk=0; kk<N_in; kk++) { if (inkk[kk]) continue; // seq k already accepted @@ -1202,7 +1228,16 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in if (keep[k]==KEEP_ALWAYS) {inkk[kk]=2; continue;} // accept all marked sequences (no n++, since this has been done already) // Calculate max-seq-id threshold seqidk for sequence k (as maximum over idmaxwin[i]) - if (seqid>=100) {in[k]=inkk[kk]=1; n++; continue;} +// if (seqid>=100) {in[k]=inkk[kk]=1; n++; continue;} + if (seqid>=100) { + in[k]=inkk[kk]=1; +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + n++; + continue; + } float seqidk=seqid1; for (i=first[k]; i<=last[k]; i++) if (idmaxwin[i]>seqidk) seqidk=idmaxwin[i]; @@ -1240,8 +1275,18 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in if (jj>=kk) // did loop reach end? => accept k. Otherwise reject k (the shorter of the two) { in[k]=inkk[kk]=1; +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp atomic +#endif n++; - for (i=first[k]; i<=last[k]; i++) N[i]++; // update number of sequences at position i + for (i=first[k]; i<=last[k]; i++) { +#if 0 +//#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + N[i]++; // update number of sequences at position i + } // printf("%i %20.20s accepted\n",k,sname[k]); } // else @@ -1556,7 +1601,8 @@ Alignment::FrequenciesAndTransitions(HMM& q, char* in) for (k=0; k<N_in; k++) X[k][0]=ENDGAP; // make sure that sequences ENTER subalignment j for j=1 for (k=0; k<N_in; k++) X[k][L+1]=ENDGAP; // does it have an influence? -#ifdef HAVE_OPENMP +#if 0 +//#ifdef HAVE_OPENMP if(par.wg != 1) { #pragma omp parallel sections @@ -1836,122 +1882,169 @@ Alignment::Amino_acid_frequencies_and_transitions_from_M_state(HMM& q, char* in) // Global weights? if (par.wg==1) - for (k=0; k<N_in; k++) wi[k]=wg[k]; + for (k=0; k<N_in; k++) + wi[k]=wg[k]; // Initialization q.Neff_HMM=0.0f; Neff[0]=0.0; // if the first column has no residues (i.e. change==0), Neff[i]=Neff[i-1]=Neff[0] n = new(int*[L+2]); - for (j=1; j<=L; j++) n[j]=new(int[NAA+3]); - for (j=1; j<=L; j++) - for (a=0; a<NAA+3; a++) n[j][a]=0; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(a) +#endif + for (j=1; j<=L; j++) { + n[j]=new(int[NAA+3]); + for (a=0; a<NAA+3; a++) + n[j][a]=0; + } ////////////////////////////////////////////////////////////////////////////////////////////// // Main loop through alignment columns for (i=1; i<=L; i++) // Calculate wi[k] at position i as well as Neff[i] - { - + { if (par.wg==0) - { - - change=0; - // Check all sequences k and update n[j][a] and ri[j] if necessary - for (k=0; k<N_in; k++) - { - if (!in[k]) continue; - if (X[k][i-1]>=ANY && X[k][i]<ANY) - { // ... if sequence k was NOT included in i-1 and has to be included for column i - change=1; - nseqi++; - for (int j=1; j<=L; j++) n[j][ (int)X[k][j]]++; - } - else if (X[k][i-1]<ANY && X[k][i]>=ANY) - { // ... if sequence k WAS included in i-1 and has to be thrown out for column i - change=1; - nseqi--; - for (int j=1; j<=L; j++) n[j][ (int)X[k][j]]--; - } - } //end for (k) - nseqs[i]=nseqi; - - // If subalignment changed: update weights wi[k] and Neff[i] - if (change) - { - // Initialize weights and numbers of residues for subalignment i - ncol=0; - for (k=0; k<N_in; k++) wi[k]=1E-8; // for pathological alignments all wi[k] can get 0; /* MR1 */ - - // sum wi[k] over all columns j and sequences k of subalignment - for (j=1; j<=L; j++) - { - // do at least a fraction MAXENDGAPFRAC of sequences in subalignment contain an end gap in j? - if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; - naa=0; for (a=0; a<20; a++) if(n[j][a]) naa++; - if (naa==0) continue; - ncol++; - for (k=0; k<N_in; k++) - { - if (in[k] && X[k][i]<ANY && X[k][j]<ANY) - { - // if (!n[j][ (int)X[k][j]]) {fprintf(stderr,"Error: Mi=%i: n[%i][X[%i]]=0! (X[%i]=%i)\n",i,j,k,k,X[k][j]);} - wi[k]+=1.0/float(n[j][ (int)X[k][j] ]*naa); - } - } - } - - // Check whether number of columns in subalignment is sufficient - if (ncol<NCOLMIN) - // Take global weights - for (k=0; k<N_in; k++) - if(in[k] && X[k][i]<ANY) wi[k]=wg[k]; else wi[k]=0.0; - - // Calculate Neff[i] - Neff[i]=0.0; - for (j=1; j<=L; j++) - { - // do at least a fraction MAXENDGAPFRA of sequences in subalignment contain an end gap in j? - if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; - for (a=0; a<20; a++) fj[a]=0; - for (k=0; k<N_in; k++) - if (in[k] && X[k][i]<ANY && X[k][j]<ANY) - fj[ (int)X[k][j] ]+=wi[k]; - NormalizeTo1(fj,NAA); - for (a=0; a<20; a++) - if (fj[a]>1E-10) Neff[i]-=fj[a]*fast_log2(fj[a]); - } - if (ncol>0) Neff[i]=pow(2.0,Neff[i]/ncol); else Neff[i]=1.0; - - } - - else //no update was necessary; copy values for i-1 - { - Neff[i]=Neff[i-1]; - } - } + { + change=0; + // Check all sequences k and update n[j][a] and ri[j] if necessary + for (k=0; k<N_in; k++) + { + if (!in[k]) continue; + if (X[k][i-1]>=ANY && X[k][i]<ANY) + { // ... if sequence k was NOT included in i-1 and has to be included for column i + change=1; + nseqi++; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int j=1; j<=L; j++) + n[j][ (int)X[k][j]]++; + } + else if (X[k][i-1]<ANY && X[k][i]>=ANY) + { // ... if sequence k WAS included in i-1 and has to be thrown out for column i + change=1; + nseqi--; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int j=1; j<=L; j++) + n[j][ (int)X[k][j]]--; + } + } //end for (k) + nseqs[i]=nseqi; + + // If subalignment changed: update weights wi[k] and Neff[i] + if (change) + { + // Initialize weights and numbers of residues for subalignment i + ncol=0; + for (k=0; k<N_in; k++) + wi[k]=1E-8; // for pathological alignments all wi[k] can get 0; /* MR1 */ + + // sum wi[k] over all columns j and sequences k of subalignment +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(naa, a, k) +#endif + for (j=1; j<=L; j++) + { + // do at least a fraction MAXENDGAPFRAC of sequences in subalignment contain an end gap in j? + if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; + naa=0; + for (a=0; a<20; a++) + if(n[j][a]) + naa++; + if (naa==0) continue; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + ncol++; + for (k=0; k<N_in; k++) + { + if (in[k] && X[k][i]<ANY && X[k][j]<ANY) + { + // if (!n[j][ (int)X[k][j]]) {fprintf(stderr,"Error: Mi=%i: n[%i][X[%i]]=0! (X[%i]=%i)\n",i,j,k,k,X[k][j]);} +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + wi[k]+=1.0/float(n[j][ (int)X[k][j] ]*naa); + } + } + } // end for (j) + + // Check whether number of columns in subalignment is sufficient + if (ncol<NCOLMIN) + // Take global weights + for (k=0; k<N_in; k++) + if(in[k] && X[k][i]<ANY) + wi[k]=wg[k]; + else wi[k]=0.0; + + // Calculate Neff[i] + Neff[i]=0.0; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(a, k, fj) +#endif + for (j=1; j<=L; j++) + { + // do at least a fraction MAXENDGAPFRA of sequences in subalignment contain an end gap in j? + if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; + for (a=0; a<20; a++) fj[a]=0; + for (k=0; k<N_in; k++) + if (in[k] && X[k][i]<ANY && X[k][j]<ANY) + fj[ (int)X[k][j] ]+=wi[k]; + NormalizeTo1(fj,NAA); + for (a=0; a<20; a++) + if (fj[a]>1E-10) +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + Neff[i]-=fj[a]*fast_log2(fj[a]); + } + if (ncol>0) + Neff[i]=pow(2.0,Neff[i]/ncol); + else + Neff[i]=1.0; + } // end change + + else //no update was necessary; copy values for i-1 + { + Neff[i]=Neff[i-1]; + } + } // end par.wg==0 // Calculate amino acid frequencies q.f[i][a] from weights wi[k] - for (a=0; a<20; a++) q.f[i][a]=0; - for (k=0; k<N_in; k++) if (in[k]) q.f[i][ (int)X[k][i] ]+=wi[k]; + for (a=0; a<20; a++) + q.f[i][a]=0; + for (k=0; k<N_in; k++) + if (in[k]) + q.f[i][ (int)X[k][i] ]+=wi[k]; NormalizeTo1(q.f[i],NAA,pb); // Calculate transition probabilities from M state q.tr[i][M2M]=q.tr[i][M2D]=q.tr[i][M2I]=0.0; for (k=0; k<N_in; k++) //for all sequences - { - if (!in[k]) continue; - //if input alignment is local ignore transitions from and to end gaps - if (X[k][i]<ANY) //current state is M - { - if (I[k][i]) //next state is I - q.tr[i][M2I]+=wi[k]; - else if (X[k][i+1]<=ANY) //next state is M - q.tr[i][M2M]+=wi[k]; - else if (X[k][i+1]==GAP) //next state is D - q.tr[i][M2D]+=wi[k]; - } - } // end for(k) + { + if (!in[k]) continue; + //if input alignment is local ignore transitions from and to end gaps + if (X[k][i]<ANY) //current state is M + { + if (I[k][i]) //next state is I + q.tr[i][M2I]+=wi[k]; + else if (X[k][i+1]<=ANY) //next state is M + q.tr[i][M2M]+=wi[k]; + else if (X[k][i+1]==GAP) //next state is D + q.tr[i][M2D]+=wi[k]; + } + } // end for(k) // Normalize and take log sum = q.tr[i][M2M]+q.tr[i][M2I]+q.tr[i][M2D]+FLT_MIN; q.tr[i][M2M]=log2(q.tr[i][M2M]/sum); @@ -1959,17 +2052,17 @@ Alignment::Amino_acid_frequencies_and_transitions_from_M_state(HMM& q, char* in) q.tr[i][M2D]=log2(q.tr[i][M2D]/sum); // for (k=0; k<N_in; k++) if (in[k]) w[k][i]=wi[k]; - } - // DD TODO:fill in all the missing Neff values + } // end loop through alignment columns i + // DD TODO:fill in all the missing Neff values - // end loop through alignment columns i ////////////////////////////////////////////////////////////////////////////////////////////// delete[](wi); wi=NULL; // delete n[][] for (j=1; j<=L; j++){ - delete[](n[j]); (n[j]) = NULL; + delete[](n[j]); + (n[j]) = NULL; } delete[](n); (n) = NULL; @@ -1982,41 +2075,60 @@ Alignment::Amino_acid_frequencies_and_transitions_from_M_state(HMM& q, char* in) q.Neff_M[0]=99.999; // Neff_av[0] is used for calculation of transition pseudocounts for the start state // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities - for (a=0; a<20; a++) q.f[0][a]=q.f[L+1][a]=pb[a]; + for (a=0; a<20; a++) + q.f[0][a]=q.f[L+1][a]=pb[a]; // Assign Neff_M[i] and calculate average over alignment, Neff_M[0] if (par.wg==1) - { + { +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(a) +#endif for (i=1; i<=L; i++) - { - float sum=0.0f; - for (a=0; a<20; a++) - if (q.f[i][a]>1E-10) sum -= q.f[i][a]*fast_log2(q.f[i][a]); - q.Neff_HMM+=pow(2.0,sum); - } + { + float sum=0.0f; + for (a=0; a<20; a++) + if (q.f[i][a]>1E-10) + sum -= q.f[i][a]*fast_log2(q.f[i][a]); +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + q.Neff_HMM+=pow(2.0,sum); + } q.Neff_HMM/=L; float Nlim=fmax(10.0,q.Neff_HMM+1.0); // limiting Neff float scale=log2((Nlim-q.Neff_HMM)/(Nlim-1.0)); // for calculating Neff for those seqs with inserts at specific pos +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(k) +#endif for (i=1; i<=L; i++) - { - float w_M=-1.0/N_filtered; - for (k=0; k<N_in; k++) - if (in[k] && X[k][i]<=ANY) w_M+=wg[k]; - if (w_M<0) q.Neff_M[i]=1.0; - else q.Neff_M[i] = Nlim - (Nlim-1.0)*fpow2(scale*w_M); - // fprintf(stderr,"M i=%3i ncol=--- Neff_M=%5.2f Nlim=%5.2f w_M=%5.3f Neff_M=%5.2f\n",i,q.Neff_HMM,Nlim,w_M,q.Neff_M[i]); - } - } + { + float w_M=-1.0/N_filtered; + for (k=0; k<N_in; k++) + if (in[k] && X[k][i]<=ANY) + w_M+=wg[k]; + if (w_M<0) + q.Neff_M[i]=1.0; + else + q.Neff_M[i] = Nlim - (Nlim-1.0)*fpow2(scale*w_M); + // fprintf(stderr,"M i=%3i ncol=--- Neff_M=%5.2f Nlim=%5.2f w_M=%5.3f Neff_M=%5.2f\n",i,q.Neff_HMM,Nlim,w_M,q.Neff_M[i]); + } + } else - { + { for (i=1; i<=L; i++) - { - q.Neff_HMM+=Neff[i]; - q.Neff_M[i]=Neff[i]; - if (q.Neff_M[i] == 0) { q.Neff_M[i] = 1; } /* MR1 */ - } + { + q.Neff_HMM+=Neff[i]; + q.Neff_M[i]=Neff[i]; + if (q.Neff_M[i] == 0) { + q.Neff_M[i] = 1; + } /* MR1 */ + } q.Neff_HMM/=L; - } + } // end par.wg==1 delete[] Neff; Neff = NULL; @@ -2266,158 +2378,200 @@ Alignment::Transitions_from_D_state(HMM& q, char* in) // Global weights? if (par.wg==1) { - for (k=0; k<N_in; k++) wi[k]=wg[k]; + for (k=0; k<N_in; k++) + wi[k]=wg[k]; Nlim=fmax(10.0,q.Neff_HMM+1.0); // limiting Neff scale=log2((Nlim-q.Neff_HMM)/(Nlim-1.0)); // for calculating Neff for those seqs with dels at specific pos } // Initialization n = new(int*[L+2]); - for (j=1; j<=L; j++) n[j]=new(int[NAA+3]); - for (j=1; j<=L; j++) - for (a=0; a<NAA+3; a++) n[j][a]=0; - - +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(a) +#endif + for (j=1; j<=L; j++) { + n[j]=new(int[NAA+3]); + for (a=0; a<NAA+3; a++) + n[j][a]=0; + } ////////////////////////////////////////////////////////////////////////////////////////////// // Main loop through alignment columns for (i=1; i<=L; i++) // Calculate wi[k] at position i as well as Neff[i] + { + if (par.wg==0) // if local weights { - if (par.wg==0) // if local weights + change=0; + // Check all sequences k and update n[j][a] and ri[j] if necessary + for (k=0; k<N_in; k++) + { + if (!in[k]) continue; + if (X[k][i-1]!=GAP && X[k][i]==GAP) + { // ... if sequence k was NOT included in i-1 and has to be included for column i + change=1; + nseqi++; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int j=1; j<=L; j++) + n[j][ (int)X[k][j]]++; + } + else if (X[k][i-1]==GAP && X[k][i]!=GAP) + { // ... if sequence k WAS included in i-1 and has to be thrown out for column i + change=1; + nseqi--; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int j=1; j<=L; j++) + n[j][ (int)X[k][j]]--; + } + } //end for (k) + nseqs[i]=nseqi; + + // If there is no sequence in subalignment j ... + if (nseqi==0) + { + ncol=0; + Neff[i]=0.0; // effective number of sequences = 0! + q.tr[i][D2M]=-100000; + q.tr[i][D2D]=-100000; + continue; + } + + // If subalignment changed: update weights wi[k] and Neff[i] + if (change) + { + // Initialize weights and numbers of residues for subalignment i + ncol=0; + for (k=0; k<N_in; k++) + wi[k]=0.0; + + // sum wg[k][i] over all columns j and sequences k of subalignment +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(naa, a, k) +#endif + for (j=1; j<=L; j++) { - change=0; - // Check all sequences k and update n[j][a] and ri[j] if necessary + if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; + naa=0; + for (a=0; a<20; a++) + if(n[j][a]) + naa++; + if (naa==0) continue; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + ncol++; for (k=0; k<N_in; k++) + { + if (in[k] && X[k][i]==GAP && X[k][j]<ANY) { - if (!in[k]) continue; - if (X[k][i-1]!=GAP && X[k][i]==GAP) - { // ... if sequence k was NOT included in i-1 and has to be included for column i - change=1; - nseqi++; - for (int j=1; j<=L; j++) n[j][ (int)X[k][j]]++; - } - else if (X[k][i-1]==GAP && X[k][i]!=GAP) - { // ... if sequence k WAS included in i-1 and has to be thrown out for column i - change=1; - nseqi--; - for (int j=1; j<=L; j++) n[j][ (int)X[k][j]]--; - } - } //end for (k) - nseqs[i]=nseqi; - - // If there is no sequence in subalignment j ... - if (nseqi==0) - { - ncol=0; - Neff[i]=0.0; // effective number of sequences = 0! - q.tr[i][D2M]=-100000; - q.tr[i][D2D]=-100000; - continue; + if (!n[j][ (int)X[k][j]]) {fprintf(stderr,"Error: Di=%i: n[%i][X[%i]]=0! (X[%i]=%i)\n",i,j,k,k,X[k][j]);} +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + wi[k]+=1.0/float(n[j][ (int)X[k][j] ]*naa); } + } + } // end for (j) + // Check whether number of columns in subalignment is sufficient + if (ncol<NCOLMIN) + // Take global weights + for (k=0; k<N_in; k++) + if(in[k] && X[k][i]==GAP) + wi[k]=wg[k]; + else + wi[k]=0.0; + + // Calculate Neff[i] + Neff[i]=0.0; +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp parallel for schedule(static) private(a, k, fj) +#endif + for (j=1; j<=L; j++) + { + if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; + for (a=0; a<20; a++) fj[a]=0; + for (k=0; k<N_in; k++) + if (in[k] && X[k][i]==GAP && X[k][j]<ANY) + fj[ (int)X[k][j] ]+=wi[k]; + NormalizeTo1(fj,NAA); + for (a=0; a<20; a++) + if (fj[a]>1E-10) +//#if 0 +#ifdef HAVE_OPENMP +#pragma omp atomic +#endif + Neff[i]-=fj[a]*fast_log2(fj[a]); + } + if (ncol>0) Neff[i]=pow(2.0,Neff[i]/ncol); else Neff[i]=1.0; - // If subalignment changed: update weights wi[k] and Neff[i] - if (change) - { - // Initialize weights and numbers of residues for subalignment i - ncol=0; - for (k=0; k<N_in; k++) wi[k]=0.0; - - // sum wg[k][i] over all columns j and sequences k of subalignment - for (j=1; j<=L; j++) - { - if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; - naa=0; for (a=0; a<20; a++) if(n[j][a]) naa++; - if (naa==0) continue; - ncol++; - for (k=0; k<N_in; k++) - { - if (in[k] && X[k][i]==GAP && X[k][j]<ANY) - { - if (!n[j][ (int)X[k][j]]) {fprintf(stderr,"Error: Di=%i: n[%i][X[%i]]=0! (X[%i]=%i)\n",i,j,k,k,X[k][j]);} - wi[k]+=1.0/float(n[j][ (int)X[k][j] ]*naa); - } - } - } - - // Check whether number of columns in subalignment is sufficient - if (ncol<NCOLMIN) - // Take global weights - for (k=0; k<N_in; k++) - if(in[k] && X[k][i]==GAP) wi[k]=wg[k]; else wi[k]=0.0; - - // Calculate Neff[i] - Neff[i]=0.0; - for (j=1; j<=L; j++) - { - if (n[j][ENDGAP]>MAXENDGAPFRAC*nseqi) continue; - for (a=0; a<20; a++) fj[a]=0; - for (k=0; k<N_in; k++) - if (in[k] && X[k][i]==GAP && X[k][j]<ANY) - fj[ (int)X[k][j] ]+=wi[k]; - NormalizeTo1(fj,NAA); - for (a=0; a<20; a++) - if (fj[a]>1E-10) Neff[i]-=fj[a]*fast_log2(fj[a]); - } - if (ncol>0) Neff[i]=pow(2.0,Neff[i]/ncol); else Neff[i]=1.0; - - } + } - else //no update was necessary; copy values for i-1 - { - Neff[i]=Neff[i-1]; - } + else //no update was necessary; copy values for i-1 + { + Neff[i]=Neff[i-1]; + } - // Calculate transition probabilities from D state - q.tr[i][D2M]=q.tr[i][D2D]=0.0; - for (k=0; k<N_in; k++) //for all sequences - { - if (in[k] && X[k][i]==GAP) //current state is D - { - if (X[k][i+1]==GAP) //next state is D - q.tr[i][D2D]+=wi[k]; - else if (X[k][i+1]<=ANY) //next state is M - q.tr[i][D2M]+=wi[k]; - } - } // end for(k) + // Calculate transition probabilities from D state + q.tr[i][D2M]=q.tr[i][D2D]=0.0; + for (k=0; k<N_in; k++) //for all sequences + { + if (in[k] && X[k][i]==GAP) //current state is D + { + if (X[k][i+1]==GAP) //next state is D + q.tr[i][D2D]+=wi[k]; + else if (X[k][i+1]<=ANY) //next state is M + q.tr[i][D2M]+=wi[k]; } + } // end for(k) + } - else // fast global weights? + else // fast global weights? + { + float w_D=-1.0/N_filtered; + ncol=0; + q.tr[i][D2M]=q.tr[i][D2D]=0.0; + // Calculate amino acid frequencies fj[a] from weights wg[k] + for (k=0; k<N_in; k++) //for all sequences + if (in[k] && X[k][i]==GAP) //current state is D { - float w_D=-1.0/N_filtered; - ncol=0; - q.tr[i][D2M]=q.tr[i][D2D]=0.0; - // Calculate amino acid frequencies fj[a] from weights wg[k] - for (k=0; k<N_in; k++) //for all sequences - if (in[k] && X[k][i]==GAP) //current state is D - { - ncol++; - w_D+=wg[k]; - if (X[k][i+1]==GAP) //next state is D - q.tr[i][D2D]+=wi[k]; - else if (X[k][i+1]<=ANY) //next state is M - q.tr[i][D2M]+=wi[k]; - } - if (ncol>0) - { - if (w_D<0) Neff[i]=1.0; - else Neff[i] = Nlim - (Nlim-1.0)*fpow2(scale*w_D); - // fprintf(stderr,"D i=%3i ncol=%3i Neff_M=%5.2f Nlim=%5.2f w_D=%5.3f Neff_D=%5.2f\n",i,ncol,q.Neff_HMM,Nlim,w_D,Neff[i]); - } - else - { - Neff[i]=0.0; // effective number of sequences = 0! - q.tr[i][D2M]=-100000; - q.tr[i][D2D]=-100000; - continue; - } + ncol++; + w_D+=wg[k]; + if (X[k][i+1]==GAP) //next state is D + q.tr[i][D2D]+=wi[k]; + else if (X[k][i+1]<=ANY) //next state is M + q.tr[i][D2M]+=wi[k]; } + if (ncol>0) + { + if (w_D<0) Neff[i]=1.0; + else Neff[i] = Nlim - (Nlim-1.0)*fpow2(scale*w_D); + // fprintf(stderr,"D i=%3i ncol=%3i Neff_M=%5.2f Nlim=%5.2f w_D=%5.3f Neff_D=%5.2f\n",i,ncol,q.Neff_HMM,Nlim,w_D,Neff[i]); + } + else + { + Neff[i]=0.0; // effective number of sequences = 0! + q.tr[i][D2M]=-100000; + q.tr[i][D2D]=-100000; + continue; + } + } - // Normalize and take log - sum = q.tr[i][D2M]+q.tr[i][D2D]; - q.tr[i][D2M]=log2(q.tr[i][D2M]/sum); - q.tr[i][D2D]=log2(q.tr[i][D2D]/sum); + // Normalize and take log + sum = q.tr[i][D2M]+q.tr[i][D2D]; + q.tr[i][D2M]=log2(q.tr[i][D2M]/sum); + q.tr[i][D2D]=log2(q.tr[i][D2D]/sum); - } + } // end loop through alignment columns i ////////////////////////////////////////////////////////////////////////////////////////////// @@ -2426,15 +2580,14 @@ Alignment::Transitions_from_D_state(HMM& q, char* in) q.Neff_D[0]=99.999; // Assign Neff_D[i] - for (i=1; i<=L; i++) + for (i=1; i<=L; i++) { q.Neff_D[i]=Neff[i]; - - delete[](wi); wi = NULL;/* FIXME: FS */ - // delete n[][] - for (j=1; j<=L; j++){ - delete[](n[j]); (n[j]) = NULL; + delete[](n[i]); + (n[i]) = NULL; } + delete[](n); (n) = NULL; + delete[](wi); wi = NULL;/* FIXME: FS */ delete[] Neff; Neff = NULL; return; diff --git a/src/hhalign/hhfullalignment-C.h b/src/hhalign/hhfullalignment-C.h index 5fa477d..9140c12 100644 --- a/src/hhalign/hhfullalignment-C.h +++ b/src/hhalign/hhfullalignment-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhfullalignment-C.h 284 2013-06-12 10:10:11Z fabian $ + * RCS $Id: hhfullalignment-C.h 309 2016-06-13 14:10:11Z fabian $ */ // hhfullalignment.C @@ -48,6 +48,7 @@ using std::endl; #include "hhhit.h" #include "hhhalfalignment.h" #endif +//#include "new_new.h" /* memory tracking */ diff --git a/src/hhalign/hhfunc-C.h b/src/hhalign/hhfunc-C.h index cb41ef2..8b4152b 100644 --- a/src/hhalign/hhfunc-C.h +++ b/src/hhalign/hhfunc-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhfunc-C.h 290 2013-09-20 15:18:12Z fabian $ + * RCS $Id: hhfunc-C.h 309 2016-06-13 14:10:11Z fabian $ */ /* @@ -29,6 +29,7 @@ // hhfunc.C +//#include "new_new.h" /* memory tracking */ /** * AddBackgroundFrequencies() @@ -218,7 +219,7 @@ AddBackgroundFrequencies(float **ppfFreq, float **ppfPseudoF, float **ppfPseudoT * * @param[in] iRnPtype * type of read/prepare - * enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM}; + * enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM, INTERN_HMM_2_HMM}; * @param[in] ppcProf * alignment * @param[in] iCnt @@ -1141,7 +1142,7 @@ readHMMWrapper(hmm_light *rHMM_p, } /* (0 <= iA < AMINOACIDS) */ rHMM_p->seq[rHMM_p->ncons][i] = i2aa(iAmax); } /* (0 <= i < rHMM_p->L) */ - + rHMM_p->seq[rHMM_p->ncons][i] = '\0'; /* FS, r291 -> */ } /* ncons not set */ diff --git a/src/hhalign/hhhalfalignment-C.h b/src/hhalign/hhhalfalignment-C.h index 25c4e3c..53f1d97 100644 --- a/src/hhalign/hhhalfalignment-C.h +++ b/src/hhalign/hhhalfalignment-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhhalfalignment-C.h 227 2011-03-28 17:03:09Z fabian $ + * RCS $Id: hhhalfalignment-C.h 309 2016-06-13 14:10:11Z fabian $ */ // hhfullalignment.C @@ -47,6 +47,7 @@ using std::endl; #include "hhalignment.h" // class Alignment #include "hhhit.h" #endif +//#include "new_new.h" /* memory tracking */ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// diff --git a/src/hhalign/hhhit-C.h b/src/hhalign/hhhit-C.h index 981ad18..e047946 100644 --- a/src/hhalign/hhhit-C.h +++ b/src/hhalign/hhhit-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhhit-C.h 284 2013-06-12 10:10:11Z fabian $ + * RCS $Id: hhhit-C.h 308 2016-06-13 14:07:35Z fabian $ */ // hhhit.C @@ -47,6 +47,7 @@ using std::ofstream; #include "hhalignment.h" // class Alignment #include "hhhitlist.h" // class HitList #endif +//#include "new_new.h" /* memory tracking */ #define CALCULATE_MAX6(max, var1, var2, var3, var4, var5, var6, varb) \ if (var1>var2) { max=var1; varb=STOP;} \ @@ -159,10 +160,10 @@ Hit::Delete() delete[] dbfile; dbfile = NULL; for (int k=0; k<n_display; k++) { - //delete[] sname[k]; sname[k] = NULL; + delete[] sname[k]; sname[k] = NULL; /* this seems to be the long lost piece of memory, FS, 2016-06-09 */ delete[] seq[k]; seq[k] = NULL; } - //delete[] sname; sname = NULL; + delete[] sname; sname = NULL; /* this seems to be the long lost piece of memory, FS, 2016-06-09 */ delete[] seq; seq = NULL; } } diff --git a/src/hhalign/hhhitlist-C.h b/src/hhalign/hhhitlist-C.h index ec2a5ef..95777fc 100644 --- a/src/hhalign/hhhitlist-C.h +++ b/src/hhalign/hhhitlist-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhhitlist-C.h 284 2013-06-12 10:10:11Z fabian $ + * RCS $Id: hhhitlist-C.h 307 2016-06-13 14:04:39Z fabian $ */ // hhhitlist.C @@ -49,6 +49,7 @@ using std::endl; #include "hhhalfalignment.h" #include "hhfullalignment.h" #endif +//#include "new_new.h" /* memory tracking */ ////////////////////////////////////////////////////////////////////////////// @@ -3191,7 +3192,10 @@ HitList::ClobberGlobal(void){ } // printf("POINTER:\t\t\t%p=TAIL\n", tail); - + if ( (NULL != current) && (head != current) ){ + delete current; /* this seems to be the long lost piece of memory, FS, 2016-04-15 */ + current = NULL; + } head->next = tail; tail->prev = head; size = 0; diff --git a/src/hhalign/hhhmm-C.h b/src/hhalign/hhhmm-C.h index 84760a3..ce4d7db 100644 --- a/src/hhalign/hhhmm-C.h +++ b/src/hhalign/hhhmm-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: hhhmm-C.h 284 2013-06-12 10:10:11Z fabian $ + * RCS $Id: hhhmm-C.h 309 2016-06-13 14:10:11Z fabian $ */ @@ -45,6 +45,7 @@ using std::ofstream; #include "hhdecl-C.h" #include "hhutil-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc. #endif +//#include "new_new.h" /* memory tracking */ // #ifndef WNLIB // #define WNLIB diff --git a/src/hhalign/util-C.h b/src/hhalign/util-C.h index b422f35..224ce17 100644 --- a/src/hhalign/util-C.h +++ b/src/hhalign/util-C.h @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: util-C.h 155 2010-11-17 12:18:47Z fabian $ + * RCS $Id: util-C.h 309 2016-06-13 14:10:11Z fabian $ */ // Utility subroutines @@ -29,6 +29,7 @@ #include <time.h> // clock #endif #include <sys/time.h> +//#include "new_new.h" /* memory tracking */ ///////////////////////////////////////////////////////////////////////////////////// // Arithmetics diff --git a/src/mymain.c b/src/mymain.c index c2dba54..16806e5 100644 --- a/src/mymain.c +++ b/src/mymain.c @@ -15,7 +15,7 @@ ********************************************************************/ /* - * RCS $Id: mymain.c 290 2013-09-20 15:18:12Z fabian $ + * RCS $Id: mymain.c 296 2014-10-07 12:15:41Z fabian $ */ #ifdef HAVE_CONFIG_H @@ -140,6 +140,7 @@ SetDefaultUserOpts(cmdline_opts_t *opts) opts->bIsProfile = FALSE; opts->aln_opts.bUseKimura = FALSE; opts->aln_opts.bPercID = FALSE; + opts->aln_opts.pcHMMBatch = NULL; opts->iMaxNumSeq = INT_MAX; opts->iMaxSeqLen = INT_MAX; @@ -358,6 +359,8 @@ ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv) struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>", /*min*/ 0, /*max*/ 128, "HMM input files"); + struct arg_file *opt_hmm_batch = arg_file0(NULL, "hmm-batch", "<file>", /* FS, r291 -> */ + "specify HMMs for individual sequences"); struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign", "Dealign input sequences"); struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1", @@ -491,6 +494,7 @@ ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv) void *argtable[] = {rem_seq_input, opt_seqin, opt_hmm_in, + opt_hmm_batch, /* FS, r291 -> */ opt_dealign, opt_profile1, opt_profile2, @@ -785,6 +789,16 @@ ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv) } + /* HMM Batch, FS, r291 -> + */ + user_opts->aln_opts.pcHMMBatch = NULL; + if (opt_hmm_batch->count>0){ + user_opts->aln_opts.pcHMMBatch = CkStrdup(opt_hmm_batch->filename[0]); + if (! FileExists(user_opts->aln_opts.pcHMMBatch)){ + Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcHMMBatch); + } + } + /* Pair distance method */ if (opt_pairdist->count > 0) { @@ -1058,7 +1072,7 @@ MyMain(int argc, char **argv) if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs, - cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { + cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) { Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile); } #if TRACE @@ -1106,7 +1120,7 @@ MyMain(int argc, char **argv) if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs, - cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { + cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) { Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed", cmdline_opts.pcProfile1Infile); } @@ -1132,7 +1146,7 @@ MyMain(int argc, char **argv) if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs, - cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { + cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) { Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed", cmdline_opts.pcProfile2Infile); } diff --git a/src/squid/a2m.c b/src/squid/a2m.c index 28fe3a3..b5032ff 100644 --- a/src/squid/a2m.c +++ b/src/squid/a2m.c @@ -11,7 +11,7 @@ * * reading/writing A2M (aligned FASTA) files. * - * RCS $Id: a2m.c 290 2013-09-20 15:18:12Z fabian $ (Original squid RCS Id: a2m.c,v 1.1 1999/07/15 22:26:40 eddy Exp) + * RCS $Id: a2m.c 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: a2m.c,v 1.1 1999/07/15 22:26:40 eddy Exp) */ #include <stdio.h> @@ -148,4 +148,9 @@ WriteA2M(FILE *fp, MSA *msa) fprintf(fp, "\n");*/ } -} + +#ifdef CLUSTALO + free(buf); buf = NULL; +#endif + +} /* this is the end of WriteA2M() */ diff --git a/src/squid/msa.c b/src/squid/msa.c index 20200dd..7c19858 100644 --- a/src/squid/msa.c +++ b/src/squid/msa.c @@ -13,7 +13,7 @@ * SQUID's interface for multiple sequence alignment * manipulation: access to the MSA object. * - * RCS $Id: msa.c 291 2014-02-27 18:20:54Z fabian $ (Original squid RCS Id: msa.c,v 1.18 2002/10/12 04:40:35 eddy Exp) + * RCS $Id: msa.c 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: msa.c,v 1.18 2002/10/12 04:40:35 eddy Exp) */ #include <stdio.h> @@ -98,6 +98,8 @@ MSAAlloc(int nseq, int alen) msa->sslen = NULL; msa->sa = NULL; msa->salen = NULL; + msa->co = NULL; + msa->colen = NULL; msa->index = GKIInit(); msa->lastidx = 0; @@ -253,6 +255,7 @@ MSAFree(MSA *msa) Free2DArray((void **) msa->sqdesc, msa->nseq); Free2DArray((void **) msa->ss, msa->nseq); Free2DArray((void **) msa->sa, msa->nseq); + Free2DArray((void **) msa->co, msa->nseq); if (msa->sqlen != NULL) free(msa->sqlen); if (msa->wgt != NULL) free(msa->wgt); @@ -663,7 +666,7 @@ MSAVerifyParse(MSA *msa) if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s", msa->name != NULL ? msa->name : ""); - msa->alen = msa->sqlen[0]; + msa->alen = msa->sqlen[0]; /* We can rely on msa->sqname[] being valid for any index, * because of the way the line parsers always store any name @@ -681,7 +684,7 @@ MSAVerifyParse(MSA *msa) msa->sqname[idx], msa->name != NULL ? msa->name : ""); /* all aseq must be same length. */ - if (msa->sqlen[idx] != msa->alen) + if (msa->sqlen[idx] != msa->alen) Die("Parse error: sequence %s: length %d, expected %d in alignment %s", msa->sqname[idx], msa->sqlen[idx], msa->alen, msa->name != NULL ? msa->name : ""); @@ -698,13 +701,13 @@ MSAVerifyParse(MSA *msa) } /* if cons SS is present, must have length right */ - if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen) + if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen) /* FIXME */ Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s", strlen(msa->ss_cons), msa->alen, msa->name != NULL ? msa->name : ""); /* if cons SA is present, must have length right */ - if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen) + if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen) /* FIXME */ Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s", strlen(msa->sa_cons), msa->alen, msa->name != NULL ? msa->name : ""); @@ -728,6 +731,70 @@ MSAVerifyParse(MSA *msa) } +/* @* MSAVerifyParseDub */ +void +MSAVerifyParseDub(MSA *msa) +{ + int idx; + + if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s", + msa->name != NULL ? msa->name : ""); + + msa->alen = msa->sqlen[0]; + + /* We can rely on msa->sqname[] being valid for any index, + * because of the way the line parsers always store any name + * they add to the index. + */ + for (idx = 0; idx < msa->nseq; idx++) + { + /* aseq is required. */ + if (msa->aseq[idx] == NULL) + Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx], + msa->name != NULL ? msa->name : ""); + /* either all weights must be set, or none of them */ + if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0) + Die("Parse error: some weights are set, but %s doesn't have one in alignment %s", + msa->sqname[idx], + msa->name != NULL ? msa->name : ""); + /* this is Dublin format, aseq don't have to be same length. */ + if (msa->sqlen[idx] > msa->alen) { + msa->alen = msa->sqlen[idx]; + } + /* if SS is present, must have length right */ + if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->sqlen[idx]) + Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s", + msa->sqname[idx], msa->sslen[idx], msa->sqlen[idx], + msa->name != NULL ? msa->name : ""); + /* if SA is present, must have length right */ + if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->sqlen[idx]) + Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s", + msa->sqname[idx], msa->salen[idx], msa->sqlen[idx], + msa->name != NULL ? msa->name : ""); + + /* if CO is present, must have length right */ + if (msa->co != NULL && msa->co[idx] != NULL && msa->colen[idx] != msa->sqlen[idx]) + Die("Parse error: #=GR CO annotation for %s: length %d, expected %d in alignment %s", + msa->sqname[idx], msa->colen[idx], msa->sqlen[idx], + msa->name != NULL ? msa->name : ""); + } + + + + + /* Check that all or no weights are set */ + if (!(msa->flags & MSA_SET_WGT)) + FSet(msa->wgt, msa->nseq, 1.0); /* default weights */ + + /* Clean up a little from the parser */ + if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; } + if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; } + if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; } + if (msa->colen != NULL) { free(msa->colen); msa->colen = NULL; } + + return; +} /* this is the end of MSAVerifyParseDub() */ + /* Function: MSAFileOpen() @@ -919,6 +986,7 @@ MSAFileRead(MSAFILE *afp) case MSAFILE_CLUSTAL: msa = ReadClustal(afp); break; case MSAFILE_SELEX: msa = ReadSELEX(afp); break; case MSAFILE_PHYLIP: msa = ReadPhylip(afp); break; + case MSAFILE_DUBLIN: msa = ReadDublin(afp); break; /* -> r296, FS */ default: Die("MSAFILE corrupted: bad format index"); } diff --git a/src/squid/msa.h b/src/squid/msa.h index dec244b..ef0854d 100644 --- a/src/squid/msa.h +++ b/src/squid/msa.h @@ -16,7 +16,7 @@ * Header file for SQUID's multiple sequence alignment * manipulation code. * - * RCS $Id: msa.h 291 2014-02-27 18:20:54Z fabian $ (Original squid RCS Id: msa.h,v 1.12 2002/10/12 04:40:35 eddy Exp) + * RCS $Id: msa.h 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: msa.h,v 1.12 2002/10/12 04:40:35 eddy Exp) */ #include <stdio.h> /* FILE support */ @@ -130,6 +130,7 @@ typedef struct msa_struct { char **sqacc; /* accession numbers for individual sequences */ char **sqdesc; /* description lines for individual sequences */ char **ss; /* per-seq secondary structure annotation, or NULL */ + char **co; /* per-seq confidence of secondary structure annotation, or NULL, -> r296, FS */ char **sa; /* per-seq surface accessibility annotation, or NULL */ float cutoff[MSA_MAXCUTOFFS]; /* NC, TC, GA cutoffs propagated to Pfam/Rfam */ int cutoff_is_set[MSA_MAXCUTOFFS];/* TRUE if a cutoff is set; else FALSE */ @@ -171,6 +172,7 @@ typedef struct msa_struct { int *sqlen; /* individual sequence lengths during parsing */ int *sslen; /* individual ss lengths during parsing */ int *salen; /* individual sa lengths during parsing */ + int *colen; /* individual co lengths during parsing */ int lastidx; /* last index we saw; use for guessing next */ } MSA; #define MSA_SET_WGT (1 << 0) /* track whether wgts were set, or left at default 1.0 */ @@ -214,6 +216,7 @@ typedef struct msafile_struct { #define MSAFILE_EPS 107 /* Encapsulated PostScript (output only) */ #ifdef CLUSTALO #define MSAFILE_VIENNA 108 /* Vienna: concatenated fasta */ +#define MSAFILE_DUBLIN 109 /* Dublin: modified Stockholm format */ #endif #define IsAlignmentFormat(fmt) ((fmt) > 100) @@ -248,6 +251,7 @@ extern void MSAAppendGC(MSA *msa, char *tag, char *value); extern char *MSAGetGC(MSA *msa, char *tag); extern void MSAAppendGR(MSA *msa, char *tag, int seqidx, char *value); extern void MSAVerifyParse(MSA *msa); +extern void MSAVerifyParseDub(MSA *msa); extern int MSAGetSeqidx(MSA *msa, char *name, int guess); extern MSA *MSAFromAINFO(char **aseq, AINFO *ainfo); @@ -304,6 +308,7 @@ extern void WriteSELEXOneBlock(FILE *fp, MSA *msa); /* from stockholm.c */ +extern MSA *ReadDublin(MSAFILE *afp); extern MSA *ReadStockholm(MSAFILE *afp); extern void WriteStockholm(FILE *fp, MSA *msa); extern void WriteStockholmOneBlock(FILE *fp, MSA *msa); diff --git a/src/squid/sqio.c b/src/squid/sqio.c index aa17744..dfd403a 100644 --- a/src/squid/sqio.c +++ b/src/squid/sqio.c @@ -336,7 +336,12 @@ FreeSequence(char *seq, SQINFO *sqinfo) free(sqinfo->sa); } } -} + if (sqinfo->flags & SQINFO_CO){ + if (NULL != sqinfo->co){ /* FS, r296 -> */ + free(sqinfo->co); + } + } +} /* this is the end of FreeSequence() */ int SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag) @@ -438,6 +443,7 @@ SeqinfoCopy(SQINFO *sq1, SQINFO *sq2) if (sq2->flags & SQINFO_TYPE) sq1->type = sq2->type; if (sq2->flags & SQINFO_SS) sq1->ss = Strdup(sq2->ss); if (sq2->flags & SQINFO_SA) sq1->sa = Strdup(sq2->sa); + if (sq2->flags & SQINFO_CO) sq1->co = Strdup(sq2->co); } /* Function: ToDNA() @@ -1124,6 +1130,16 @@ ReadSeq(SQFILE *V, int format, char **ret_seq, SQINFO *sqinfo) #endif sqinfo->flags |= SQINFO_SA; } + if (V->msa->co != NULL && V->msa->co[V->msa->lastidx] != NULL) { +/* AW: stopping squid from dealigning sequences and corresponding info */ +#ifdef CLUSTALO + sqinfo->co = sre_strdup(V->msa->co[V->msa->lastidx], V->msa->alen); +#else + MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen, + V->msa->co[V->msa->lastidx], &(sqinfo->co)); +#endif + sqinfo->flags |= SQINFO_CO; + } /* co */ V->msa->lastidx++; } else { @@ -1236,6 +1252,9 @@ SeqfileFormat(FILE *fp) strncmp(buf, "!!NA_SEQUENCE", 13) == 0) { fmt = SQFILE_GCG; goto DONE; } + if (strncmp(buf, "# DUBLIN 1.", 11) == 0) /* -> r296, FS */ + { fmt = MSAFILE_DUBLIN; goto DONE; } + if (strncmp(buf, "# STOCKHOLM 1.", 14) == 0) { fmt = MSAFILE_STOCKHOLM; goto DONE; } diff --git a/src/squid/squid.h b/src/squid/squid.h index f86bb5c..a987bfb 100644 --- a/src/squid/squid.h +++ b/src/squid/squid.h @@ -147,6 +147,7 @@ struct seqinfo_s { int type; /* kRNA, kDNA, kAmino, or kOther */ char *ss; /* 0..len-1 secondary structure string */ char *sa; /* 0..len-1 % side chain surface access. */ + char *co; /* 0..len-1 secondary struct confidence */ }; typedef struct seqinfo_s SQINFO; @@ -161,6 +162,7 @@ typedef struct seqinfo_s SQINFO; #define SQINFO_OLEN (1 << 8) #define SQINFO_SS (1 << 9) #define SQINFO_SA (1 << 10) +#define SQINFO_CO (1 << 11) /**************************************************** @@ -244,6 +246,7 @@ extern int aa_index[]; /* convert 0..19 indices to 0..26 */ /* 17 was Clustal. Now alignment format*/ #ifdef CLUSTALO #define SQFILE_VIENNA 18 /* Vienna format: concatenated fasta */ +#define SQFILE_DUBLIN 19 /* unaligned version of Stockholm */ #endif #define IsUnalignedFormat(fmt) ((fmt) && (fmt) < 100) diff --git a/src/squid/stockholm.c b/src/squid/stockholm.c index b5f0cb4..9e7aee5 100644 --- a/src/squid/stockholm.c +++ b/src/squid/stockholm.c @@ -24,7 +24,7 @@ * MSAFree(msa); * } * - * RCS $Id: stockholm.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: stockholm.c,v 1.7 2002/10/12 04:40:36 eddy Exp) + * RCS $Id: stockholm.c 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: stockholm.c,v 1.7 2002/10/12 04:40:36 eddy Exp) */ #include <stdio.h> #include <string.h> @@ -157,6 +157,89 @@ alignment file, right?), please either:\n\ return msa; } +/* Function: ReadDublin() + * Date: 2014-10-15 + * + * Purpose: Parse the next sequences from an open Dublin + * format file. Return the sequences, or + * NULL if there are no more sequences in the file. + * + * Args: afp - open alignment file + * + * Returns: MSA * - an alignment object. + * caller responsible for an MSAFree() + * NULL if no more alignments + * + * Diagnostics: + * Will Die() here with a (potentially) useful message + * if a parsing error occurs + */ +MSA * +ReadDublin(MSAFILE *afp) +{ + MSA *msa; + char *s; + int status; + + if (feof(afp->f)) return NULL; + + /* Initialize allocation of the MSA. + */ + msa = MSAAlloc(10, 0); + + /* Check the magic Dublin header line. + * We have to skip blank lines here, else we perceive + * trailing blank lines in a file as a format error when + * reading in multi-record mode. + */ + do { + if ((s = MSAFileGetLine(afp)) == NULL) { + MSAFree(msa); + return NULL; + } + } while (IsBlankline(s)); + + if (strncmp(s, "# DUBLIN 1.", 11) != 0) + Die("\ +File %s doesn't appear to be in Dublin format.\n", + afp->fname); + + /* Read the alignment file one line at a time. + */ + while ((s = MSAFileGetLine(afp)) != NULL) + { + while (*s == ' ' || *s == '\t') s++; /* skip leading whitespace */ + + if (*s == '#') { + if (strncmp(s, "#=GF", 4) == 0) status = parse_gf(msa, s); + else if (strncmp(s, "#=GS", 4) == 0) status = parse_gs(msa, s); + else if (strncmp(s, "#=GC", 4) == 0) status = parse_gc(msa, s); + else if (strncmp(s, "#=GR", 4) == 0) status = parse_gr(msa, s); + else status = parse_comment(msa, s); + } + else if (strncmp(s, "//", 2) == 0) break; + else if (*s == '\n') continue; + else status = parse_sequence(msa, s); + + if (status == 0) + Die("Dublin format parse error: line %d of file %s while reading alignment %s", + afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name); + } + + if (s == NULL && msa->nseq != 0) + Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name); + + if (s == NULL && msa->nseq == 0) { + /* probably just some junk at end of file */ + MSAFree(msa); + return NULL; + } + + MSAVerifyParseDub(msa); + return msa; + +} /* this is the end of ReadDublin() */ + /* Function: WriteStockholm() * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis] @@ -579,6 +662,20 @@ parse_gr(MSA *msa, char *buf) } msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len); } + else if (strcmp(featurename, "CO") == 0) + { + if (msa->co == NULL) + { + msa->co = MallocOrDie(sizeof(char *) * msa->nseqalloc); + msa->colen = MallocOrDie(sizeof(int) * msa->nseqalloc); + for (j = 0; j < msa->nseqalloc; j++) + { + msa->co[j] = NULL; + msa->colen[j] = 0; + } + } + msa->colen[seqidx] = sre_strcat(&(msa->co[seqidx]), msa->colen[seqidx], text, len); + } else MSAAppendGR(msa, featurename, seqidx, text); -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/clustalo.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
