This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository iqtree.
commit a68a042d6b808c14b5aa4ff28daa98c9afba848a Author: Andreas Tille <[email protected]> Date: Fri Jun 24 21:45:20 2016 +0200 Imported Upstream version 1.4.2+dfsg --- CMakeLists.txt | 2 +- alignment.cpp | 4 +- alignment.h | 4 +- iqtree.cpp | 114 +++++---- model/modelfactory.cpp | 124 +++++++++- model/modelfactory.h | 8 + model/modelgtr.cpp | 1 + model/partitionmodel.cpp | 33 +++ model/partitionmodel.h | 8 + model/ratefree.cpp | 1 + model/ratefree.h | 2 +- model/ratefreeinvar.cpp | 2 +- model/rategamma.cpp | 1 + model/rategamma.h | 2 +- model/rategammainvar.cpp | 36 ++- model/rategammainvar.h | 9 + model/rateheterogeneity.h | 16 ++ model/rateinvar.cpp | 6 +- model/ratekategory.cpp | 1 + msetsblock.cpp | 1 + pda.cpp | 2 +- phyloanalysis.cpp | 34 +-- phylosupertreeplen.cpp | 7 + phylosupertreeplen.h | 9 + phylotesting.cpp | 41 +++- phylotree.cpp | 5 +- phylotree.h | 34 +-- quartet.cpp | 611 +++++++++++++++++++++++++--------------------- superalignment.cpp | 20 +- superalignment.h | 4 +- tools.cpp | 52 +++- tools.h | 23 +- 32 files changed, 811 insertions(+), 406 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3eea0ea..22a4864 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ add_definitions(-DIQ_TREE) # The version number. set (iqtree_VERSION_MAJOR 1) set (iqtree_VERSION_MINOR 4) -set (iqtree_VERSION_PATCH "0") +set (iqtree_VERSION_PATCH "2") set(BUILD_SHARED_LIBS OFF) diff --git a/alignment.cpp b/alignment.cpp index bf6d195..4a20602 100644 --- a/alignment.cpp +++ b/alignment.cpp @@ -1889,7 +1889,7 @@ void Alignment::printFasta(const char *file_name, bool append, const char *aln_s } -void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char) { +void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) { IntVector::iterator it; for (it = seq_id.begin(); it != seq_id.end(); it++) { assert(*it >= 0 && *it < aln->getNSeq()); @@ -1930,6 +1930,8 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t countConstSite(); buildSeqStates(); assert(size() <= aln->size()); + if (kept_partitions) + kept_partitions->push_back(0); } diff --git a/alignment.h b/alignment.h index a9dba09..1b736c9 100644 --- a/alignment.h +++ b/alignment.h @@ -352,8 +352,10 @@ public: @param aln original input alignment @param seq_id ID of sequences to extract from @param min_true_cher the minimum number of non-gap characters, true_char<min_true_char -> delete the sequence + @param min_taxa only keep alignment that has >= min_taxa sequences + @param[out] kept_partitions (for SuperAlignment) indices of kept partitions */ - virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char); + virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa = 0, IntVector *kept_partitions = NULL); /** extract a sub-set of patterns diff --git a/iqtree.cpp b/iqtree.cpp index e38a1ed..2179c96 100644 --- a/iqtree.cpp +++ b/iqtree.cpp @@ -773,54 +773,54 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) { } void IQTree::initializePLL(Params ¶ms) { - pllAttr.rateHetModel = PLL_GAMMA; - pllAttr.fastScaling = PLL_FALSE; - pllAttr.saveMemory = PLL_FALSE; - pllAttr.useRecom = PLL_FALSE; - pllAttr.randomNumberSeed = params.ran_seed; - pllAttr.numberOfThreads = params.num_threads; /* This only affects the pthreads version */ - if (pllInst != NULL) { - pllDestroyInstance(pllInst); - } - /* Create a PLL instance */ - pllInst = pllCreateInstance(&pllAttr); - - /* Read in the alignment file */ - stringstream pllAln; - if (aln->isSuperAlignment()) { - ((SuperAlignment*) aln)->printCombinedAlignment(pllAln); - } else { - aln->printPhylip(pllAln); - } - string pllAlnStr = pllAln.str(); - pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length()); - - /* Read in the partition information */ - // BQM: to avoid printing file - stringstream pllPartitionFileHandle; - createPLLPartition(params, pllPartitionFileHandle); - pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str()); - - /* Validate the partitions */ - if (!pllPartitionsValidate(partitionInfo, pllAlignment)) { - outError("pllPartitionsValidate"); - } - - /* Commit the partitions and build a partitions structure */ - pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment); - - /* We don't need the the intermediate partition queue structure anymore */ - pllQueuePartitionsDestroy(&partitionInfo); - - /* eliminate duplicate sites from the alignment and update weights vector */ - pllAlignmentRemoveDups(pllAlignment, pllPartitions); - - pllTreeInitTopologyForAlignment(pllInst, pllAlignment); - - /* Connect the alignment and partition structure with the tree structure */ - if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) { - outError("Incompatible tree/alignment combination"); - } + pllAttr.rateHetModel = PLL_GAMMA; + pllAttr.fastScaling = PLL_FALSE; + pllAttr.saveMemory = PLL_FALSE; + pllAttr.useRecom = PLL_FALSE; + pllAttr.randomNumberSeed = params.ran_seed; + pllAttr.numberOfThreads = params.num_threads; /* This only affects the pthreads version */ + if (pllInst != NULL) { + pllDestroyInstance(pllInst); + } + /* Create a PLL instance */ + pllInst = pllCreateInstance(&pllAttr); + + /* Read in the alignment file */ + stringstream pllAln; + if (aln->isSuperAlignment()) { + ((SuperAlignment *) aln)->printCombinedAlignment(pllAln); + } else { + aln->printPhylip(pllAln); + } + string pllAlnStr = pllAln.str(); + pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length()); + + /* Read in the partition information */ + // BQM: to avoid printing file + stringstream pllPartitionFileHandle; + createPLLPartition(params, pllPartitionFileHandle); + pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str()); + + /* Validate the partitions */ + if (!pllPartitionsValidate(partitionInfo, pllAlignment)) { + outError("pllPartitionsValidate"); + } + + /* Commit the partitions and build a partitions structure */ + pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment); + + /* We don't need the the intermediate partition queue structure anymore */ + pllQueuePartitionsDestroy(&partitionInfo); + + /* eliminate duplicate sites from the alignment and update weights vector */ + pllAlignmentRemoveDups(pllAlignment, pllPartitions); + + pllTreeInitTopologyForAlignment(pllInst, pllAlignment); + + /* Connect the alignment and partition structure with the tree structure */ + if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) { + outError("Incompatible tree/alignment combination"); + } } @@ -1424,7 +1424,7 @@ double IQTree::getAlphaFromPLL() { void IQTree::inputModelPLL2IQTree() { // TODO add support for partitioned model - dynamic_cast<RateGamma*>(getRate())->setGammaShape(pllPartitions->partitionData[0]->alpha); + getRate()->setGammaShape(pllPartitions->partitionData[0]->alpha); if (aln->num_states == 4) { ((ModelGTR*) getModel())->setRateMatrix(pllPartitions->partitionData[0]->substRates); getModel()->decomposeRateMatrix(); @@ -1720,6 +1720,8 @@ extern pllUFBootData * pllUFBootDataPtr; string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) { if (logl_epsilon == -1) logl_epsilon = params->modeps; +// if (params->test_param) +// logl_epsilon = 1.0; cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl; double stime = getRealTime(); string newTree; @@ -1743,8 +1745,18 @@ string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) { if (printInfo) cout << etime - stime << " seconds (logl: " << curScore << ")" << endl; } else { - double modOptScore = - getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon); + double modOptScore; + if (params->test_param) { // DO RESTART ON ALPHA AND P_INVAR + double stime = getRealTime(); + modOptScore = getModelFactory()->optimizeParametersGammaInvar(params->fixed_branch_length, printInfo, logl_epsilon); + double etime = getRealTime(); + cout << "Testing param took: " << etime -stime << " CPU seconds" << endl; + cout << endl; + params->test_param = false; + } else { + modOptScore = getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon); + } + if (isSuperTree()) { ((PhyloSuperTree*) this)->computeBranchLengths(); } diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp index fc4de9a..bb7efbd 100644 --- a/model/modelfactory.cpp +++ b/model/modelfactory.cpp @@ -693,22 +693,20 @@ bool ModelFactory::readSiteFreq(Alignment *aln, char* site_freq_file, IntVector double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst *model, double initAlpha, double initPInvar, double *initRates, double *initStateFreqs) { - RateGammaInvar* rateGammaInvar = dynamic_cast<RateGammaInvar*>(rate); - ModelGTR* modelGTR = dynamic_cast<ModelGTR*>(model); + RateHeterogeneity* rateGammaInvar = rate; + ModelGTR* modelGTR = (ModelGTR*)(model); modelGTR->setRateMatrix(initRates); modelGTR->setStateFrequency(initStateFreqs); rateGammaInvar->setGammaShape(initAlpha); rateGammaInvar->setPInvar(initPInvar); modelGTR->decomposeRateMatrix(); - rateGammaInvar->computeRates(); site_rate->phylo_tree->clearAllPartialLH(); return site_rate->phylo_tree->computeLikelihood(); } double ModelFactory::optimizeParametersOnly(double gradient_epsilon) { double logl; - if (Params::getInstance().fai && dynamic_cast<RateGammaInvar*>(site_rate) != NULL - && dynamic_cast<ModelGTR*>(model) != NULL) { + if (Params::getInstance().fai && site_rate != NULL && model != NULL) { cout << "Optimize substitutional and site rates with restart ..." << endl; PhyloTree* tree = site_rate->phylo_tree; double initAlpha = 0.1; @@ -740,8 +738,8 @@ double ModelFactory::optimizeParametersOnly(double gradient_epsilon) { site_rate->optimizeParameters(gradient_epsilon); logl = tree->optimizeAllBranches(1); } - RateGammaInvar* rateGammaInvar = dynamic_cast<RateGammaInvar*>(site_rate); - ModelGTR* modelGTR = dynamic_cast<ModelGTR*>(model); + RateHeterogeneity* rateGammaInvar = site_rate; + ModelGTR* modelGTR = (ModelGTR*)(model); double curAlpha = rateGammaInvar->getGammaShape(); double curPInvar = rateGammaInvar->getPInvar(); if (logl > bestLogl) { @@ -821,6 +819,8 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) { model->decomposeRateMatrix(); site_rate->phylo_tree->clearAllPartialLH(); + score = site_rate->phylo_tree->computeLikelihood(); + delete [] bound_check; delete [] lower_bound; delete [] upper_bound; @@ -829,6 +829,112 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) { return score; } +double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { + PhyloTree *tree = site_rate->getTree(); + + RateHeterogeneity* site_rates = tree->getRate(); + if (site_rates == NULL) { +// outError("The model must be +I+G"); + // model is not +I+G, call conventional function instead + return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon); + } + + double frac_const = tree->aln->frac_const_sites; + if (fixed_len) { + tree->setCurScore(tree->computeLikelihood()); + } else { + tree->optimizeAllBranches(1); + } + + + /* Back up branch lengths and substitutional rates */ + DoubleVector lenvec; + DoubleVector bestLens; + tree->saveBranchLengths(lenvec); + int numRateEntries = tree->getModel()->getNumRateEntries(); + double *rates = new double[numRateEntries]; + double *bestRates = new double[numRateEntries]; + tree->getModel()->getRateMatrix(rates); + int numStates = tree->aln->num_states; + double *state_freqs = new double[numStates]; + tree->getModel()->getStateFrequency(state_freqs); + + /* Best estimates found */ + double *bestStateFreqs = new double[numStates]; + double bestLogl = tree->getCurScore(); + double bestAlpha = 0.0; + double bestPInvar = 0.0; + + double testInterval = (frac_const - MIN_PINVAR*2) / 10; + double initPInv = MIN_PINVAR; + double initAlpha = site_rates->getGammaShape(); + + if (write_info) + cout << "testInterval: " << testInterval << endl; + + // Now perform testing different inital p_inv values + while (initPInv <= frac_const) { + if (write_info) { + cout << endl; + cout << "Testing with init. pinv = " << initPInv << " / init. alpha = " << initAlpha << endl; + } + tree->restoreBranchLengths(lenvec); + ((ModelGTR*) tree->getModel())->setRateMatrix(rates); + ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs); + tree->getModel()->decomposeRateMatrix(); + site_rates->setPInvar(initPInv); + site_rates->setGammaShape(initAlpha); + tree->clearAllPartialLH(); + optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon); + double estAlpha = tree->getRate()->getGammaShape(); + double estPInv = tree->getRate()->getPInvar(); + double logl = tree->getCurScore(); + if (write_info) { + cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv + << " / Logl: " << logl << endl; + } + initPInv = initPInv + testInterval; + + if (tree->getCurScore() > bestLogl) { + bestLogl = logl; + bestAlpha = estAlpha; + bestPInvar = estPInv; + bestLens.clear(); + tree->saveBranchLengths(bestLens); + tree->getModel()->getRateMatrix(bestRates); + tree->getModel()->getStateFrequency(bestStateFreqs); + } + } + + site_rates->setGammaShape(bestAlpha); +// site_rates->setFixGammaShape(false); + site_rates->setPInvar(bestPInvar); +// site_rates->setFixPInvar(false); + ((ModelGTR*) tree->getModel())->setRateMatrix(bestRates); + ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs); + tree->restoreBranchLengths(bestLens); + tree->getModel()->decomposeRateMatrix(); + tree->clearAllPartialLH(); + tree->setCurScore(tree->computeLikelihood()); + if (write_info) { + cout << endl; + cout << "Best initial alpha: " << bestAlpha << " / initial pinv: " << bestPInvar << " / "; + cout << "Logl: " << tree->getCurScore() << endl; + } + + delete [] rates; + delete [] state_freqs; + delete [] bestRates; + delete [] bestStateFreqs; + + // updating global variable is not safe! +// Params::getInstance().testAlpha = false; + + // 2016-03-14: this was missing! + return tree->getCurScore(); +} + + double ModelFactory::optimizeParameters(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { assert(model); @@ -950,13 +1056,14 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info, } double elapsed_secs = getRealTime() - begin_time; if (write_info) - cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl << endl; + cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl; startStoringTransMatrix(); // For UpperBounds ----------- tree->mlCheck = 1; // --------------------------- + tree->setCurScore(cur_lh); return cur_lh; } @@ -1105,3 +1212,4 @@ bool ModelFactory::getVariables(double *variables) { return changed; } + diff --git a/model/modelfactory.h b/model/modelfactory.h index a4334d5..baedb6e 100644 --- a/model/modelfactory.h +++ b/model/modelfactory.h @@ -169,6 +169,14 @@ public: double logl_epsilon = 0.1, double gradient_epsilon = 0.001); /** + * optimize model parameters and tree branch lengths for the +I+G model + * using restart strategy. + * @param fixed_len TRUE to fix branch lengths, default is false + * @return the best likelihood + */ + virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001); + + /** * @return TRUE if parameters are at the boundary that may cause numerical unstability */ virtual bool isUnstableParameters(); diff --git a/model/modelgtr.cpp b/model/modelgtr.cpp index 39e18d8..557a291 100644 --- a/model/modelgtr.cpp +++ b/model/modelgtr.cpp @@ -592,6 +592,7 @@ double ModelGTR::optimizeParameters(double gradient_epsilon) { if (changed) { decomposeRateMatrix(); phylo_tree->clearAllPartialLH(); + score = phylo_tree->computeLikelihood(); } delete [] bound_check; diff --git a/model/partitionmodel.cpp b/model/partitionmodel.cpp index cb853c1..bddbecd 100644 --- a/model/partitionmodel.cpp +++ b/model/partitionmodel.cpp @@ -175,6 +175,39 @@ double PartitionModel::optimizeParameters(bool fixed_len, bool write_info, doubl return tree_lh; } + +double PartitionModel::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { + PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); + double tree_lh = 0.0; + int ntrees = tree->size(); + + if (tree->part_order.empty()) tree->computePartitionOrder(); + #ifdef _OPENMP + #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads) + #endif + for (int i = 0; i < ntrees; i++) { + int part = tree->part_order[i]; + if (write_info) + #ifdef _OPENMP + #pragma omp critical + #endif + { + cout << "Optimizing " << tree->at(part)->getModelName() << + " parameters for partition " << tree->part_info[part].name << + " (" << tree->at(part)->getModelFactory()->getNParameters() << " free parameters)" << endl; + } + tree_lh += tree->at(part)->getModelFactory()->optimizeParametersGammaInvar(fixed_len, write_info && verbose_mode >= VB_MED, + logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10)); + } + //return ModelFactory::optimizeParameters(fixed_len, write_info); + + if (tree->params->link_alpha) { + tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon); + } + return tree_lh; +} + + PartitionModel::~PartitionModel() { } diff --git a/model/partitionmodel.h b/model/partitionmodel.h index 8fe7fda..2a34473 100644 --- a/model/partitionmodel.h +++ b/model/partitionmodel.h @@ -73,6 +73,14 @@ public: virtual double optimizeParameters(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001); /** + * optimize model parameters and tree branch lengths for the +I+G model + * using restart strategy. + * @param fixed_len TRUE to fix branch lengths, default is false + * @return the best likelihood + */ + virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001); + + /** * @return TRUE if parameters are at the boundary that may cause numerical unstability */ virtual bool isUnstableParameters(); diff --git a/model/ratefree.cpp b/model/ratefree.cpp index ba5bcb6..3265a2b 100644 --- a/model/ratefree.cpp +++ b/model/ratefree.cpp @@ -248,6 +248,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) { if (sorted_rates) quicksort(rates, 0, ncategory-1, prop); phylo_tree->clearAllPartialLH(); + score = phylo_tree->computeLikelihood(); } optimizing_params = 0; diff --git a/model/ratefree.h b/model/ratefree.h index 327012b..062985a 100644 --- a/model/ratefree.h +++ b/model/ratefree.h @@ -10,7 +10,7 @@ #include "rategamma.h" -class RateFree: virtual public RateGamma { +class RateFree: public RateGamma { public: /** constructor diff --git a/model/ratefreeinvar.cpp b/model/ratefreeinvar.cpp index b3b8c01..0ec731c 100644 --- a/model/ratefreeinvar.cpp +++ b/model/ratefreeinvar.cpp @@ -8,7 +8,7 @@ #include "ratefreeinvar.h" RateFreeInvar::RateFreeInvar(int ncat, double start_alpha, string params, bool sorted_rates, double p_invar_sites, string opt_alg, PhyloTree *tree) -: RateGamma(ncat, 1.0, false, tree), RateInvar(p_invar_sites, tree), RateFree(ncat, start_alpha, params, sorted_rates, opt_alg, tree) +: RateInvar(p_invar_sites, tree), RateFree(ncat, start_alpha, params, sorted_rates, opt_alg, tree) { cur_optimize = 0; name = "+I" + name; diff --git a/model/rategamma.cpp b/model/rategamma.cpp index 6957c3f..48e0814 100644 --- a/model/rategamma.cpp +++ b/model/rategamma.cpp @@ -150,6 +150,7 @@ void RateGamma::computeRatesMean () { void RateGamma::setGammaShape(double gs) { gamma_shape = gs; + computeRates(); } double RateGamma::computeFunction(double shape) { diff --git a/model/rategamma.h b/model/rategamma.h index 8be01b6..19fc809 100644 --- a/model/rategamma.h +++ b/model/rategamma.h @@ -186,7 +186,7 @@ public: return fix_gamma_shape; } - void setFixGammaShape(bool fixGammaShape) { + virtual void setFixGammaShape(bool fixGammaShape) { fix_gamma_shape = fixGammaShape; } diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp index dc293bd..2b0641d 100644 --- a/model/rategammainvar.cpp +++ b/model/rategammainvar.cpp @@ -60,7 +60,7 @@ string RateGammaInvar::getNameParams() { double RateGammaInvar::computeFunction(double value) { if (cur_optimize == 0) gamma_shape = value; - else + else p_invar = value; // need to compute rates again if p_inv or Gamma shape changes! computeRates(); @@ -114,8 +114,9 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) { if (ndim == 0) return phylo_tree->computeLikelihood(); + if (!joint_optimize) { -// double lh = phylo_tree->computeLikelihood(); + double lh = phylo_tree->computeLikelihood(); cur_optimize = 0; double gamma_lh; if (Params::getInstance().testAlpha) { @@ -123,19 +124,41 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) { } else { gamma_lh = RateGamma::optimizeParameters(gradient_epsilon); } + assert(gamma_lh >= lh-0.1); cur_optimize = 1; double invar_lh = -DBL_MAX; invar_lh = RateInvar::optimizeParameters(gradient_epsilon); -// assert(tree_lh >= lh-0.1); -// lh = tree_lh; + assert(invar_lh >= gamma_lh-0.1); + //lh = tree_lh; -// assert(gamma_lh >= invar_lh - 0.1); - phylo_tree->clearAllPartialLH(); + //assert(gamma_lh >= invar_lh - 0.1); +// phylo_tree->clearAllPartialLH(); // return gamma_lh; cur_optimize = 0; return invar_lh; } +/* + if (!joint_optimize) { +// double lh = phylo_tree->computeLikelihood(); + cur_optimize = 1; + double invar_lh = -DBL_MAX; + invar_lh = RateInvar::optimizeParameters(gradient_epsilon); +// assert(tree_lh >= lh-0.1); +// lh = tree_lh; + cur_optimize = 0; + double gamma_lh; + if (Params::getInstance().testAlpha) { + gamma_lh = RateGamma::optimizeParameters(gradient_epsilon, 0.05, 10); + } else { + gamma_lh = RateGamma::optimizeParameters(gradient_epsilon); + } + //assert(gamma_lh >= invar_lh - 0.1); + phylo_tree->clearAllPartialLH(); + return gamma_lh; + } +*/ + if (verbose_mode >= VB_MAX) cout << "Optimizing " << name << " model parameters by BFGS..." << endl; @@ -156,6 +179,7 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) { getVariables(variables); phylo_tree->clearAllPartialLH(); + score = phylo_tree->computeLikelihood(); delete [] bound_check; delete [] lower_bound; diff --git a/model/rategammainvar.h b/model/rategammainvar.h index c4092c8..6d7926c 100644 --- a/model/rategammainvar.h +++ b/model/rategammainvar.h @@ -64,6 +64,15 @@ public: virtual double getRate(int category) { return RateGamma::getRate(category); } /** + set the proportion of invariable sites. Default: do nothing + @param pinv the proportion of invariable sites + */ + virtual void setPInvar(double pInvar) { + p_invar = pInvar; + computeRates(); + } + + /** * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} */ virtual string getNameParams(); diff --git a/model/rateheterogeneity.h b/model/rateheterogeneity.h index 85e14c6..51bab81 100644 --- a/model/rateheterogeneity.h +++ b/model/rateheterogeneity.h @@ -146,6 +146,11 @@ public: */ virtual void setPInvar(double pinv) { } + /** + set whether to fix p_invar + */ + virtual void setFixPInvar(bool fixPInvar) {} + /** Set whether or not to optimize p_invar @param opt TRUE to optimize p_invar, FALSE otherwise @@ -159,6 +164,17 @@ public: virtual double getGammaShape() { return 0.0; } /** + set the Gamma shape. Default: nothing + @param gs Gamma shape + */ + virtual void setGammaShape(double gs) {} + + /** + set whether to fix gamma shape + */ + virtual void setFixGammaShape(bool fixGammaShape) {} + + /** @return >0 if this is a Gamma model (default: 0) */ virtual int isGammaRate() { return 0; } diff --git a/model/rateinvar.cpp b/model/rateinvar.cpp index 58b7d52..55988c5 100644 --- a/model/rateinvar.cpp +++ b/model/rateinvar.cpp @@ -93,10 +93,10 @@ double RateInvar::optimizeParameters(double gradient_epsilon) { double ferror; p_invar = minimizeOneDimen(MIN_PINVAR, p_invar, min(phylo_tree->aln->frac_const_sites, 1.0-MIN_PINVAR), max(gradient_epsilon, TOL_PINVAR), &negative_lh, &ferror); //p_invar = minimizeOneDimen(MIN_PINVAR, p_invar, 1.0 - MIN_PINVAR, TOL_PINVAR, &negative_lh, &ferror); - phylo_tree->clearAllPartialLH(); - phylo_tree->computePtnInvar(); +// phylo_tree->clearAllPartialLH(); +// phylo_tree->computePtnInvar(); // return -negative_lh; - return phylo_tree->computeLikelihood(); + return -computeFunction(p_invar); } void RateInvar::writeInfo(ostream &out) { diff --git a/model/ratekategory.cpp b/model/ratekategory.cpp index fe20122..64ceb09 100644 --- a/model/ratekategory.cpp +++ b/model/ratekategory.cpp @@ -86,6 +86,7 @@ double RateKategory::optimizeParameters(double gradient_epsilon) getVariables(variables); //sort(rates, rates+ncategory); phylo_tree->clearAllPartialLH(); + score = phylo_tree->computeLikelihood(); delete [] bound_check; delete [] lower_bound; diff --git a/msetsblock.cpp b/msetsblock.cpp index 839467e..d4309f1 100644 --- a/msetsblock.cpp +++ b/msetsblock.cpp @@ -227,6 +227,7 @@ void MSetsBlock::Read(NxsToken &token) errormsg += " instead"; throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn()); } + token.SetLabileFlagBit(NxsToken::preserveUnderscores); if (token.Equals(";")) break; else diff --git a/pda.cpp b/pda.cpp index 9a17049..9f9db7d 100644 --- a/pda.cpp +++ b/pda.cpp @@ -2189,7 +2189,7 @@ int main(int argc, char *argv[]) bool append_log = false; - if (!Params::getInstance().ignore_checkpoint) { + if (!Params::getInstance().ignore_checkpoint && fileExists(filename)) { checkpoint->load(); if (checkpoint->hasKey("finished")) { if (checkpoint->getBool("finished")) { diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp index d321ac3..ebfffcf 100644 --- a/phyloanalysis.cpp +++ b/phyloanalysis.cpp @@ -681,7 +681,7 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, reportRate(out, tree); } - if (params.lmap_num_quartets) { + if (params.lmap_num_quartets >= 0) { tree.reportLikelihoodMapping(out); } @@ -1080,7 +1080,7 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, cout << " Site log-likelihoods: " << params.out_prefix << ".sitelh" << endl; } } - if (params.lmap_num_quartets) { + if (params.lmap_num_quartets >= 0) { cout << " Likelihood mapping plot (SVG): " << params.out_prefix << ".lmap.svg" << endl; cout << " Likelihood mapping plot (EPS): " << params.out_prefix << ".lmap.eps" << endl; } @@ -1727,6 +1727,9 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre iqtree.initializeAllPartialLh(); double initEpsilon = params.min_iterations == 0 ? params.modeps : (params.modeps*10); + if (params.test_param) + initEpsilon = 0.1; + string initTree; if (iqtree.getRate()->name.find("+I+G") != string::npos) { @@ -1747,6 +1750,7 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre cout << "Testing alpha took: " << etime -stime << " CPU seconds" << endl; cout << endl; } + } // Optimize model parameters and branch lengths using ML for the initial tree @@ -1767,11 +1771,16 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre iqtree.getCheckpoint()->dump(); } - if (params.lmap_num_quartets) { - cout << "Performing likelihood mapping with " << params.lmap_num_quartets << " quartets..." << endl; + if (params.lmap_num_quartets >= 0) { + cout << endl << "Performing likelihood mapping with "; + if (params.lmap_num_quartets > 0) + cout << params.lmap_num_quartets; + else + cout << "all"; + cout << " quartets..." << endl; double lkmap_time = getRealTime(); iqtree.doLikelihoodMapping(); - cout << getRealTime()-lkmap_time << " seconds" << endl; + cout << "Likelihood mapping needed " << getRealTime()-lkmap_time << " seconds" << endl << endl; } bool finishedCandidateSet = iqtree.getCheckpoint()->getBool("finishedCandidateSet"); @@ -1942,10 +1951,7 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre } else { cout << "Performs final model parameters optimization" << endl; string tree; - if (params.testAlpha) - tree = iqtree.optimizeModelParameters(true, 0.001); - else - tree = iqtree.optimizeModelParameters(true); + tree = iqtree.optimizeModelParameters(true); iqtree.candidateTrees.update(tree, iqtree.getCurScore(), true); iqtree.getCheckpoint()->putBool("finishedModelFinal", true); iqtree.saveCheckpoint(); @@ -2038,7 +2044,7 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre } void computeLoglFromUserInputGAMMAInvar(Params ¶ms, IQTree &iqtree) { - RateGammaInvar *site_rates = dynamic_cast<RateGammaInvar *>(iqtree.getRate()); + RateHeterogeneity *site_rates = iqtree.getRate(); site_rates->setFixPInvar(true); site_rates->setFixGammaShape(true); vector<double> alphas, p_invars, logl; @@ -2070,7 +2076,6 @@ void computeLoglFromUserInputGAMMAInvar(Params ¶ms, IQTree &iqtree) { aiFileResults << alphas.at(i) << " " << p_invars.at(i) << " "; site_rates->setGammaShape(alphas.at(i)); site_rates->setPInvar(p_invars.at(i)); - site_rates->computeRates(); iqtree.clearAllPartialLH(); double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001); aiFileResults << lh << " " << iqtree.treeLength() << "\n"; @@ -2086,7 +2091,7 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) { iqtree.setCurScore(iqtree.optimizeAllBranches(1)); else iqtree.setCurScore(iqtree.computeLikelihood()); - RateGammaInvar* site_rates = dynamic_cast<RateGammaInvar*>(iqtree.getRate()); + RateHeterogeneity* site_rates = (iqtree.getRate()); double values[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 }; vector<double> initAlphas; if (Params::getInstance().randomAlpha) { @@ -2125,7 +2130,6 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) { iqtree.getModel()->decomposeRateMatrix(); site_rates->setGammaShape(initAlphas[i]); site_rates->setPInvar(initPInvar); - site_rates->computeRates(); iqtree.clearAllPartialLH(); iqtree.optimizeModelParameters(verbose_mode >= VB_MED, Params::getInstance().testAlphaEps); double estAlpha = iqtree.getRate()->getGammaShape(); @@ -2152,7 +2156,6 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) { ((ModelGTR*) iqtree.getModel())->setStateFrequency(bestStateFreqs); iqtree.restoreBranchLengths(bestLens); iqtree.getModel()->decomposeRateMatrix(); - site_rates->computeRates(); iqtree.clearAllPartialLH(); iqtree.setCurScore(iqtree.computeLikelihood()); cout << endl; @@ -2183,7 +2186,7 @@ void exhaustiveSearchGAMMAInvar(Params ¶ms, IQTree &iqtree) { DoubleVector lenvec; iqtree.saveBranchLengths(lenvec); - RateGammaInvar* site_rates = dynamic_cast<RateGammaInvar*>(iqtree.getRate()); + RateHeterogeneity* site_rates = (iqtree.getRate()); site_rates->setFixPInvar(true); site_rates->setFixGammaShape(true); @@ -2191,7 +2194,6 @@ void exhaustiveSearchGAMMAInvar(Params ¶ms, IQTree &iqtree) { for (double p_invar = p_invarMin; p_invar < p_invarMax; p_invar = p_invar + stepSize) { site_rates->setGammaShape(alpha); site_rates->setPInvar(p_invar); - site_rates->computeRates(); iqtree.clearAllPartialLH(); double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001); stringstream ss; diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp index 6782e9d..bc93d40 100644 --- a/phylosupertreeplen.cpp +++ b/phylosupertreeplen.cpp @@ -137,6 +137,8 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d if (tree->part_info[part].cur_score == 0.0) tree->part_info[part].cur_score = tree->at(part)->computeLikelihood(); cur_lh += tree->part_info[part].cur_score; + + // normalize rates s.t. branch lengths are #subst per site double mean_rate = tree->at(part)->getRate()->rescaleRates(); @@ -197,6 +199,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d return tree_lh; } +double PartitionModelPlen::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { + outError("This option does not work with edge-linked partition model yet"); + return 0.0; +} + void PartitionModelPlen::writeInfo(ostream &out) { PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); int ntrees = tree->size(); diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h index d54c46f..baa2979 100644 --- a/phylosupertreeplen.h +++ b/phylosupertreeplen.h @@ -130,6 +130,15 @@ public: virtual double optimizeParameters(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001); + + /** + * optimize model parameters and tree branch lengths for the +I+G model + * using restart strategy. + * @param fixed_len TRUE to fix branch lengths, default is false + * @return the best likelihood + */ + virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001); + double optimizeGeneRate(double tol); // virtual double targetFunk(double x[]); diff --git a/phylotesting.cpp b/phylotesting.cpp index 272b9e9..43e9a50 100644 --- a/phylotesting.cpp +++ b/phylotesting.cpp @@ -991,7 +991,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf this_aln = in_tree->at(distID[i] & ((1<<16)-1))->aln; dist[i] -= ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; } - if (params.num_threads > 1) + if (params.num_threads > 1 && num_pairs >= 1) quicksort(dist, 0, num_pairs-1, distID); #ifdef _OPENMP @@ -1195,15 +1195,18 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in in_tree->optimize_by_newton = params.optimize_by_newton; in_tree->setLikelihoodKernel(params.SSE); - int num_rate_classes = 3 + params.max_rate_cats; +// int num_rate_classes = 3 + params.max_rate_cats; - RateHeterogeneity ** rate_class = new RateHeterogeneity*[num_rate_classes]; + RateHeterogeneity ** rate_class = new RateHeterogeneity*[4]; rate_class[0] = new RateHeterogeneity(); rate_class[1] = new RateInvar(-1, NULL); rate_class[2] = new RateGamma(params.num_rate_cats, params.gamma_shape, params.gamma_median, NULL); rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_model_rate_joint, NULL); - for (model = 4; model < num_rate_classes; model++) - rate_class[model] = new RateFree(model-2, params.gamma_shape, "", false, params.optimize_alg, NULL); + + RateFree ** rate_class_free = new RateFree*[params.max_rate_cats-1]; + + for (model = 0; model < params.max_rate_cats-1; model++) + rate_class_free[model] = new RateFree(model+2, params.gamma_shape, "", false, params.optimize_alg, NULL); ModelGTR *subst_model = NULL; if (seq_type == SEQ_BINARY) @@ -1348,7 +1351,7 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in if (ncat <= 1) outError("Number of rate categories for " + model_names[model] + " is <= 1"); if (ncat > params.max_rate_cats) outError("Number of rate categories for " + model_names[model] + " exceeds " + convertIntToString(params.max_rate_cats)); - tree->setRate(rate_class[2+ncat]); + tree->setRate(rate_class_free[ncat-2]); } else if (model_names[model].find("+I") != string::npos && (pos = model_names[model].find("+G")) != string::npos) { tree->setRate(rate_class[3]); if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) { @@ -1428,6 +1431,10 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in outError("-mtree option is not supported for partition model"); } IQTree *iqtree = new IQTree(in_tree->aln); + // set checkpoint + iqtree->setCheckpoint(in_tree->getCheckpoint()); + iqtree->num_precision = in_tree->num_precision; + cout << endl << "===> Testing model " << model+1 << ": " << params.model_name << endl; runTreeReconstruction(params, original_model, *iqtree, model_info); info.logl = iqtree->computeLikelihood(); @@ -1436,6 +1443,16 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in params.model_name = original_model; params.user_file = orig_user_tree; tree = iqtree; + + // clear all checkpointed information + Checkpoint *newCheckpoint = new Checkpoint; + tree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree"); + tree->getCheckpoint()->clear(); + tree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end()); + tree->getCheckpoint()->putBool("finished", false); + tree->getCheckpoint()->dump(true); + delete newCheckpoint; + } else { if (tree->getMemoryRequired() > RAM_requirement) { tree->deleteAllPartialLh(); @@ -1463,7 +1480,8 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in { if (verbose_mode >= VB_MED) cout << "reoptimizing from previous parameters of +R...." << endl; - dynamic_cast<RateFree*>(rate_class[2+ncat])->setRateAndProp(dynamic_cast<RateFree*>(rate_class[1+ncat])); + assert(ncat >= 3); + rate_class_free[ncat-2]->setRateAndProp(rate_class_free[ncat-3]); info.logl = tree->getModelFactory()->optimizeParameters(false, false, TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST); info.tree_len = tree->treeLength(); } @@ -1688,10 +1706,17 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in delete model_fac; delete subst_model; - for (int rate_type = num_rate_classes-1; rate_type >= 0; rate_type--) { + int rate_type; + for (rate_type = 3; rate_type >= 0; rate_type--) { delete rate_class[rate_type]; } delete [] rate_class; + + for (rate_type = params.max_rate_cats-2; rate_type >= 0; rate_type--) { + delete rate_class_free[rate_type]; + } + delete [] rate_class_free; + // delete tree_hetero; // delete tree_homo; in_tree->deleteAllPartialLh(); diff --git a/phylotree.cpp b/phylotree.cpp index bdc9da4..2221943 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -360,7 +360,10 @@ void PhyloTree::readTreeString(const string &tree_string) { // str(tree_string); // str.seekg(0, ios::beg); freeNode(); - readTree(str, rooted); + + // bug fix 2016-04-14: in case taxon name happens to be ID + MTree::readTree(str, rooted); + assignLeafNames(); // setAlignment(aln); setRootNode(params->root); diff --git a/phylotree.h b/phylotree.h index 2ed8388..68b33dd 100644 --- a/phylotree.h +++ b/phylotree.h @@ -264,22 +264,22 @@ const double TP_MAX_EXP_DIFF = 40.0; #define LM_MAX 10 struct QuartetGroups{ - int numGroups; // number of clusters: - // 0: not initialized, default -> 1 - // 1: no clusters - any (a,b)|(c,d) - // 2: 2 clusters - (a,a')|(b,b') - // 3: 3 clusters - (a,a')|(b,c) [rare] - // 4: 4 clusters - (a,b)|(c,d) - int numSeqs; // number of seqs in alignment (should be #A+#B+#C+#D+#X) - int numQuartSeqs; // number of seqs in analysis (should be #A+#B+#C+#D) - int numGrpSeqs[5]; // number of seqs in cluster A, B, C, D, and X (exclude) - int uniqueQuarts; // number of existing unique quartets for this grouping - string Name[5]; // seqIDs of cluster A - vector<int> GroupA; // seqIDs of cluster A - vector<int> GroupB; // seqIDs of cluster B - vector<int> GroupC; // seqIDs of cluster C - vector<int> GroupD; // seqIDs of cluster D - vector<int> GroupX; // seqIDs of cluster X + int numGroups; // number of clusters: + // 0: not initialized, default -> 1 + // 1: no clusters - any (a,b)|(c,d) + // 2: 2 clusters - (a,a')|(b,b') + // 3: 3 clusters - (a,a')|(b,c) [rare] + // 4: 4 clusters - (a,b)|(c,d) + int numSeqs; // number of seqs in alignment (should be #A+#B+#C+#D+#X) + int numQuartSeqs; // number of seqs in analysis (should be #A+#B+#C+#D) + int numGrpSeqs[5]; // number of seqs in cluster A, B, C, D, and X (exclude) + int64_t uniqueQuarts; // number of existing unique quartets for this grouping + string Name[5]; // seqIDs of cluster A + vector<int> GroupA; // seqIDs of cluster A + vector<int> GroupB; // seqIDs of cluster B + vector<int> GroupC; // seqIDs of cluster C + vector<int> GroupD; // seqIDs of cluster D + vector<int> GroupX; // seqIDs of cluster X }; struct QuartetInfo { @@ -292,7 +292,7 @@ struct QuartetInfo { }; struct SeqQuartetInfo { - unsigned long countarr[LM_MAX]; // the 7 areas of the simplex triangle [0-6; corners (0:top, 1:right, 2:left), rectangles (3:right, 4:left, 5:bottom), 6:center] and the 3 corners [7-9; 7:top, 8:right, 9:left] + int64_t countarr[LM_MAX]; // the 7 areas of the simplex triangle [0-6; corners (0:top, 1:right, 2:left), rectangles (3:right, 4:left, 5:bottom), 6:center] and the 3 corners [7-9; 7:top, 8:right, 9:left] }; // ******************************************** diff --git a/quartet.cpp b/quartet.cpp index 2fe668d..eca150d 100644 --- a/quartet.cpp +++ b/quartet.cpp @@ -201,7 +201,7 @@ void plotlmpointsvg(FILE *ofp, double w1, double w2) // void finishsvg(FILE *ofp, unsigned long **countarr) -void finishsvg(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, unsigned long Numquartets) +void finishsvg(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, int64_t Numquartets) { fprintf(ofp," </g>\n"); /* end triangle 1 (top) */ @@ -550,7 +550,7 @@ void plotlmpointcolor(FILE *epsofp, FILE *svgofp, double w1, double w2, int red, /* last lines of EPSF likelihood mapping file */ //void finisheps(FILE *ofp, unsigned long **countarr) -void finisheps(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, unsigned long Numquartets) +void finisheps(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, int64_t Numquartets) { fprintf(ofp, "stroke\n"); fprintf(ofp, "%% second triangle (the one with 3 basins)\n"); @@ -674,8 +674,6 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info if (leafNum < 4) outError("Tree must have 4 or more taxa with unique sequences!"); - lmap_quartet_info.resize(params->lmap_num_quartets); - int qc[] = {0, 1, 2, 3, 0, 2, 1, 3, 0, 3, 1, 2}; double onethird = 1.0/3.0; @@ -733,21 +731,97 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info size2 = sizeA-3; size1 = sizeA-2; size0 = sizeA-1; - LMGroups.uniqueQuarts = 1 + size3 + - size2 * (size2-1) / 2 + - size1 * (size1-1) * (size1-2) / 6 + - size0 * (size0-1) * (size0-2) * (size0-3) / 24; + LMGroups.uniqueQuarts = (int64_t)1 + size3 + + (int64_t)size2 * (size2-1) / 2 + + (int64_t)size1 * (size1-1) * (size1-2) / 6 + + (int64_t)size0 * (size0-1) * (size0-2) * (size0-3) / 24; break; case 2: - LMGroups.uniqueQuarts = (sizeA * (sizeA - 1)) / 2 * (sizeB * (sizeB - 1)) / 2; break; + LMGroups.uniqueQuarts = ((int64_t)sizeA * (sizeA - 1)) / 2 * (sizeB * (sizeB - 1)) / 2; break; case 3: - LMGroups.uniqueQuarts = sizeA * sizeB * (sizeC * (sizeC - 1)) / 2; break; + LMGroups.uniqueQuarts = (int64_t)sizeA * sizeB * (sizeC * (sizeC - 1)) / 2; break; case 4: - LMGroups.uniqueQuarts = sizeA * sizeB * sizeC * sizeD; break; + LMGroups.uniqueQuarts = (int64_t)sizeA * sizeB * sizeC * sizeD; break; default: outError("Unknown Likelihood Mapping mode! PLEASE report this to the developers!"); break; } + + if (params->lmap_num_quartets == 0) + params->lmap_num_quartets = LMGroups.uniqueQuarts; + if (params->lmap_num_quartets > LMGroups.uniqueQuarts) { + cout << "INFO: Number of quartets is reduced to all unique quartets " << LMGroups.uniqueQuarts << endl; + } + + cout << "Computing " << params->lmap_num_quartets << " quartet likelihoods (one dot represents 100 quartets)." << endl << endl; + + lmap_quartet_info.resize(params->lmap_num_quartets); + + bool quartets_drawn = false; + + if (params->lmap_num_quartets == LMGroups.uniqueQuarts) { + // draw all unique quartets now + quartets_drawn = true; + int64_t qid = 0; + switch (numGroups) { + case 1: + for (int i0 = 0; i0 < sizeA-3; i0++) + for (int i1 = i0+1; i1 < sizeA-2; i1++) + for (int i2 = i1+1; i2 < sizeA-1; i2++) + for (int i3 = i2+1; i3 < sizeA; i3++) { + lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0]; + lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[i1]; + lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[i2]; + lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[i3]; + qid++; + } + break; + + case 2: + for (int i0 = 0; i0 < sizeA-1; i0++) + for (int i1 = i0+1; i1 < sizeA; i1++) + for (int i2 = 0; i2 < sizeB-1; i2++) + for (int i3 = i2+1; i3 < sizeB; i3++) { + lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0]; + lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[i1]; + lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[i2]; + lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[i3]; + qid++; + } + break; + + case 3: + for (int i0 = 0; i0 < sizeA; i0++) + for (int i1 = 0; i1 < sizeB; i1++) + for (int i2 = 0; i2 < sizeC-1; i2++) + for (int i3 = i2+1; i3 < sizeC; i3++) { + lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0]; + lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[i1]; + lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[i2]; + lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[i3]; + qid++; + } + break; + case 4: + for (int i0 = 0; i0 < sizeA; i0++) + for (int i1 = 0; i1 < sizeB; i1++) + for (int i2 = 0; i2 < sizeC; i2++) + for (int i3 = 0; i3 < sizeD; i3++) { + lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0]; + lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[i1]; + lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[i2]; + lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[i3]; + qid++; + } + break; + + default: + break; + } + // sanity check + assert(qid == LMGroups.uniqueQuarts); + } + // fprintf(stderr,"XXX - #quarts: %d; #groups: %d, A: %d, B:%d, C:%d, D:%d\n", LMGroups.uniqueQuarts, LMGroups.numGroups, sizeA, sizeB, sizeC, sizeD); @@ -763,57 +837,58 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info #ifdef _OPENMP #pragma omp for schedule(guided) #endif - for (int qid = 0; qid < params->lmap_num_quartets; qid++) { /*** draw lmap_num_quartets quartets randomly ***/ - // fprintf(stderr, "%d\n", qid); + for (int64_t qid = 0; qid < params->lmap_num_quartets; qid++) { /*** draw lmap_num_quartets quartets randomly ***/ + // fprintf(stderr, "%I64d\n", qid); // uniformly draw 4 taxa // (a) sample taxon 1 // was: lmap_quartet_info[qid].seqID[0] = random_int(leafNum); - lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[random_int(sizeA, rstream)]; - - do { - // (b) sample taxon 2 according to the number of clusters - // was: lmap_quartet_info[qid].seqID[1] = random_int(leafNum); - switch(numGroups){ - case 1: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|a3,a4 - case 2: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|b1,b2 - case 3: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c1,c2 - case 4: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c, d - default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; - } - } while (lmap_quartet_info[qid].seqID[1] == lmap_quartet_info[qid].seqID[0]); - do { - // (c) sample taxon 3 according to the number of clusters - // was: lmap_quartet_info[qid].seqID[2] = random_int(leafNum); - switch(numGroups){ - case 1: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|A3,a4 - case 2: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|B1,b2 - case 3: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C1,c2 - case 4: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C, d - default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; - } - } while (lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[1]); - do { - // (d) sample taxon 4 according to the number of clusters - // was: lmap_quartet_info[qid].seqID[3] = random_int(leafNum); - switch(numGroups){ - case 1: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|a3,A4 - case 2: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|b1,B2 - case 3: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |c1,C2 - case 4: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[random_int(sizeD, rstream)]; break; // a ,b |c, D - default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; - } - } while (lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[1] - || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[2]); - + if (!quartets_drawn) { + // draw a random quartet + lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[random_int(sizeA, rstream)]; + + do { + // (b) sample taxon 2 according to the number of clusters + // was: lmap_quartet_info[qid].seqID[1] = random_int(leafNum); + switch(numGroups){ + case 1: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|a3,a4 + case 2: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|b1,b2 + case 3: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c1,c2 + case 4: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c, d + default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; + } + } while (lmap_quartet_info[qid].seqID[1] == lmap_quartet_info[qid].seqID[0]); + do { + // (c) sample taxon 3 according to the number of clusters + // was: lmap_quartet_info[qid].seqID[2] = random_int(leafNum); + switch(numGroups){ + case 1: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|A3,a4 + case 2: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|B1,b2 + case 3: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C1,c2 + case 4: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C, d + default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; + } + } while (lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[1]); + do { + // (d) sample taxon 4 according to the number of clusters + // was: lmap_quartet_info[qid].seqID[3] = random_int(leafNum); + switch(numGroups){ + case 1: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|a3,A4 + case 2: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|b1,B2 + case 3: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |c1,C2 + case 4: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[random_int(sizeD, rstream)]; break; // a ,b |c, D + default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break; + } + } while (lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[1] + || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[2]); + } // fprintf(stderr, "qqq%d: %d, %d, %d, %d\n", qid, lmap_quartet_info[qid].seqID[0], lmap_quartet_info[qid].seqID[1], lmap_quartet_info[qid].seqID[2], lmap_quartet_info[qid].seqID[3]); // *** taxa should not be sorted, because that changes the corners a dot is assigned to - removed HAS ;^) // obsolete: sort(lmap_quartet_info[qid].seqID, lmap_quartet_info[qid].seqID+4); // why sort them?!? HAS ;^) - + // initialize sub-alignment and sub-tree Alignment *quartet_aln; - PhyloTree *quartet_tree; if (aln->isSuperAlignment()) { quartet_aln = new SuperAlignment; } else { @@ -821,63 +896,75 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info } IntVector seq_id; seq_id.insert(seq_id.begin(), lmap_quartet_info[qid].seqID, lmap_quartet_info[qid].seqID+4); - quartet_aln->extractSubAlignment(aln, seq_id, 0); - if (isSuperTree()) { - quartet_tree = new PhyloSuperTree((SuperAlignment*)quartet_aln, (PhyloSuperTree*)this); + IntVector kept_partitions; + // only keep partitions with at least 3 sequences + quartet_aln->extractSubAlignment(aln, seq_id, 0, 3, &kept_partitions); + + if (kept_partitions.size() == 0) { + // nothing kept + for (int k = 0; k < 3; k++) { + lmap_quartet_info[qid].logl[k] = -1.0; + } } else { - quartet_tree = new PhyloTree(quartet_aln); - } + // something partition kept, do computations + PhyloTree *quartet_tree; + if (isSuperTree()) { + quartet_tree = new PhyloSuperTree((SuperAlignment*)quartet_aln, (PhyloSuperTree*)this); + } else { + quartet_tree = new PhyloTree(quartet_aln); + } - // set up parameters - quartet_tree->setParams(params); - quartet_tree->optimize_by_newton = params->optimize_by_newton; - quartet_tree->setLikelihoodKernel(params->SSE); - - // set up partition model - if (isSuperTree()) { - PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree; - PhyloSuperTree *super_tree = (PhyloSuperTree*)this; - for (int i = 0; i < super_tree->size(); i++) { - quartet_super_tree->at(i)->setModelFactory(super_tree->at(i)->getModelFactory()); - quartet_super_tree->at(i)->setModel(super_tree->at(i)->getModel()); - quartet_super_tree->at(i)->setRate(super_tree->at(i)->getRate()); + // set up parameters + quartet_tree->setParams(params); + quartet_tree->optimize_by_newton = params->optimize_by_newton; + quartet_tree->setLikelihoodKernel(params->SSE); + + // set up partition model + if (isSuperTree()) { + PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree; + PhyloSuperTree *super_tree = (PhyloSuperTree*)this; + for (int i = 0; i < quartet_super_tree->size(); i++) { + quartet_super_tree->at(i)->setModelFactory(super_tree->at(kept_partitions[i])->getModelFactory()); + quartet_super_tree->at(i)->setModel(super_tree->at(kept_partitions[i])->getModel()); + quartet_super_tree->at(i)->setRate(super_tree->at(kept_partitions[i])->getRate()); + } } - } - - // set model and rate - quartet_tree->setModelFactory(model_factory); - quartet_tree->setModel(getModel()); - quartet_tree->setRate(getRate()); - // NOTE: we don't need to set phylo_tree in model and rate because parameters are not reoptimized - - - - // loop over 3 quartets to compute likelihood - for (int k = 0; k < 3; k++) { - string quartet_tree_str; - quartet_tree_str = "(" + quartet_aln->getSeqName(qc[k*4]) + "," + quartet_aln->getSeqName(qc[k*4+1]) + ",(" + - quartet_aln->getSeqName(qc[k*4+2]) + "," + quartet_aln->getSeqName(qc[k*4+3]) + "));"; - quartet_tree->readTreeStringSeqName(quartet_tree_str); - quartet_tree->initializeAllPartialLh(); - quartet_tree->wrapperFixNegativeBranch(true); - // optimize branch lengths with logl_epsilon=0.1 accuracy - lmap_quartet_info[qid].logl[k] = quartet_tree->optimizeAllBranches(10, 0.1); - } - // reset model & rate so that they are not deleted - quartet_tree->setModel(NULL); - quartet_tree->setModelFactory(NULL); - quartet_tree->setRate(NULL); - - if (isSuperTree()) { - PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree; - for (int i = 0; i < quartet_super_tree->size(); i++) { - quartet_super_tree->at(i)->setModelFactory(NULL); - quartet_super_tree->at(i)->setModel(NULL); - quartet_super_tree->at(i)->setRate(NULL); + + // set model and rate + quartet_tree->setModelFactory(model_factory); + quartet_tree->setModel(getModel()); + quartet_tree->setRate(getRate()); + // NOTE: we don't need to set phylo_tree in model and rate because parameters are not reoptimized + + + + // loop over 3 quartets to compute likelihood + for (int k = 0; k < 3; k++) { + string quartet_tree_str; + quartet_tree_str = "(" + quartet_aln->getSeqName(qc[k*4]) + "," + quartet_aln->getSeqName(qc[k*4+1]) + ",(" + + quartet_aln->getSeqName(qc[k*4+2]) + "," + quartet_aln->getSeqName(qc[k*4+3]) + "));"; + quartet_tree->readTreeStringSeqName(quartet_tree_str); + quartet_tree->initializeAllPartialLh(); + quartet_tree->wrapperFixNegativeBranch(true); + // optimize branch lengths with logl_epsilon=0.1 accuracy + lmap_quartet_info[qid].logl[k] = quartet_tree->optimizeAllBranches(10, 0.1); } + // reset model & rate so that they are not deleted + quartet_tree->setModel(NULL); + quartet_tree->setModelFactory(NULL); + quartet_tree->setRate(NULL); + + if (isSuperTree()) { + PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree; + for (int i = 0; i < quartet_super_tree->size(); i++) { + quartet_super_tree->at(i)->setModelFactory(NULL); + quartet_super_tree->at(i)->setModel(NULL); + quartet_super_tree->at(i)->setRate(NULL); + } + } + delete quartet_tree; } - - delete quartet_tree; + delete quartet_aln; // determine likelihood order @@ -1036,7 +1123,22 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info lmap_quartet_info[qid].area=6; // LM_REG7 - center } + { + int64_t count = (qid+1); + if ((count % 100) == 0) { + cout << '.'; + if ((count % 1000) == 0) { // separator after 10 dots + cout << ' '; + if ((count % 5000) == 0) // new-line after 50 dots + cout << " : " << count << endl; + } + cout.flush(); + } + } } /*** end draw lmap_num_quartets quartets randomly ***/ + if ((params->lmap_num_quartets % 5000) != 0) { + cout << ". : " << params->lmap_num_quartets << flush << endl << endl; + } else cout << endl; #ifdef _OPENMP finish_random(rstream); @@ -1048,6 +1150,79 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info //************************************** +/** + read groups in following format "(A, B, C, D), (E, F), (G, H), (I);" +**/ +void readGroupNewick(char *filename, MSetsBlock *sets_block) { + try { + ifstream in; + in.exceptions(ios::failbit | ios::badbit); + in.open(filename); + in.exceptions(ios::badbit); + char ch; + string name; + TaxaSetNameVector *sets = sets_block->getSets(); + while (!in.eof()) { + // read the cluster + TaxaSetName *myset = new TaxaSetName; + sets->push_back(myset); + in >> ch; + if (ch != '(') + throw "Cluster does not start with '('"; + // read taxon name + do { + in >> ch; + name = ""; + do { + name += ch; + ch = in.get(); + if (controlchar(ch)) { + in >> ch; + break; + } + } while (ch != ',' && ch != ')'); + myset->taxlist.push_back(name); + if (ch == ',') continue; // continue to read next taxon name + if (ch == ')') break; + throw "No ',' or ')' found after " + name; + } while (ch != ')'); + in >> ch; + if (isalnum(ch)) { + // read cluster name + name = ""; + do { + name += ch; + ch = in.get(); + if (controlchar(ch)) { + in >> ch; + break; + } + } while (ch != ',' && ch != ';'); + myset->name = name; + } else { + myset->name = "Cluster" + convertIntToString(sets->size()); + } + // check for duplicated name + for (TaxaSetNameVector::iterator it = sets->begin(); it != sets->end()-1; it++) + if ((*it)->name == myset->name) + throw "Duplicated cluster name " + myset->name; + if (ch == ',') continue; // continue to read next cluster + if (ch == ';') break; + throw "No ',' or ';' found after " + name + ")"; + } + + in.clear(); + // set the failbit again + in.exceptions(ios::failbit | ios::badbit); + in.close(); + } catch (ios::failure) { + outError(ERR_READ_INPUT); + } catch (const char* str) { + outError(str); + } catch (string &str) { + outError(str); + } +} void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGroups) { @@ -1056,17 +1231,26 @@ void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGro MSetsBlock *lmclusters; lmclusters = new MSetsBlock(); cout << endl << "Reading likelihood mapping cluster file " << filename << "..." << endl; - cout << "(The leading numbers represent the order from the master alignment.)" << endl << endl; - MyReader nexus(filename); - - nexus.Add(lmclusters); - - MyToken token(nexus.inf); - nexus.Execute(token); + InputType intype = detectInputFile(filename); + + if (intype == IN_NEXUS) { + MyReader nexus(filename); + nexus.Add(lmclusters); + MyToken token(nexus.inf); + nexus.Execute(token); + } else if (intype == IN_NEWICK) { + readGroupNewick(filename, lmclusters); + } else + outError("Unknown cluster file format, please use NEXUS or RAxML-style format"); + + if (lmclusters->getNSets() == 0) + outError("No cluster found"); // lmclusters->Report(cout); + cout << "(The leading numbers represent the order from the master alignment.)" << endl << endl; + TaxaSetNameVector *allsets = lmclusters->getSets(); numsets = allsets->size(); @@ -1148,6 +1332,7 @@ void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGro } LMGroups.numGroups = n; + delete lmclusters; } // end PhyloTree::readLikelihoodMappingGroups @@ -1159,16 +1344,27 @@ void PhyloTree::doLikelihoodMapping() { // vector<SeqQuartetInfo> lmap_seq_quartet_info; // int areacount[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // int cornercount[4] = {0, 0, 0, 0}; - int resolved, partly, unresolved; - int qid; + int64_t resolved, partly, unresolved; + int64_t qid; ofstream out; string filename; - + if(params->lmap_cluster_file != NULL) { // cout << "YYY: test reading" << params->lmap_cluster_file << endl; - readLikelihoodMappingGroups(params->lmap_cluster_file, LMGroups); + readLikelihoodMappingGroups(params->lmap_cluster_file, LMGroups); } else { - LMGroups.numGroups = 0; /* no clusterfile -> un-initialized */ + LMGroups.numGroups = 0; /* no clusterfile -> un-initialized */ + int64_t recommended_quartets; + if (aln->getNSeq() > 10) { + recommended_quartets = (int64_t)25*aln->getNSeq(); + } else { + int64_t n = aln->getNSeq(); + recommended_quartets = n*(n-1)*(n-2)*(n-3)/24; + } + if (params->lmap_num_quartets > 0 && params->lmap_num_quartets < recommended_quartets) { + outWarning("Number of quartets is recommended to be at least " + convertInt64ToString(recommended_quartets) + " s.t. each sequence is sampled sufficiently"); + } + } areacount[0] = 0; @@ -1198,8 +1394,6 @@ void PhyloTree::doLikelihoodMapping() { lmap_seq_quartet_info[qid].countarr[9] = 0; } - cout << "Computing quartet likelihoods..." << endl << endl; - computeQuartetLikelihoods(lmap_quartet_info, LMGroups); for (qid = 0; qid < params->lmap_num_quartets; qid++) { @@ -1254,11 +1448,12 @@ void PhyloTree::doLikelihoodMapping() { if (params->print_lmap_quartet_lh) { // print quartet file + out << "SeqIDs\tlh1\tlh2\tlh3\tweight1\tweight2\tweight3" << endl; for (qid = 0; qid < params->lmap_num_quartets; qid++) { - out << "(" << lmap_quartet_info[qid].seqID[0] << "," - << lmap_quartet_info[qid].seqID[1] << "," - << lmap_quartet_info[qid].seqID[2] << "," - << lmap_quartet_info[qid].seqID[3] << ")" + out << "(" << lmap_quartet_info[qid].seqID[0]+1 << "," + << lmap_quartet_info[qid].seqID[1]+1 << "," + << lmap_quartet_info[qid].seqID[2]+1 << "," + << lmap_quartet_info[qid].seqID[3]+1 << ")" << "\t" << lmap_quartet_info[qid].logl[0] << "\t" << lmap_quartet_info[qid].logl[1] << "\t" << lmap_quartet_info[qid].logl[2] @@ -1267,128 +1462,15 @@ void PhyloTree::doLikelihoodMapping() { << "\t" << lmap_quartet_info[qid].qweight[2] << endl; } - PhyloTree::reportLikelihoodMapping(out); - - /**** begin of report output ****/ - /**** moved to PhyloTree::reportLikelihoodMapping ****/ -#if 0 - // LM_REG1 0 /* top corner */ - // LM_REG2 1 /* bottom-right corner */ - // LM_REG3 2 /* bottom-left corner */ - // LM_REG4 3 /* right rectangle */ - // LM_REG5 4 /* bottom rectangle */ - // LM_REG6 5 /* left rectangle */ - // LM_REG7 6 /* center */ - // LM_AR1 7 /* top third */ - // LM_AR2 8 /* bottom-right third */ - // LM_AR3 9 /* bottom-left third */ - - out << "LIKELIHOOD MAPPING ANALYSIS" << endl << endl; - out << "Number of quartets: " << params->lmap_num_quartets << "(random choice)" << endl << endl; - out << "Quartet trees are based on the selected model of substitution." << endl << endl; - out << "Sequences are not grouped in clusters." << endl; - - out << endl << endl; - out << "LIKELIHOOD MAPPING STATISTICS" << endl << endl; - - out << " (a,b)-(c,d) (a,b)-(c,d) " << endl; - out << " /\\ /\\ " << endl; - out << " / \\ / \\ " << endl; - out << " / \\ / 1 \\ " << endl; - out << " / a1 \\ / \\ / \\ " << endl; - out << " /\\ /\\ / \\/ \\ " << endl; - out << " / \\ / \\ / /\\ \\ " << endl; - out << " / \\ / \\ / 6 / \\ 4 \\ " << endl; - out << " / \\/ \\ /\\ / 7 \\ /\\ " << endl; - out << " / | \\ / \\ /______\\ / \\ " << endl; - out << " / a3 | a2 \\ / 3 | 5 | 2 \\ " << endl; - out << " /__________|_________\\ /_____|________|_____\\ " << endl; - out << "(a,d)-(b,c) (a,c)-(b,d) (a,b)-(c,d) (a,c)-(b,d) " << endl << endl; - out << "For more information about likelihood-mapping refer to" << endl; - out << " Strimmer and von Haeseler (1997) PNAS 94:6815-6819" << endl; - out << " http://www.ncbi.nlm.nih.gov/pubmed/9192648" << endl; - out << "and/or" << endl; - out << " Schmidt and von Haeseler (2003) Current Protocols in Bioinformatics" << endl; - out << " (by Baxevanis et al., Eds.), Unit 6, Wiley&Sons, New York." << endl; - out << " http://dx.doi.org/10.1002/0471250953.bi0606s17" << endl; - - - out << endl << endl; - out << "Quartet support of regions a1, a2, a3:" << endl << endl; - out << " #quartets a1 (% a1) a2 (% a2) a3 (% a3) name" << endl; - for (qid = 0; qid < leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; - - out.setf(ios::fixed, ios::floatfield); // set fixed floating format - out.precision(2); - out << setw(4) << qid+1 - << setw(9) << sumq - << setw(7) << lmap_seq_quartet_info[qid].countarr[7] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[7]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[8] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[8]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[9] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[9]/sumq << ") " - << PhyloTree::aln->getSeqName(qid) << endl; - } - - out << endl << endl << "Quartet support of areas 1-7:" << endl << endl; - out << " resolved partly unresolved name" << endl; - out << " #quartets 1 (% 1) 2 (% 2) 3 (% 3) 4 (% 4) 5 (% 5) 6 (% 6) 7 (% 7)" << endl; - for (qid = 0; qid < leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; - - out.setf(ios::fixed, ios::floatfield); // set fixed floating format - out.precision(2); - out << setw(4) << qid+1 - << setw(9) << sumq - << setw(7) << lmap_seq_quartet_info[qid].countarr[0] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[0]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[1] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[1]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[2] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[2]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[3] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[3]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[4] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[4]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[5] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[5]/sumq << ") " - << setw(7) << lmap_seq_quartet_info[qid].countarr[6] - << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[6]/sumq << ") " - << PhyloTree::aln->getSeqName(qid) << endl; - } +// PhyloTree::reportLikelihoodMapping(out); - out << endl << endl << "Quartet resolution per sequence:" << endl << endl; - out << " #quartets resolved partly unresolved name" << endl; - for (qid = 0; qid < leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3]; - unsigned long partly = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6]; - unsigned long unres = lmap_seq_quartet_info[qid].countarr[LM_REG7]; - unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; - - out.setf(ios::fixed, ios::floatfield); // set fixed floating format - out.precision(2); - out << setw(4) << qid+1 - << setw(9) << sumq - << setw(7) << resolved - << " (" << setw(6) << (double) 100.0*resolved/sumq << ") " - << setw(7) << partly - << " (" << setw(6) << (double) 100.0*partly/sumq << ") " - << setw(7) << unres - << " (" << setw(6) << (double) 100.0*unres/sumq << ") " - << PhyloTree::aln->getSeqName(qid) << endl; - } -#endif } resolved = areacount[0] + areacount[1] + areacount[2]; partly = areacount[3] + areacount[4] + areacount[5]; unresolved = areacount[6]; +#if 0 // for debugging purposes only (HAS) out << endl << "LIKELIHOOD MAPPING SUMMARY" << endl << endl; out << "Number of quartets: " << (resolved+partly+unresolved) << " (randomly drawn with replacement)" << endl << endl; @@ -1400,13 +1482,8 @@ void PhyloTree::doLikelihoodMapping() { out << "Number of unresolved quartets: " << unresolved << " (" << 100.0 * unresolved/(resolved+partly+unresolved) << "%)" << endl << endl; -#if 0 #endif - /**** end of report output ****/ - /**** moved to PhyloTree::reportLikelihoodMapping ****/ - - if (params->print_lmap_quartet_lh) { // print quartet file out.close(); @@ -1421,25 +1498,14 @@ void PhyloTree::doLikelihoodMapping() { fclose(epsout); cout << "likelihood mapping plot (EPS) written to " << lmap_epsfilename << endl; - -// cout << "\nOverall quartet resolution: (from " << (resolved+partly+unresolved) << " randomly drawn quartets)" << endl; -// cout << "Fully resolved quartets: " << resolved << " (= " -// << (double) resolved * 100.0 / (resolved+partly+unresolved) << "%)" << endl; -// cout << "Partly resolved quartets: " << partly << " (= " -// << (double) partly * 100.0 / (resolved+partly+unresolved) << "%)" << endl; -// cout << "Unresolved quartets: " << unresolved << " (= " -// << (double) unresolved * 100.0 / (resolved+partly+unresolved) << "%)" << endl << endl; - } // end PhyloTree::doLikelihoodMapping void PhyloTree::reportLikelihoodMapping(ofstream &out) { - // int areacount[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - // int cornercount[4] = {0, 0, 0, 0}; - int resolved, partly, unresolved; - int qid; + int64_t resolved, partly, unresolved; + int64_t qid; int leafNum; leafNum = PhyloTree::aln->getNSeq(); // vector<QuartetInfo> lmap_quartet_info; @@ -1456,23 +1522,16 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) { // LM_AR1 7 /* top third */ // LM_AR2 8 /* bottom-right third */ // LM_AR3 9 /* bottom-left third */ -#if 0 -#define LM_REG1 0 /* top corner */ -#define LM_REG2 1 /* bottom-right corner */ -#define LM_REG3 2 /* bottom-left corner */ -#define LM_REG4 3 /* right rectangle */ -#define LM_REG5 4 /* bottom rectangle */ -#define LM_REG6 5 /* left rectangle */ -#define LM_REG7 6 /* center */ -#define LM_AR1 7 /* top third */ -#define LM_AR2 8 /* bottom-right third */ -#define LM_AR3 9 /* bottom-left third */ -#endif out << "LIKELIHOOD MAPPING ANALYSIS" << endl; out << "---------------------------" << endl << endl; - out << "Number of quartets: " << params->lmap_num_quartets << " (randomly chosen from " + out << "Number of quartets: " << params->lmap_num_quartets; + if (params->lmap_num_quartets < LMGroups.uniqueQuarts) + cout << " (randomly chosen with replacement from " << LMGroups.uniqueQuarts << " existing unique quartets)" << endl << endl; + else + out << " (all unique quartets)" << endl << endl; + out << "Quartet trees are based on the selected model of substitution." << endl << endl; if(LMGroups.numGroups == 1) { @@ -1613,8 +1672,7 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) { out << " #quartets a1 (% a1) a2 (% a2) a3 (% a3) name" << endl; out << "-----------------------------------------------------------------------------" << endl; for (qid = 0; qid <= leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; + int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; if (sumq>0) sumq0=sumq; else sumq0=1; @@ -1650,8 +1708,7 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) { out << " #quartets 1 (% 1) 2 (% 2) 3 (% 3) 4 (% 4) 5 (% 5) 6 (% 6) 7 (% 7)" << endl; out << "------------------------------------------------------------------------------------------------------------------------------------------------" << endl; for (qid = 0; qid <= leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; + int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; if (sumq>0) sumq0=sumq; else sumq0=1; @@ -1703,11 +1760,10 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) { out << " #quartets resolved partly unresolved name" << endl; out << "-----------------------------------------------------------------------------" << endl; for (qid = 0; qid <= leafNum; qid++) { - //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6]; - unsigned long resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3]; - unsigned long partly = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6]; - unsigned long unres = lmap_seq_quartet_info[qid].countarr[LM_REG7]; - unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; + int64_t resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3]; + int64_t partly = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6]; + int64_t unres = lmap_seq_quartet_info[qid].countarr[LM_REG7]; + int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9]; if (sumq>0) sumq0=sumq; else sumq0=1; @@ -1754,3 +1810,4 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) { << " (=" << 100.0 * unresolved/(resolved+partly+unresolved) << "%)" << endl << endl; } // end PhyloTree::reportLikelihoodMapping + diff --git a/superalignment.cpp b/superalignment.cpp index 6d71ca0..7311455 100644 --- a/superalignment.cpp +++ b/superalignment.cpp @@ -101,10 +101,11 @@ void SuperAlignment::linkSubAlignment(int part) { } } -void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char) { +void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) { assert(aln->isSuperAlignment()); SuperAlignment *saln = (SuperAlignment*)aln; + int i; IntVector::iterator it; for (it = seq_id.begin(); it != seq_id.end(); it++) { assert(*it >= 0 && *it < aln->getNSeq()); @@ -115,24 +116,33 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int //Alignment::extractSubAlignment(aln, seq_id, 0); taxa_index.resize(getNSeq()); - for (int i = 0; i < getNSeq(); i++) + for (i = 0; i < getNSeq(); i++) taxa_index[i].resize(saln->partitions.size(), -1); int part = 0; - partitions.resize(saln->partitions.size()); +// partitions.resize(saln->partitions.size()); + partitions.resize(0); for (vector<Alignment*>::iterator ait = saln->partitions.begin(); ait != saln->partitions.end(); ait++, part++) { IntVector sub_seq_id; for (IntVector::iterator it = seq_id.begin(); it != seq_id.end(); it++) if (saln->taxa_index[*it][part] >= 0) sub_seq_id.push_back(saln->taxa_index[*it][part]); + if (sub_seq_id.size() < min_taxa) + continue; Alignment *subaln = new Alignment; subaln->extractSubAlignment(*ait, sub_seq_id, 0); - partitions[part] = subaln; - linkSubAlignment(part); + partitions.push_back(subaln); + linkSubAlignment(partitions.size()-1); + if (kept_partitions) kept_partitions->push_back(part); // cout << subaln->getNSeq() << endl; // subaln->printPhylip(cout); } + if (partitions.size() < saln->partitions.size()) { + for (i = 0; i < getNSeq(); i++) + taxa_index[i].resize(partitions.size()); + } + // now build the patterns based on taxa_index buildPattern(); } diff --git a/superalignment.h b/superalignment.h index c3552ad..583dd0a 100644 --- a/superalignment.h +++ b/superalignment.h @@ -108,8 +108,10 @@ public: @param aln original input alignment @param seq_id ID of sequences to extract from @param min_true_cher the minimum number of non-gap characters, true_char<min_true_char -> delete the sequence + @param min_taxa only keep alignment that has >= min_taxa sequences + @param[out] kept_partitions (for SuperAlignment) indices of kept partitions */ - virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char); + virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa = 0, IntVector *kept_partitions = NULL); /** * remove identical sequences from alignment diff --git a/tools.cpp b/tools.cpp index fc01036..85ca06c 100644 --- a/tools.cpp +++ b/tools.cpp @@ -263,6 +263,35 @@ void convert_int_vec(const char *str, IntVector &vec) throw (string) { } +int64_t convert_int64(const char *str) throw (string) { + char *endptr; + int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t + + if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL || *endptr != 0) { + string err = "Expecting large integer , but found \""; + err += str; + err += "\" instead"; + throw err; + } + + return i; +} + +int64_t convert_int64(const char *str, int &end_pos) throw (string) { + char *endptr; + int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t + + if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL) { + string err = "Expecting large integer, but found \""; + err += str; + err += "\" instead"; + throw err; + } + end_pos = endptr - str; + return i; +} + + double convert_double(const char *str) throw (string) { char *endptr; double d = strtod(str, &endptr); @@ -613,6 +642,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.user_file = NULL; params.fai = false; params.testAlpha = false; + params.test_param = false; params.testAlphaEpsAdaptive = false; params.randomAlpha = false; params.testAlphaEps = 0.1; @@ -882,7 +912,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.freq_const_patterns = NULL; params.no_rescale_gamma_invar = false; params.compute_seq_identity_along_tree = false; - params.lmap_num_quartets = 0; + params.lmap_num_quartets = -1; params.lmap_cluster_file = NULL; params.print_lmap_quartet_lh = false; params.link_alpha = false; @@ -2520,6 +2550,12 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.testAlpha = true; continue; } + + if (strcmp(argv[cnt], "-test_param") == 0 || strcmp(argv[cnt], "--opt-gamma-inv") == 0) { + params.test_param = true; + continue; + } + if (strcmp(argv[cnt], "--adaptive-eps") == 0) { params.testAlphaEpsAdaptive = true; continue; @@ -2830,9 +2866,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) { cnt++; if (cnt >= argc) throw "Use -lmap <likelihood_mapping_num_quartets>"; - params.lmap_num_quartets = convert_int(argv[cnt]); - if (params.lmap_num_quartets < 1) - throw "Number of quartets must be >= 1"; + if (strcmp(argv[cnt],"ALL") == 0) { + params.lmap_num_quartets = 0; + } else { + params.lmap_num_quartets = convert_int64(argv[cnt]); + if (params.lmap_num_quartets < 0) + throw "Number of quartets must be >= 1"; + } continue; } @@ -2844,6 +2884,8 @@ void parseArg(int argc, char *argv[], Params ¶ms) { // '-keep_ident' is currently required to allow a 1-to-1 mapping of the // user-given groups (HAS) - possibly obsolete in the future versions params.ignore_identical_seqs = false; + if (params.lmap_num_quartets < 0) + params.lmap_num_quartets = 0; continue; } @@ -3121,7 +3163,7 @@ void usage_iqtree(char* argv[], bool full_command) { << " number of categories (default: n=4)" << endl << " -a <Gamma_shape> Gamma shape parameter for site rates (default: estimate)" << endl << " -gmedian Median approximation for +G site rates (default: mean)" << endl - << " --test-alpha More thorough estimation for +I+G model parameters" << endl + << " --opt-gamma-inv More thorough estimation for +I+G model parameters" << endl << " -i <p_invar> Proportion of invariable sites (default: estimate)" << endl << " -mh Computing site-specific rates to .mhrate file using" << endl << " Meyer & von Haeseler (2003) method" << endl diff --git a/tools.h b/tools.h index ef24f80..21a2462 100644 --- a/tools.h +++ b/tools.h @@ -439,6 +439,12 @@ public: bool testAlpha; /** + * Restart the optimization of alpha and pinvar from different starting + * pinv values (supercedes the option testAlpha + */ + bool test_param; + + /** * Automatic adjust the log-likelihood espilon using some heuristic */ bool testAlphaEpsAdaptive; @@ -1685,7 +1691,7 @@ public: /** true to ignore checkpoint file */ bool ignore_checkpoint; /** number of quartets for likelihood mapping */ - int lmap_num_quartets; + int64_t lmap_num_quartets; /** file containing the cluster information for clustered likelihood mapping @@ -1903,6 +1909,21 @@ int convert_int(const char *str, int &end_pos) throw (string); void convert_int_vec(const char *str, IntVector &vec) throw (string); /** + convert string to int64_t, with error checking + @param str original string + @return the number + */ +int64_t convert_int64(const char *str) throw (string); + +/** + convert string to int64_t, with error checking + @param str original string + @param end_pos end position + @return the number + */ +int64_t convert_int64(const char *str, int64_t &end_pos) throw (string); + +/** convert string to double, with error checking @param str original string @return the double -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
