This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository iqtree.
commit 173db0cf39ac0403d695f62b2493f420ed3d2edd Author: Andreas Tille <[email protected]> Date: Thu Jan 19 08:31:10 2017 +0100 New upstream version 1.5.3+dfsg --- CMakeLists.txt | 2 +- alignment.cpp | 52 ++++++++++++++++++++----- iqtree.cpp | 16 ++++---- model/modelfactory.cpp | 104 ++++++++++++++++++++++++++++++------------------- model/modelfactory.h | 5 +-- model/modelset.cpp | 10 +++++ model/ratefree.cpp | 2 +- phyloanalysis.cpp | 10 +++-- phylokernelnew.h | 45 +++++++++++---------- phylotree.cpp | 4 +- phylotreesse.cpp | 35 +++++++++++++++-- tools.cpp | 3 ++ 12 files changed, 197 insertions(+), 91 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 767f309..6acf40b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_definitions(-DIQ_TREE) # The version number. set (iqtree_VERSION_MAJOR 1) set (iqtree_VERSION_MINOR 5) -set (iqtree_VERSION_PATCH "2") +set (iqtree_VERSION_PATCH "3") set(BUILD_SHARED_LIBS OFF) diff --git a/alignment.cpp b/alignment.cpp index f935d2b..44f0146 100644 --- a/alignment.cpp +++ b/alignment.cpp @@ -376,7 +376,8 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v countConstSite(); cout << "Alignment has " << getNSeq() << " sequences with " << getNSite() << - " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites)" << endl; + " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites, " << + (int)(frac_const_sites*getNSite()) << " constant sites)" << endl; buildSeqStates(); checkSeqName(); // OBSOLETE: identical sequences are handled later @@ -617,10 +618,11 @@ void Alignment::computeConst(Pattern &pat) { bool is_informative = false; // critical fix: const_char was set wrongly to num_states in some data type (binary, codon), // causing wrong log-likelihood computation for +I or +I+G model - if (STATE_UNKNOWN == num_states) - pat.const_char = STATE_UNKNOWN+1; - else - pat.const_char = STATE_UNKNOWN; + pat.const_char = STATE_UNKNOWN+1; +// if (STATE_UNKNOWN == num_states) +// pat.const_char = STATE_UNKNOWN+1; +// else +// pat.const_char = STATE_UNKNOWN; StateBitset state_app; state_app.reset(); int j; @@ -628,7 +630,7 @@ void Alignment::computeConst(Pattern &pat) { state_app[j] = 1; // number of appearance for each state, to compute is_informative - size_t *num_app = new size_t[num_states]; + size_t num_app[num_states]; memset(num_app, 0, num_states*sizeof(size_t)); for (Pattern::iterator i = pat.begin(); i != pat.end(); i++) { @@ -637,10 +639,11 @@ void Alignment::computeConst(Pattern &pat) { state_app &= this_app; if (*i < num_states) { num_app[(int)(*i)]++; - } else if (*i != STATE_UNKNOWN) { - // ambiguous characters - is_const = false; } +// else if (*i != STATE_UNKNOWN) { +// // ambiguous characters +// is_const = false; +// } } int count = 0; // number of states with >= 2 appearances pat.num_chars = 0; // number of states with >= 1 appearance @@ -655,6 +658,7 @@ void Alignment::computeConst(Pattern &pat) { is_informative = (count >= 2); // compute is_const + /* is_const = is_const && (pat.num_chars <= 1); if (is_const) { if (pat.num_chars == 0) // all-gap pattern @@ -668,8 +672,36 @@ void Alignment::computeConst(Pattern &pat) { } } } + */ + is_const = (state_app.count() >= 1); + if (is_const) { + if (state_app.count() == num_states) { + pat.const_char = STATE_UNKNOWN; + } else if (state_app.count() == 1) { + for (j = 0; j < num_states; j++) + if (state_app[j]) { + pat.const_char = j; + break; + } + } else if (seq_type == SEQ_DNA) { + pat.const_char = num_states-1; + for (j = 0; j < num_states; j++) + if (state_app[j]) + pat.const_char += (1<<j); + } else if (seq_type == SEQ_PROTEIN) { + if (state_app[2] && state_app[3]) //4+8, // B = N or D + pat.const_char = num_states; + else if (state_app[5] && state_app[6]) //32+64, // Z = Q or E + pat.const_char = num_states+1; + else if (state_app[9] && state_app[10]) // 512+1024 // U = I or L + pat.const_char = num_states+2; + else ASSERT(0); + } else { + ASSERT(0); + } + } - delete [] num_app; +// delete [] num_app; // compute is_invariant is_invariant = (state_app.count() >= 1); diff --git a/iqtree.cpp b/iqtree.cpp index 979bc4a..e688afc 100644 --- a/iqtree.cpp +++ b/iqtree.cpp @@ -309,18 +309,20 @@ void IQTree::initSettings(Params ¶ms) { // max_candidate_trees = aln->getNSeq() * params.step_iterations; setRootNode(params.root); - string bootaln_name = params.out_prefix; - bootaln_name += ".bootaln"; - if (params.print_bootaln) { - ofstream bootalnout; - bootalnout.open(bootaln_name.c_str()); - bootalnout.close(); - } size_t i; if (params.online_bootstrap && params.gbo_replicates > 0) { if (aln->getNSeq() < 4) outError("It makes no sense to perform bootstrap with less than 4 sequences."); + + string bootaln_name = params.out_prefix; + bootaln_name += ".bootaln"; + if (params.print_bootaln) { + ofstream bootalnout; + bootalnout.open(bootaln_name.c_str()); + bootalnout.close(); + } + // 2015-12-17: initialize random stream for creating bootstrap samples // mainly so that checkpointing does not need to save bootstrap samples int *saved_randstream = randstream; diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp index 88cb0ab..ae6a2e6 100644 --- a/model/modelfactory.cpp +++ b/model/modelfactory.cpp @@ -605,7 +605,7 @@ int ModelFactory::getNParameters() { int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + site_rate->phylo_tree->branchNum; return df; } - +/* double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst *model, double initAlpha, double initPInvar, double *initRates, double *initStateFreqs) { @@ -619,7 +619,7 @@ double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst site_rate->phylo_tree->clearAllPartialLH(); return site_rate->phylo_tree->computeLikelihood(); } - +*/ double ModelFactory::optimizeParametersOnly(double gradient_epsilon) { double logl; /* Optimize substitution and heterogeneity rates independently */ @@ -700,16 +700,20 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info DoubleVector bestLens; tree->saveBranchLengths(initBranLens); bestLens = initBranLens; - int numRateEntries = tree->getModel()->getNumRateEntries(); - double *rates = new double[numRateEntries]; - double *bestRates = new double[numRateEntries]; - tree->getModel()->getRateMatrix(rates); - int numStates = tree->aln->num_states; - double *state_freqs = new double[numStates]; - tree->getModel()->getStateFrequency(state_freqs); +// int numRateEntries = tree->getModel()->getNumRateEntries(); + Checkpoint *model_ckp = new Checkpoint; + Checkpoint *best_ckp = new Checkpoint; + Checkpoint *saved_ckp = model->getCheckpoint(); + *model_ckp = *saved_ckp; +// double *rates = new double[numRateEntries]; +// double *bestRates = new double[numRateEntries]; +// tree->getModel()->getRateMatrix(rates); +// int numStates = tree->aln->num_states; +// double *state_freqs = new double[numStates]; +// tree->getModel()->getStateFrequency(state_freqs); /* Best estimates found */ - double *bestStateFreqs = new double[numStates]; +// double *bestStateFreqs = new double[numStates]; double bestLogl = -DBL_MAX; double bestAlpha = 0.0; double bestPInvar = 0.0; @@ -727,8 +731,8 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info cout << "Testing with init. pinv = " << initPInv << " / init. alpha = " << initAlpha << endl; } - vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs, - initPInv, initAlpha, initBranLens); + vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, + initPInv, initAlpha, initBranLens, model_ckp); if (write_info) { @@ -742,8 +746,13 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info bestPInvar = estResults[0]; bestLens.clear(); tree->saveBranchLengths(bestLens); - tree->getModel()->getRateMatrix(bestRates); - tree->getModel()->getStateFrequency(bestStateFreqs); + model->setCheckpoint(best_ckp); + model->saveCheckpoint(); + model->setCheckpoint(saved_ckp); +// *best_ckp = *saved_ckp; + +// tree->getModel()->getRateMatrix(bestRates); +// tree->getModel()->getStateFrequency(bestStateFreqs); if (estResults[0] < initPInv) { initPInv = estResults[0] - testInterval; if (initPInv < 0.0) @@ -767,11 +776,11 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info } vector<double> estResults; // vector of p_inv, alpha and logl if (Params::getInstance().opt_gammai_keep_bran) - estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs, - initPInv, initAlpha, bestLens); + estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, + initPInv, initAlpha, bestLens, model_ckp); else - estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs, - initPInv, initAlpha, initBranLens); + estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, + initPInv, initAlpha, initBranLens, model_ckp); if (write_info) { cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1] << " / Logl: " << estResults[2] << endl; @@ -785,33 +794,44 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info bestPInvar = estResults[0]; bestLens.clear(); tree->saveBranchLengths(bestLens); - tree->getModel()->getRateMatrix(bestRates); - tree->getModel()->getStateFrequency(bestStateFreqs); + model->setCheckpoint(best_ckp); + model->saveCheckpoint(); + model->setCheckpoint(saved_ckp); +// *best_ckp = *saved_ckp; + +// tree->getModel()->getRateMatrix(bestRates); +// tree->getModel()->getStateFrequency(bestStateFreqs); } } } site_rate->setGammaShape(bestAlpha); site_rate->setPInvar(bestPInvar); - ((ModelGTR*) tree->getModel())->setRateMatrix(bestRates); - ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs); + model->setCheckpoint(best_ckp); + model->restoreCheckpoint(); + model->setCheckpoint(saved_ckp); +// ((ModelGTR*) tree->getModel())->setRateMatrix(bestRates); +// ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs); tree->restoreBranchLengths(bestLens); - tree->getModel()->decomposeRateMatrix(); +// tree->getModel()->decomposeRateMatrix(); tree->clearAllPartialLH(); tree->setCurScore(tree->computeLikelihood()); - assert(fabs(tree->getCurScore() - bestLogl) < 1.0); - if (write_info) { + if (write_info) { cout << endl; cout << "Best p_inv: " << bestPInvar << " / best gamma shape: " << bestAlpha << " / "; cout << "Logl: " << tree->getCurScore() << endl; } + assert(fabs(tree->getCurScore() - bestLogl) < 1.0); + +// delete [] rates; +// delete [] state_freqs; +// delete [] bestRates; +// delete [] bestStateFreqs; + + delete model_ckp; + delete best_ckp; - delete [] rates; - delete [] state_freqs; - delete [] bestRates; - delete [] bestStateFreqs; - double elapsed_secs = getRealTime() - begin_time; if (write_info) cout << "Parameters optimization took " << elapsed_secs << " sec" << endl; @@ -824,21 +844,25 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info } vector<double> ModelFactory::optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, - PhyloTree *tree, RateHeterogeneity *site_rates, double *rates, - double *state_freqs, double initPInv, double initAlpha, - DoubleVector &lenvec) { + double initPInv, double initAlpha, + DoubleVector &lenvec, Checkpoint *model_ckp) { + PhyloTree *tree = site_rate->getTree(); tree->restoreBranchLengths(lenvec); - ((ModelGTR*) tree->getModel())->setRateMatrix(rates); - ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs); - tree->getModel()->decomposeRateMatrix(); - site_rates->setPInvar(initPInv); - site_rates->setGammaShape(initAlpha); + Checkpoint *saved_ckp = model->getCheckpoint(); + model->setCheckpoint(model_ckp); + model->restoreCheckpoint(); + model->setCheckpoint(saved_ckp); +// ((ModelGTR*) tree->getModel())->setRateMatrix(rates); +// ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs); +// tree->getModel()->decomposeRateMatrix(); + site_rate->setPInvar(initPInv); + site_rate->setGammaShape(initAlpha); tree->clearAllPartialLH(); optimizeParameters(fixed_len, false, logl_epsilon, gradient_epsilon); vector<double> estResults; - double estPInv = tree->getRate()->getPInvar(); - double estAlpha = tree->getRate()->getGammaShape(); + double estPInv = site_rate->getPInvar(); + double estAlpha = site_rate->getGammaShape(); double logl = tree->getCurScore(); estResults.push_back(estPInv); estResults.push_back(estAlpha); diff --git a/model/modelfactory.h b/model/modelfactory.h index 5218f0b..9cdabf3 100644 --- a/model/modelfactory.h +++ b/model/modelfactory.h @@ -257,9 +257,8 @@ protected: */ virtual bool getVariables(double *variables); - vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, PhyloTree *tree, - RateHeterogeneity *site_rates, double *rates, double *state_freqs, - double initPInv, double initAlpha, DoubleVector &lenvec); + vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, + double initPInv, double initAlpha, DoubleVector &lenvec, Checkpoint *model_ckp); }; #endif diff --git a/model/modelset.cpp b/model/modelset.cpp index 83feabb..bcc109f 100644 --- a/model/modelset.cpp +++ b/model/modelset.cpp @@ -158,6 +158,16 @@ void ModelSet::decomposeRateMatrix() size_t vsize = phylo_tree->vector_size; size_t states2 = num_states*num_states; size_t ptn, i, x; + + size_t max_size = get_safe_upper_limit(size()); + + // copy dummy values + for (size_t m = size(); m < max_size; m++) { + memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states); + memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2); + memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2); + } + double new_eval[num_states*vsize]; double new_evec[states2*vsize]; double new_inv_evec[states2*vsize]; diff --git a/model/ratefree.cpp b/model/ratefree.cpp index 60c302f..c31dcba 100644 --- a/model/ratefree.cpp +++ b/model/ratefree.cpp @@ -210,7 +210,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) { cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl; // TODO: turn off EM algorithm for +ASC model - if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()) || getPInvar() <= MIN_PINVAR) + if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())) return optimizeWithEM(); //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp index 7017887..1e71528 100644 --- a/phyloanalysis.cpp +++ b/phyloanalysis.cpp @@ -1546,7 +1546,7 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) { printAncestralSequences(params.out_prefix, &iqtree, params.print_ancestral_sequence); } - if (params.print_site_state_freq != WSF_NONE) { + if (params.print_site_state_freq != WSF_NONE && !params.site_freq_file && !params.tree_freq_file) { string site_freq_file = params.out_prefix; site_freq_file += ".sitesf"; printSiteStateFreq(site_freq_file.c_str(), &iqtree); @@ -2409,8 +2409,12 @@ void runStandardBootstrap(Params ¶ms, string &original_model, Alignment *ali } } else boot_tree = new IQTree(bootstrap_alignment); - if (params.print_bootaln && MPIHelper::getInstance().isMaster()) - bootstrap_alignment->printPhylip(bootaln_name.c_str(), true); + if (params.print_bootaln && MPIHelper::getInstance().isMaster()) { + if (bootstrap_alignment->isSuperAlignment()) + ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment(bootaln_name.c_str(), true); + else + bootstrap_alignment->printPhylip(bootaln_name.c_str(), true); + } // set checkpoint boot_tree->setCheckpoint(tree->getCheckpoint()); diff --git a/phylokernelnew.h b/phylokernelnew.h index 11c29eb..b68e46a 100644 --- a/phylokernelnew.h +++ b/phylokernelnew.h @@ -566,7 +566,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part { if (SAFE_NUMERIC) { size_t x, i; - auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0)); + auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0)); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -580,7 +580,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part } } else { size_t x, i; - auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0); + auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0); if (horizontal_or(underflown)) { // at least one site has numerical underflown size_t block = ncat_mix * nstates; for (x = 0; x < VectorClass::size(); x++) @@ -1286,7 +1286,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t } // check if one should scale partial likelihoods if (SAFE_NUMERIC) { - auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); + auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1304,7 +1304,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t } if (!SAFE_NUMERIC) { - auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); + auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1474,7 +1474,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t #endif // check if one should scale partial likelihoods if (SAFE_NUMERIC) { - auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); + auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1537,7 +1537,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t #endif // check if one should scale partial likelihoods if (SAFE_NUMERIC) { - auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); + auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1557,7 +1557,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t } // IF SITE_MODEL if (!SAFE_NUMERIC) { - auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); + auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1660,7 +1660,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t // check if one should scale partial likelihoods if (SAFE_NUMERIC) { - auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); + auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0)); if (horizontal_or(underflown)) for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -1682,7 +1682,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t if (!SAFE_NUMERIC) { // check if one should scale partial likelihoods - auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); + auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0); if (horizontal_or(underflown)) { // at least one site has numerical underflown for (x = 0; x < VectorClass::size(); x++) if (underflown[x]) { @@ -2024,7 +2024,7 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl #endif for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) { - VectorClass lh_ptn; + VectorClass lh_ptn(0.0); //lh_ptn.load_a(&ptn_invar[ptn]); VectorClass *theta = (VectorClass*)(theta_all + ptn*block); VectorClass df_ptn, ddf_ptn; @@ -2055,7 +2055,8 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl dotProductTriple<VectorClass, double, FMA>(val0, val1, val2, theta, lh_ptn, df_ptn, ddf_ptn, block, nstates); #endif } - lh_ptn = abs(lh_ptn + VectorClass().load_a(&ptn_invar[ptn])); + // Sum later to avoid underflow of invariant sites + lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]); if (ptn < orig_nptn) { lh_ptn = 1.0 / lh_ptn; @@ -2319,8 +2320,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, double *vec_tip = buffer_partial_lh_ptr + block*VectorClass::size()*thread_id; for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) { - VectorClass lh_ptn; - lh_ptn.load_a(&ptn_invar[ptn]); + VectorClass lh_ptn(0.0); +// lh_ptn.load_a(&ptn_invar[ptn]); VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix); VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block); VectorClass *lh_node = SITE_MODEL ? (VectorClass*)&partial_lh_node[ptn*nstates] : (VectorClass*)vec_tip; @@ -2411,7 +2412,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, } vc_min_scale *= LOG_SCALING_THRESHOLD; - lh_ptn = abs(lh_ptn); + // Sum later to avoid underflow of invariant sites + lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]); if (ptn < orig_nptn) { lh_ptn = log(lh_ptn) + vc_min_scale; lh_ptn.store_a(&_pattern_lh[ptn]); @@ -2466,8 +2468,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id); for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) { - VectorClass lh_ptn; - lh_ptn.load_a(&ptn_invar[ptn]); + VectorClass lh_ptn(0.0); +// lh_ptn.load_a(&ptn_invar[ptn]); VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix); VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block); VectorClass *partial_lh_node = (VectorClass*)(node_branch->partial_lh + ptn*block); @@ -2541,7 +2543,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, } // if SAFE_NUMERIC vc_min_scale *= LOG_SCALING_THRESHOLD; - lh_ptn = abs(lh_ptn); + // Sum later to avoid underflow of invariant sites + lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]); if (ptn < orig_nptn) { lh_ptn = log(lh_ptn) + vc_min_scale; @@ -2708,11 +2711,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() #pragma omp for schedule(static) nowait #endif for (ptn = 0; ptn < nptn; ptn+=VectorClass::size()) { - VectorClass lh_ptn; + VectorClass lh_ptn(0.0); VectorClass *theta = (VectorClass*)(theta_all + ptn*block); if (SITE_MODEL) { VectorClass *eval_ptr = (VectorClass*)&eval[ptn*nstates]; - lh_ptn.load_a(&ptn_invar[ptn]); +// lh_ptn.load_a(&ptn_invar[ptn]); for (c = 0; c < ncat; c++) { VectorClass lh_cat; #ifdef KERNEL_FIX_STATES @@ -2725,9 +2728,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() } } else { dotProductVec<VectorClass, double, FMA>(val0, theta, lh_ptn, block); - lh_ptn += VectorClass().load_a(&ptn_invar[ptn]); } + // Sum later to avoid underflow of invariant sites + lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]); + if (ptn < orig_nptn) { lh_ptn = log(abs(lh_ptn)) + VectorClass().load_a(&buffer_scale_all[ptn]); lh_ptn.store_a(&_pattern_lh[ptn]); diff --git a/phylotree.cpp b/phylotree.cpp index 5cd4c55..ffa9886 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -692,8 +692,8 @@ size_t PhyloTree::getBufferPartialLhSize() { const size_t VECTOR_SIZE = 8; // TODO, adjusted size_t ncat_mix = site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures()); size_t block = model->num_states * ncat_mix; - size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2 * aln->getNSeq()); - buffer_size += get_safe_upper_limit(block * (aln->getNSeq()+1) * (aln->STATE_UNKNOWN+1)); + size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2) * aln->getNSeq(); + buffer_size += get_safe_upper_limit(block *(aln->STATE_UNKNOWN+1)) * (aln->getNSeq()+1); buffer_size += (block*2+model->num_states)*VECTOR_SIZE*num_threads; return buffer_size; } diff --git a/phylotreesse.cpp b/phylotreesse.cpp index b0f9c01..32f3ecb 100644 --- a/phylotreesse.cpp +++ b/phylotreesse.cpp @@ -230,7 +230,7 @@ void PhyloTree::computeTipPartialLikelihood() { double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates]; for (v = 0; v < vector_size; v++) { - int state = aln->STATE_UNKNOWN; + int state = 0; if (ptn+v < nptn) state = aln->at(ptn+v)[nodeid]; // double *partial_lh = node_partial_lh + ptn*nstates; @@ -395,6 +395,13 @@ void PhyloTree::computePtnInvar() { size_t nptn = aln->getNPattern(), ptn; size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size()); int nstates = aln->num_states; + int x; + // ambiguous characters + int ambi_aa[] = { + 4+8, // B = N or D + 32+64, // Z = Q or E + 512+1024 // U = I or L + }; double *state_freq = aligned_alloc<double>(nstates); model->getStateFrequency(state_freq); @@ -402,11 +409,31 @@ void PhyloTree::computePtnInvar() { double p_invar = site_rate->getPInvar(); if (p_invar != 0.0) { for (ptn = 0; ptn < nptn; ptn++) { - if ((*aln)[ptn].const_char == nstates) + if ((*aln)[ptn].const_char > aln->STATE_UNKNOWN) + continue; + + if ((*aln)[ptn].const_char == aln->STATE_UNKNOWN) { ptn_invar[ptn] = p_invar; - else if ((*aln)[ptn].const_char < nstates) { + } else if ((*aln)[ptn].const_char < nstates) { ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char]; - } + } else if (aln->seq_type == SEQ_DNA) { + // 2016-12-21: handling ambiguous state + ptn_invar[ptn] = 0.0; + int cstate = (*aln)[ptn].const_char-nstates+1; + for (x = 0; x < nstates; x++) { + if ((cstate) & (1 << x)) + ptn_invar[ptn] += state_freq[x]; + } + ptn_invar[ptn] *= p_invar; + } else if (aln->seq_type == SEQ_PROTEIN) { + ptn_invar[ptn] = 0.0; + int cstate = (*aln)[ptn].const_char-nstates; + assert(cstate <= 2); + for (x = 0; x < 11; x++) + if (ambi_aa[cstate] & (1 << x)) + ptn_invar[ptn] += state_freq[x]; + ptn_invar[ptn] *= p_invar; + } else ASSERT(0); } // // ascertmain bias correction // for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++) diff --git a/tools.cpp b/tools.cpp index 4e5af85..fecb07f 100644 --- a/tools.cpp +++ b/tools.cpp @@ -3221,6 +3221,9 @@ void parseArg(int argc, char *argv[], Params ¶ms) { if (params.lh_mem_save == LM_MEM_SAVE && params.partition_file) outError("-mem option does not work with partition models yet"); + if (params.gbo_replicates && params.num_bootstrap_samples) + outError("UFBoot (-bb) and standard bootstrap (-b) must not be specified together"); + if (!params.out_prefix) { if (params.eco_dag_file) params.out_prefix = params.eco_dag_file; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
