This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository libbpp-popgen.
commit bbfffd4c86c352d52905cb1ac4eeffcb01c1a39a Author: Andreas Tille <[email protected]> Date: Wed Apr 13 17:14:31 2016 +0200 Imported Upstream version 2.2.0 --- CMakeLists.txt | 6 +- ChangeLog | 5 + Doxyfile | 2 +- bpp-popgen.spec | 4 +- debian/changelog | 6 + debian/control | 8 +- debian/copyright | 8 +- debian/postinst | 22 +- debian/postrm | 26 +- debian/prerm | 22 +- debian/rules | 4 +- src/Bpp/PopGen/AnalyzedLoci.cpp | 4 +- src/Bpp/PopGen/AnalyzedSequences.cpp | 2 +- src/Bpp/PopGen/DataSet.cpp | 10 +- src/Bpp/PopGen/GeneMapperCsvExport.cpp | 14 +- src/Bpp/PopGen/GeneMapperCsvExport.h | 5 +- src/Bpp/PopGen/Group.cpp | 4 +- src/Bpp/PopGen/PolymorphismMultiGContainer.cpp | 8 +- .../PopGen/PolymorphismMultiGContainerTools.cpp | 4 +- src/Bpp/PopGen/PolymorphismSequenceContainer.cpp | 69 ++-- src/Bpp/PopGen/PolymorphismSequenceContainer.h | 31 +- .../PopGen/PolymorphismSequenceContainerTools.cpp | 16 +- .../PopGen/PolymorphismSequenceContainerTools.h | 13 +- src/Bpp/PopGen/PopgenlibIO.cpp | 14 +- src/Bpp/PopGen/SequenceStatistics.cpp | 452 ++++++++++----------- src/Bpp/PopGen/SequenceStatistics.h | 104 ++--- 26 files changed, 453 insertions(+), 410 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b41c336..2fdcbdb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ ELSE(NO_DEP_CHECK) # In other words, the library implements all the interface numbers in the # range from number current - age to current. SET(BPPPOPGEN_VERSION_CURRENT "6") -SET(BPPPOPGEN_VERSION_REVISION "3") +SET(BPPPOPGEN_VERSION_REVISION "0") SET(BPPPOPGEN_VERSION_AGE "0") # Effective version number computation @@ -103,9 +103,9 @@ ENDIF(NO_DEP_CHECK) # Packager SET(CPACK_PACKAGE_NAME "libbpp-popgen") SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team") -SET(CPACK_PACKAGE_VERSION "2.1.0") +SET(CPACK_PACKAGE_VERSION "2.2.0") SET(CPACK_PACKAGE_VERSION_MAJOR "2") -SET(CPACK_PACKAGE_VERSION_MINOR "1") +SET(CPACK_PACKAGE_VERSION_MINOR "2") SET(CPACK_PACKAGE_VERSION_PATCH "0") SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Population Genetics library") SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt") diff --git a/ChangeLog b/ChangeLog index cc4bc56..ca9aad7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +19/09/14 -*- Version 2.2.0 -*- + +19/09/14 Julien Dutheil +* Big cleaning in SequenceStatistics, functions have been renamed for consistency. + 07/03/13 -*- Version 2.1.0 -*- 06/05/13 Julien Dutheil diff --git a/Doxyfile b/Doxyfile index c87d577..c99b33e 100644 --- a/Doxyfile +++ b/Doxyfile @@ -32,7 +32,7 @@ PROJECT_NAME = bpp-popgen # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 2.1.0 +PROJECT_NUMBER = 2.2.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer diff --git a/bpp-popgen.spec b/bpp-popgen.spec index c36d2f2..41239c9 100644 --- a/bpp-popgen.spec +++ b/bpp-popgen.spec @@ -1,5 +1,5 @@ %define _basename bpp-popgen -%define _version 2.1.0 +%define _version 2.2.0 %define _release 1 %define _prefix /usr @@ -176,6 +176,8 @@ createGeneric() { %{_prefix}/include/* %changelog +* Fri Sep 19 2014 Julien Dutheil <[email protected]> 2.2.0-1 +- Statistical funcion names rationalized. * Thu Mar 07 2013 Julien Dutheil <[email protected]> 2.1.0-1 - Bug fixed and warnings removed. * Thu Feb 09 2012 Julien Dutheil <[email protected]> 2.0.3-1 diff --git a/debian/changelog b/debian/changelog index 0c1aec5..41e5998 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +libbpp-popgen (2.2.0-1) unstable; urgency=low + + * Statistical funcion names rationalized. + + -- Julien Dutheil <[email protected]> Fri, 19 Sep 2014 14:00:00 +0100 + libbpp-popgen (2.1.0-1) unstable; urgency=low * Bug fixed and warnings removed. diff --git a/debian/control b/debian/control index 514f887..4d382fe 100644 --- a/debian/control +++ b/debian/control @@ -4,14 +4,14 @@ Priority: optional Maintainer: Loic Dachary <[email protected]> Uploaders: Julien Dutheil <[email protected]> Build-Depends: debhelper (>= 5), cmake (>= 2.6), - libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0) -Standards-Version: 3.9.1 + libbpp-core-dev (>= 2.2.0), libbpp-seq-dev (>= 2.2.0) +Standards-Version: 3.9.4 Package: libbpp-popgen-dev Section: libdevel Architecture: any Depends: libbpp-popgen6 (= ${binary:Version}), ${misc:Depends}, - libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0) + libbpp-core-dev (>= 2.2.0), libbpp-seq-dev (>= 2.2.0) Description: Bio++ Population Genetics library development files. Contains the Bio++ classes for population genetics. @@ -19,7 +19,7 @@ Package: libbpp-popgen6 Section: libs Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, - libbpp-core2 (>= 2.1.0), libbpp-seq9 (>= 2.1.0) + libbpp-core2 (>= 2.2.0), libbpp-seq9 (>= 2.2.0) Description: Bio++ Population Genetics library. Contains the Bio++ classes for population genetics. diff --git a/debian/copyright b/debian/copyright index ff3fdc0..1ea5b79 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,5 +1,5 @@ -This package was debianized by Julien Dutheil <[email protected]> on -Thu, 07 Mar 2013 10:51:00 +0100 +This package was debianized by Julien Dutheil <[email protected]> on +Fri, 19 Sep 2014 14:00:00 +0100 It was downloaded from <http://biopp.univ-montp2.fr/Repositories/sources> @@ -9,7 +9,7 @@ Upstream Author: Copyright: - Copyright (C) 2013 Bio++ Development Team + Copyright (C) 2014 Bio++ Development Team License: @@ -30,7 +30,7 @@ License: On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL'. -The Debian packaging is (C) 2013, Julien Dutheil <[email protected]> and +The Debian packaging is (C) 2014, Julien Dutheil <[email protected]> and is licensed under the GPL, see above. The provided software is distributed under the CeCILL license: diff --git a/debian/postinst b/debian/postinst index cf9e925..cff89b1 100755 --- a/debian/postinst +++ b/debian/postinst @@ -35,9 +35,23 @@ createGeneric() { done; } -if [ "$1" = "configure" ]; then - # Actualize .all files - createGeneric /usr/include/Bpp -fi +case "$1" in + configure) + # Actualize .all files + createGeneric /usr/include/Bpp + ;; + abort-upgrade|abort-remove|abort-deconfigure) + echo "$1" + ;; + *) + echo "postinst called with unknown argument \`\$1'" >&2 + exit 0 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# exit 0 diff --git a/debian/postrm b/debian/postrm index 3931669..744f8b1 100755 --- a/debian/postrm +++ b/debian/postrm @@ -35,11 +35,25 @@ createGeneric() { done; } -if [ "$1" = "remove" ]; then - # Automatically added by dh_makeshlibs - ldconfig - # Actualize .all files - createGeneric /usr/include/Bpp -fi +case "$1" in + remove) + # Automatically added by dh_makeshlibs + ldconfig + # Actualize .all files + createGeneric /usr/include/Bpp + ;; + purge|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) + echo $1 + ;; + *) + echo "postrm called with unknown argument \`\$1'" >&2 + exit 0 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# exit 0 diff --git a/debian/prerm b/debian/prerm index 5aefd24..8fab52e 100755 --- a/debian/prerm +++ b/debian/prerm @@ -19,9 +19,23 @@ removeGeneric() { done } -if [ "$1" = "remove" ]; then - # Actualize .all files - removeGeneric /usr/include/Bpp -fi +case "$1" in + remove|upgrade|deconfigure) + # Actualize .all files + removeGeneric /usr/include/Bpp + ;; + failed-upgrade) + echo "$1" + ;; + *) + echo "prerm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# exit 0 diff --git a/debian/rules b/debian/rules index 63fdf40..441d776 100755 --- a/debian/rules +++ b/debian/rules @@ -38,7 +38,9 @@ configure: config.status: configure dh_testdir -build: build-stamp +build: build-arch build-indep +build-arch: build-stamp +build-indep: build-stamp build-stamp: config.status dh_testdir diff --git a/src/Bpp/PopGen/AnalyzedLoci.cpp b/src/Bpp/PopGen/AnalyzedLoci.cpp index 26a4e61..abea706 100644 --- a/src/Bpp/PopGen/AnalyzedLoci.cpp +++ b/src/Bpp/PopGen/AnalyzedLoci.cpp @@ -79,7 +79,7 @@ void AnalyzedLoci::setLocusInfo( const LocusInfo& locus) throw (IndexOutOfBoundsException) { - if (locus_position >= 0 && locus_position < loci_.size()) + if (locus_position < loci_.size()) loci_[locus_position] = new LocusInfo(locus); else throw IndexOutOfBoundsException("AnalyzedLoci::setLocusInfo: locus_position out of bounds", @@ -163,7 +163,7 @@ void AnalyzedLoci::addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception) { - if (locus_position >= 0 && locus_position < loci_.size()) + if (locus_position < loci_.size()) { try { diff --git a/src/Bpp/PopGen/AnalyzedSequences.cpp b/src/Bpp/PopGen/AnalyzedSequences.cpp index 3a41892..e17cf28 100644 --- a/src/Bpp/PopGen/AnalyzedSequences.cpp +++ b/src/Bpp/PopGen/AnalyzedSequences.cpp @@ -111,7 +111,7 @@ std::string AnalyzedSequences::getAlphabetType() const return string("---"); string alpha_type = alphabet_->getAlphabetType(); size_t bs = alpha_type.find(" ", 0); - alpha_type = string(alpha_type.begin(), alpha_type.begin() + bs); + alpha_type = string(alpha_type.begin(), alpha_type.begin() + static_cast<ptrdiff_t>(bs)); if (alpha_type == "Proteic") alpha_type = "PROTEIN"; return alpha_type; diff --git a/src/Bpp/PopGen/DataSet.cpp b/src/Bpp/PopGen/DataSet.cpp index 1d382b8..02726cb 100644 --- a/src/Bpp/PopGen/DataSet.cpp +++ b/src/Bpp/PopGen/DataSet.cpp @@ -168,7 +168,7 @@ void DataSet::deleteLocalityAtPosition(size_t locality_position) throw (IndexOut if (locality_position >= localities_.size()) throw IndexOutOfBoundsException("DataSet::deleteLocalityAtPosition: locality_position out of bounds.", locality_position, 0, localities_.size()); delete localities_[locality_position]; - localities_.erase(localities_.begin() + locality_position); + localities_.erase(localities_.begin() + static_cast<ptrdiff_t>(locality_position)); } /******************************************************************************/ @@ -292,7 +292,7 @@ void DataSet::deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoun if (group_position >= groups_.size()) throw IndexOutOfBoundsException("DataSet::deleteGroup.", group_position, 0, groups_.size()); delete groups_[group_position]; - groups_.erase(groups_.begin() + group_position); + groups_.erase(groups_.begin() + static_cast<ptrdiff_t>(group_position)); } /******************************************************************************/ @@ -1333,9 +1333,9 @@ PolymorphismSequenceContainer* DataSet::getPolymorphismSequenceContainer(const s } if (tmp_ind->hasSequenceAtPosition(sequence_position)) { - const Sequence* tmp_seq = &tmp_ind->getSequenceAtPosition(sequence_position); - psc->addSequence(*tmp_seq, 1, false); - psc->setGroupId((const string) (tmp_seq->getName()), it->first); + const Sequence& tmp_seq = tmp_ind->getSequenceAtPosition(sequence_position); + psc->addSequenceWithFrequency(tmp_seq, 1, false); + psc->setGroupId(tmp_seq.getName(), it->first); } } } diff --git a/src/Bpp/PopGen/GeneMapperCsvExport.cpp b/src/Bpp/PopGen/GeneMapperCsvExport.cpp index 885fe3c..fd9419a 100644 --- a/src/Bpp/PopGen/GeneMapperCsvExport.cpp +++ b/src/Bpp/PopGen/GeneMapperCsvExport.cpp @@ -54,7 +54,7 @@ const std::string GeneMapperCsvExport::PEAK_AREA_H = "Peak Area "; const std::string GeneMapperCsvExport::DAC_H = "DAC"; const std::string GeneMapperCsvExport::AN_H = "AN"; -GeneMapperCsvExport::GeneMapperCsvExport(bool ia) : IndependentAlleles_(ia) {} +//GeneMapperCsvExport::GeneMapperCsvExport(bool ia) : IndependentAlleles_(ia) {} GeneMapperCsvExport::~GeneMapperCsvExport() {} @@ -123,22 +123,22 @@ void GeneMapperCsvExport::read(std::istream& is, DataSet& data_set) throw (Excep vector<string> col_names = dt.getColumnNames(); // Finds columns containing allele data - vector<unsigned int> alleles_cols; - for (unsigned int i = 0; i < col_names.size(); i++) + vector<size_t> alleles_cols; + for (size_t i = 0; i < col_names.size(); i++) { if (TextTools::startsWith(col_names[i], ALLELE_H)) alleles_cols.push_back(i); } // Set LocusInfo - vector<vector<unsigned int> > alleles_pos; - for (unsigned int i = 0; i < markers.size(); i++) + vector<vector<size_t> > alleles_pos; + for (size_t i = 0; i < markers.size(); i++) { al.setLocusInfo(i, LocusInfo(markers[i], LocusInfo::UNKNOWN)); } std::map< std::string, std::set< std::string > > markerAlleles; - for (unsigned int i = 0; i < dt.getNumberOfRows(); ++i) + for (size_t i = 0; i < dt.getNumberOfRows(); ++i) { - for (unsigned int j = 0; j < alleles_cols.size(); ++j) + for (size_t j = 0; j < alleles_cols.size(); ++j) { if (dt(i, alleles_cols[j]) != "") { diff --git a/src/Bpp/PopGen/GeneMapperCsvExport.h b/src/Bpp/PopGen/GeneMapperCsvExport.h index 5f0e97f..2f74298 100644 --- a/src/Bpp/PopGen/GeneMapperCsvExport.h +++ b/src/Bpp/PopGen/GeneMapperCsvExport.h @@ -76,11 +76,12 @@ public: static const std::string AN_H; private: - bool IndependentAlleles_; + //bool IndependentAlleles_; //jdutheilon 19/09/14: this does not seem to be used anywhere! public: // Constructor and destructor - GeneMapperCsvExport(bool ia = false); + //GeneMapperCsvExport(bool ia = false); + GeneMapperCsvExport() {} ~GeneMapperCsvExport(); // public: diff --git a/src/Bpp/PopGen/Group.cpp b/src/Bpp/PopGen/Group.cpp index c76280c..c5c7246 100644 --- a/src/Bpp/PopGen/Group.cpp +++ b/src/Bpp/PopGen/Group.cpp @@ -133,7 +133,7 @@ std::auto_ptr<Individual> Group::removeIndividualById(const std::string& individ { size_t indPos = getIndividualPosition(individual_id); auto_ptr<Individual> ind(individuals_[indPos]); - individuals_.erase(individuals_.begin() + indPos); + individuals_.erase(individuals_.begin() + static_cast<ptrdiff_t>(indPos)); return ind; } catch (IndividualNotFoundException& infe) @@ -147,7 +147,7 @@ std::auto_ptr<Individual> Group::removeIndividualAtPosition(size_t individual_po if (individual_position >= individuals_.size()) throw IndexOutOfBoundsException("Group::removeIndividualAtPosition.", individual_position, 0, individuals_.size()); auto_ptr<Individual> ind(individuals_[individual_position]); - individuals_.erase(individuals_.begin() + individual_position); + individuals_.erase(individuals_.begin() + static_cast<ptrdiff_t>(individual_position)); return ind; } diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp b/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp index d7fdd0e..854967e 100644 --- a/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp +++ b/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp @@ -124,8 +124,8 @@ MultilocusGenotype* PolymorphismMultiGContainer::removeMultilocusGenotype(size_t if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::removeMultilocusGenotype: position out of bounds.", position, 0, size() - 1); MultilocusGenotype* tmp_mg = multilocusGenotypes_[position]; - multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position); - groups_.erase(groups_.begin() + position); + multilocusGenotypes_.erase(multilocusGenotypes_.begin() + static_cast<ptrdiff_t>(position)); + groups_.erase(groups_.begin() + static_cast<ptrdiff_t>(position)); return tmp_mg; } @@ -136,8 +136,8 @@ void PolymorphismMultiGContainer::deleteMultilocusGenotype(size_t position) thro if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::deleteMultilocusGenotype: position out of bounds.", position, 0, size() - 1); delete multilocusGenotypes_[position]; - multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position); - groups_.erase(groups_.begin() + position); + multilocusGenotypes_.erase(multilocusGenotypes_.begin() + static_cast<ptrdiff_t>(position)); + groups_.erase(groups_.begin() + static_cast<ptrdiff_t>(position)); } /******************************************************************************/ diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp index e4fba83..685103a 100644 --- a/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp +++ b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp @@ -269,7 +269,7 @@ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupAl for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group { - int nb_ind_in_group = 0; + size_t nb_ind_in_group = 0; vector< vector<size_t> > nb_alleles_for_inds; nb_alleles_for_inds.resize(loc_num); @@ -314,7 +314,7 @@ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupAl // Build the new PolymorphismMultiGContainer vector<size_t> k(loc_num, 0); - for (int ind = 0; ind < nb_ind_in_group; ind++) + for (size_t ind = 0; ind < nb_ind_in_group; ind++) { MultilocusGenotype tmp_mg(loc_num); for (size_t j = 0; j < loc_num; j++) diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp b/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp index 2469348..6d9fd4c 100644 --- a/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp +++ b/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp @@ -49,40 +49,40 @@ using namespace std; PolymorphismSequenceContainer::PolymorphismSequenceContainer(const Alphabet* alpha) : VectorSiteContainer(alpha), ingroup_(vector<bool>()), - count_(vector<size_t>()), - group_(vector<size_t>()) {} + count_(0), + group_(0) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(size_t size, const Alphabet* alpha) : VectorSiteContainer(size, alpha), - ingroup_(vector<bool>(size)), - count_(vector<size_t>(size)), - group_(vector<size_t>(size)) {} + ingroup_(size), + count_(size), + group_(size) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const OrderedSequenceContainer& sc) : VectorSiteContainer(sc), - ingroup_(vector<bool>(sc.getNumberOfSequences(), true)), - count_(vector<size_t>(sc.getNumberOfSequences(), 1)), - group_(vector<size_t>(sc.getNumberOfSequences(), 1)) {} + ingroup_(sc.getNumberOfSequences(), true), + count_(sc.getNumberOfSequences(), 1), + group_(sc.getNumberOfSequences(), 1) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const SiteContainer& sc) : VectorSiteContainer(sc), - ingroup_(vector<bool>(sc.getNumberOfSequences(), true)), - count_(vector<size_t>(sc.getNumberOfSequences(), 1)), - group_(vector<size_t>(sc.getNumberOfSequences(), 1)) {} + ingroup_(sc.getNumberOfSequences(), true), + count_(sc.getNumberOfSequences(), 1), + group_(sc.getNumberOfSequences(), 1) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const PolymorphismSequenceContainer& psc) : VectorSiteContainer(psc), - ingroup_(vector<bool>(psc.getNumberOfSequences())), - count_(vector<size_t>(psc.getNumberOfSequences())), - group_(vector<size_t>(psc.getNumberOfSequences())) + ingroup_(psc.getNumberOfSequences()), + count_(psc.getNumberOfSequences()), + group_(psc.getNumberOfSequences()) { for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { @@ -128,9 +128,9 @@ Sequence* PolymorphismSequenceContainer::removeSequence(size_t index) throw (Ind { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::removeSequence: index out of bounds.", index, 0, getNumberOfSequences()); - count_.erase(count_.begin() + index); - ingroup_.erase(ingroup_.begin() + index); - group_.erase(group_.begin() + index); + count_.erase(count_.begin() + static_cast<ptrdiff_t>(index)); + ingroup_.erase(ingroup_.begin() + static_cast<ptrdiff_t>(index)); + group_.erase(group_.begin() + static_cast<ptrdiff_t>(index)); return VectorSiteContainer::removeSequence(index); } @@ -178,23 +178,40 @@ void PolymorphismSequenceContainer::deleteSequence(const std::string& name) thro /******************************************************************************/ -void PolymorphismSequenceContainer::addSequence(const Sequence& sequence, size_t effectif, bool checkNames) throw (Exception) +void PolymorphismSequenceContainer::addSequenceWithFrequency(const Sequence& sequence, unsigned int frequency, bool checkName) throw (Exception) { try { - VectorSiteContainer::addSequence(sequence, checkNames); + VectorSiteContainer::addSequence(sequence, checkName); } catch (Exception& e) { throw e; } - count_.push_back(effectif); + count_.push_back(frequency); ingroup_.push_back(true); group_.push_back(0); } /******************************************************************************/ +void PolymorphismSequenceContainer::addSequenceWithFrequency(const Sequence& sequence, size_t sequenceIndex, unsigned int frequency, bool checkName) throw (Exception) +{ + try + { + VectorSiteContainer::addSequence(sequence, sequenceIndex, checkName); + } + catch (Exception& e) + { + throw e; + } + count_.insert(count_.begin() + static_cast<ptrdiff_t>(sequenceIndex), frequency); + ingroup_.insert(ingroup_.begin() + static_cast<ptrdiff_t>(sequenceIndex), true); + group_.insert(group_.begin() + static_cast<ptrdiff_t>(sequenceIndex), 0); +} + +/******************************************************************************/ + void PolymorphismSequenceContainer::clear() { VectorSiteContainer::clear(); @@ -341,7 +358,7 @@ void PolymorphismSequenceContainer::setAsOutgroupMember(const std::string& name) /******************************************************************************/ -void PolymorphismSequenceContainer::setSequenceCount(size_t index, size_t count) throw (Exception) +void PolymorphismSequenceContainer::setSequenceCount(size_t index, unsigned int count) throw (Exception) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setSequenceCount.", index, 0, getNumberOfSequences()); @@ -352,7 +369,7 @@ void PolymorphismSequenceContainer::setSequenceCount(size_t index, size_t count) /******************************************************************************/ -void PolymorphismSequenceContainer::setSequenceCount(const std::string& name, size_t count) throw (Exception) +void PolymorphismSequenceContainer::setSequenceCount(const std::string& name, unsigned int count) throw (Exception) { try { @@ -393,7 +410,7 @@ void PolymorphismSequenceContainer::incrementSequenceCount(const std::string& na /******************************************************************************/ -void PolymorphismSequenceContainer::decrementSequenceCount(size_t index) throw (Exception) +void PolymorphismSequenceContainer::decrementSequenceCount(size_t index) throw (IndexOutOfBoundsException, BadIntegerException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::decrementSequenceCount.", index, 0, getNumberOfSequences()); @@ -404,7 +421,7 @@ void PolymorphismSequenceContainer::decrementSequenceCount(size_t index) throw ( /******************************************************************************/ -void PolymorphismSequenceContainer::decrementSequenceCount(const std::string& name) throw (Exception) +void PolymorphismSequenceContainer::decrementSequenceCount(const std::string& name) throw (SequenceNotFoundException, BadIntegerException) { try { @@ -422,7 +439,7 @@ void PolymorphismSequenceContainer::decrementSequenceCount(const std::string& na /******************************************************************************/ -size_t PolymorphismSequenceContainer::getSequenceCount(size_t index) const throw (IndexOutOfBoundsException) +unsigned int PolymorphismSequenceContainer::getSequenceCount(size_t index) const throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getSequenceCount.", index, 0, getNumberOfSequences()); @@ -431,7 +448,7 @@ size_t PolymorphismSequenceContainer::getSequenceCount(size_t index) const throw /******************************************************************************/ -size_t PolymorphismSequenceContainer::getSequenceCount(const std::string& name) const throw (SequenceNotFoundException) +unsigned int PolymorphismSequenceContainer::getSequenceCount(const std::string& name) const throw (SequenceNotFoundException) { try { diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainer.h b/src/Bpp/PopGen/PolymorphismSequenceContainer.h index 5aeec8d..02656c5 100644 --- a/src/Bpp/PopGen/PolymorphismSequenceContainer.h +++ b/src/Bpp/PopGen/PolymorphismSequenceContainer.h @@ -104,7 +104,7 @@ class PolymorphismSequenceContainer : { private: std::vector<bool> ingroup_; - std::vector<size_t> count_; + std::vector<unsigned int> count_; std::vector<size_t> group_; public: @@ -189,7 +189,14 @@ public: * @throw SequenceException if the sequence's size doesn't match the sequence's size of the container. * @throw SequenceException if the sequence's name already exists in the container. */ - void addSequence(const Sequence& sequence, size_t effectif = 1, bool checkNames = true) throw (Exception); + void addSequenceWithFrequency(const Sequence& sequence, unsigned int frequency, bool checkName = true) throw (Exception); + void addSequenceWithFrequency(const Sequence& sequence, size_t sequenceIndex, unsigned int frequency, bool checkName = true) throw (Exception); + void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception) { + addSequenceWithFrequency(sequence, 1, checkName); + } + void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception) { + addSequenceWithFrequency(sequence, sequenceIndex, 1, checkName); + } /** * @brief Clear the container of all its sequences. @@ -282,7 +289,7 @@ public: * @throw IndexOutOfBoundsException if index excedes the number of sequences in the container. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ - void setSequenceCount(size_t index, size_t count) throw (Exception); + void setSequenceCount(size_t index, unsigned int count) throw (Exception); /** * @brief Set the count of a sequence by name. @@ -290,7 +297,7 @@ public: * @throw throw SequenceNotFoundException if name is not found among the sequences' names. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ - void setSequenceCount(const std::string& name, size_t count) throw (Exception); + void setSequenceCount(const std::string& name, unsigned int count) throw (Exception); /** * @brief Add 1 to the sequence count. @@ -302,39 +309,39 @@ public: /** * @brief Add 1 to the sequence count. * - * @throw throw SequenceNotFoundException if name is not found among the sequences' names. + * @throw SequenceNotFoundException if name is not found among the sequences' names. */ void incrementSequenceCount(const std::string& name) throw (SequenceNotFoundException); - /** - * @brief Remove 1 to the sequence count. + /** + * @brief Removz 1 to the sequence count. * * @throw IndexOutOfBoundsException if index excedes the number of sequences in the container. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ - void decrementSequenceCount(size_t index) throw (Exception); + void decrementSequenceCount(size_t index) throw(IndexOutOfBoundsException, BadIntegerException); /** * @brief Remove 1 to the sequence count. * - * @throw throw SequenceNotFoundException if name is not found among the sequences' names. + * @throw SequenceNotFoundException if name is not found among the sequences' names. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ - void decrementSequenceCount(const std::string& name) throw (Exception); + void decrementSequenceCount(const std::string& name) throw (SequenceNotFoundException, BadIntegerException); /** * @brief Get the count of a sequence by index. * * @throw IndexOutOfBoundsException if index excedes the number of sequences in the container. */ - size_t getSequenceCount(size_t index) const throw (IndexOutOfBoundsException); + unsigned int getSequenceCount(size_t index) const throw (IndexOutOfBoundsException); /** * @brief Get the count of a sequence by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ - size_t getSequenceCount(const std::string& name) const throw (SequenceNotFoundException); + unsigned int getSequenceCount(const std::string& name) const throw (SequenceNotFoundException); }; } // end of namespace bpp; diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp index 1193ac3..15c3e0a 100644 --- a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp +++ b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp @@ -174,7 +174,7 @@ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSelectedSe PolymorphismSequenceContainer* newpsc = new PolymorphismSequenceContainer(psc.getAlphabet()); for (size_t i = 0; i < ss.size(); i++) { - newpsc->addSequence(psc.getSequence(ss[i]), psc.getSequenceCount(i), false); + newpsc->addSequenceWithFrequency(psc.getSequence(ss[i]), psc.getSequenceCount(i), false); if (psc.isIngroupMember(i)) newpsc->setAsIngroupMember(i); else @@ -430,7 +430,10 @@ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getOnePositio /******************************************************************************/ -PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca ) +PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getIntrons( + const PolymorphismSequenceContainer& psc, + const std::string& setName, + const GeneticCode* gCode) { Comments maseFileHeader = psc.getGeneralComments(); SiteSelection ss; @@ -456,7 +459,7 @@ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getIntrons(co int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0); int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0); int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0); - if (ca->isStop(ca->getCodon(c1, c2, c3))) + if (gCode->isStop(gCode->getSourceAlphabet()->getCodon(c1, c2, c3))) last = codss[codss.size() - 1]; // Keep sites between AUG and STOP for (size_t i = first; i < last; i++) @@ -522,7 +525,10 @@ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get5Prime(con /******************************************************************************/ -PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca ) +PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get3Prime( + const PolymorphismSequenceContainer& psc, + const std::string& setName, + const GeneticCode* gCode) { Comments maseFileHeader = psc.getGeneralComments(); SiteSelection ss; @@ -532,7 +538,7 @@ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get3Prime(con int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0); int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0); int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0); - if (ca->isStop(ca->getCodon(c1, c2, c3))) + if (gCode->isStop(gCode->getSourceAlphabet()->getCodon(c1, c2, c3))) first = codss[codss.size() - 1]; for (size_t i = first; i < psc.getNumberOfSites(); i++) { diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h index 993f1bc..7c576b6 100644 --- a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h +++ b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h @@ -95,7 +95,7 @@ public: * * @throw Exception if there is no ingroup sequence */ - static PolymorphismSequenceContainer* extractIngroup (const PolymorphismSequenceContainer& psc) throw (Exception); + static PolymorphismSequenceContainer* extractIngroup(const PolymorphismSequenceContainer& psc) throw (Exception); /** * @brief Extract outgroup sequences from a PolymorphismSequenceContainer and create a new one. @@ -104,7 +104,7 @@ public: * * @throw Exception if there is no outgroup sequence */ - static PolymorphismSequenceContainer* extractOutgroup (const PolymorphismSequenceContainer& psc) throw (Exception); + static PolymorphismSequenceContainer* extractOutgroup(const PolymorphismSequenceContainer& psc) throw (Exception); /** * @brief Extract a special group from the PolymorphismSequenceContainer. @@ -125,7 +125,6 @@ public: */ static PolymorphismSequenceContainer* getSelectedSequences(const PolymorphismSequenceContainer& psc, const SequenceSelection& ss); - /** * @brief Get a random set of sequences * @@ -221,9 +220,9 @@ public: * * @param psc a PolymorphismSequenceContainer * @param setName name of the CDS site selection - * @param ca a codon alphabet + * @param gCode The genetic code to use */ - static PolymorphismSequenceContainer* getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca ); + static PolymorphismSequenceContainer* getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const GeneticCode* gCode); /** * @brief Retrieve 5' sites @@ -238,9 +237,9 @@ public: * * @param psc a PolymorphismSequenceContainer * @param setName name of the CDS site selection - * @param ca a codon alphabet + * @param gCode The genetic code to use */ - static PolymorphismSequenceContainer* get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca ); + static PolymorphismSequenceContainer* get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const GeneticCode* gCode); /** * @brief Get the species name of the ingroup diff --git a/src/Bpp/PopGen/PopgenlibIO.cpp b/src/Bpp/PopGen/PopgenlibIO.cpp index d0c5b9d..9f0f14f 100644 --- a/src/Bpp/PopGen/PopgenlibIO.cpp +++ b/src/Bpp/PopGen/PopgenlibIO.cpp @@ -408,7 +408,7 @@ void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& if (in[i].find("Group", 0) != string::npos) { temp = in[i]; - tmp_group_pos = TextTools::toInt(getValues_(temp, "=")[0]); + tmp_group_pos = TextTools::to<size_t>(getValues_(temp, "=")[0]); try { data_set.addEmptyGroup(tmp_group_pos); @@ -421,7 +421,7 @@ void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& { temp = in[i]; size_t sep_pos = temp.find("=", 0); - string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + sep_pos + 1, temp.end())); + string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + static_cast<ptrdiff_t>(sep_pos + 1), temp.end())); try { tmp_indiv.setLocality(&data_set.getLocalityByName(loc_name)); @@ -457,7 +457,7 @@ void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& try { if (seq_pos_str[j] != getMissingDataSymbol()) - tmp_indiv.addSequence(j, vsc.getSequence(TextTools::toInt(seq_pos_str[j]) - 1)); + tmp_indiv.addSequence(j, vsc.getSequence(TextTools::to<size_t>(seq_pos_str[j]) - 1)); } catch (...) {} @@ -695,19 +695,19 @@ std::vector<std::string> PopgenlibIO::getValues_(std::string& param_line, const { vector<string> values; size_t limit = param_line.find(delim, 0); - if (limit >= 0) - param_line = string(param_line.begin() + limit + delim.size(), param_line.end()); + if (limit != string::npos) + param_line = string(param_line.begin() + static_cast<ptrdiff_t>(limit + delim.size()), param_line.end()); param_line = TextTools::removeSurroundingWhiteSpaces(param_line); size_t bi = 0; size_t bs = param_line.find(getDataSeparatorChar(), bi); while (bs > 0) { - values.push_back(string(param_line.begin() + bi, param_line.begin() + bs)); + values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.begin() + static_cast<ptrdiff_t>(bs))); bi = bs + 1; bs = param_line.find(getDataSeparatorChar(), bi); } - values.push_back(string(param_line.begin() + bi, param_line.end())); + values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.end())); return values; } diff --git a/src/Bpp/PopGen/SequenceStatistics.cpp b/src/Bpp/PopGen/SequenceStatistics.cpp index b2dcef5..e6dda96 100644 --- a/src/Bpp/PopGen/SequenceStatistics.cpp +++ b/src/Bpp/PopGen/SequenceStatistics.cpp @@ -58,8 +58,7 @@ using namespace std; #include <Bpp/Seq/StringSequenceTools.h> #include <Bpp/Seq/CodonSiteTools.h> #include <Bpp/Seq/Alphabet/DNA.h> -#include <Bpp/Seq/Alphabet/StandardCodonAlphabet.h> -#include <Bpp/Seq/GeneticCode/StandardGeneticCode.h> +#include <Bpp/Seq/Alphabet/AlphabetTools.h> #include <Bpp/Numeric/VectorTools.h> #include <Bpp/Numeric/VectorExceptions.h> @@ -70,167 +69,155 @@ using namespace bpp; // Basic statistics // ****************************************************************************** -size_t SequenceStatistics::polymorphicSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown) +unsigned int SequenceStatistics::numberOfPolymorphicSites(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown) { - size_t S = 0; + unsigned int s = 0; const Site* site = 0; - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); + si.reset(new SimpleSiteContainerIterator(psc)); while (si->hasMoreSites()) { site = si->nextSite(); if (!SiteTools::isConstant(*site, ignoreUnknown)) { - S++; + s++; } } - delete si; - return S; + return s; } -size_t SequenceStatistics::parsimonyInformativeSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::numberOfParsimonyInformativeSites(const PolymorphismSequenceContainer& psc, bool gapflag) { - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); - size_t S = 0; + si.reset(new SimpleSiteContainerIterator(psc)); + unsigned int s = 0; const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); if (SiteTools::isParsimonyInformativeSite(*site)) { - S++; + s++; } } - delete si; - return S; + return s; } -size_t SequenceStatistics::countSingleton(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::numberOfSingletons(const PolymorphismSequenceContainer& psc, bool gapflag) { - size_t nus = 0; - const Site* site = 0; - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); + si.reset(new SimpleSiteContainerIterator(psc)); + unsigned int nus = 0; + const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); - nus += getSingletonNumber_(*site); + nus += getNumberOfSingletons_(*site); } - delete si; return nus; } -size_t SequenceStatistics::tripletNumber(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::numberOfTriplets(const PolymorphismSequenceContainer& psc, bool gapflag) { - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); - int S = 0; + si.reset(new SimpleSiteContainerIterator(psc)); + unsigned int s = 0; const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); if (SiteTools::isTriplet(*site)) { - S++; + s++; } } - - delete si; - return S; + return s; } -size_t SequenceStatistics::totNumberMutations(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::totalNumberOfMutations(const PolymorphismSequenceContainer& psc, bool gapflag) { - size_t tnm = 0; - const Site* site = 0; - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); + si.reset(new SimpleSiteContainerIterator(psc)); + unsigned int tnm = 0; + const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); - tnm += getMutationNumber_(*site); + tnm += getNumberOfMutations_(*site); } - delete si; return tnm; } -size_t SequenceStatistics::totMutationsExternalBranchs( +unsigned int SequenceStatistics::totalNumberOfMutationsOnExternalBranches( const PolymorphismSequenceContainer& ing, const PolymorphismSequenceContainer& outg) throw (Exception) { if (ing.getNumberOfSites() != outg.getNumberOfSites()) throw Exception("ing and outg must have the same size"); - size_t nmuts = 0; + unsigned int nmuts = 0; const Site* site_in = 0; const Site* site_out = 0; - ConstSiteIterator* si = 0; - ConstSiteIterator* so = 0; - si = new SimpleSiteContainerIterator(ing); - so = new SimpleSiteContainerIterator(outg); + auto_ptr<ConstSiteIterator> si(new SimpleSiteContainerIterator(ing)); + auto_ptr<ConstSiteIterator> so(new SimpleSiteContainerIterator(outg)); while (si->hasMoreSites()) { site_in = si->nextSite(); site_out = so->nextSite(); // use fully resolved sites if (SiteTools::isComplete(*site_in) && SiteTools::isComplete(*site_out)) - nmuts += getDerivedSingletonNumber_(*site_in, *site_out); // singletons that are not in outgroup + nmuts += getNumberOfDerivedSingletons_(*site_in, *site_out); // singletons that are not in outgroup } - delete si; - delete so; return nmuts; } double SequenceStatistics::heterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag) { - ConstSiteIterator* si = 0; - const Site* site = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); - double S = 0; + si.reset(new SimpleSiteContainerIterator(psc)); + const Site* site = 0; + double s = 0; while (si->hasMoreSites()) { site = si->nextSite(); - S += SiteTools::heterozygosity(*site); + s += SiteTools::heterozygosity(*site); } - delete si; - return S; + return s; } double SequenceStatistics::squaredHeterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag) { - ConstSiteIterator* si = 0; - const Site* site = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); - double S = 0; + si.reset(new SimpleSiteContainerIterator(psc)); + const Site* site = 0; + double s = 0; while (si->hasMoreSites()) { site = si->nextSite(); double h = SiteTools::heterozygosity(*site); - S += h * h; + s += h * h; } - delete si; - return S; + return s; } // ****************************************************************************** @@ -245,18 +232,18 @@ double SequenceStatistics::gcContent(const PolymorphismSequenceContainer& psc) return (freqs[alpha->charToInt("C")] + freqs[alpha->charToInt("G")]) / (freqs[alpha->charToInt("A")] + freqs[alpha->charToInt("C")] + freqs[alpha->charToInt("G")] + freqs[alpha->charToInt("T")]); } -std::vector<size_t> SequenceStatistics::gcPolymorphism(const PolymorphismSequenceContainer& psc, bool gapflag) +std::vector<unsigned int> SequenceStatistics::gcPolymorphism(const PolymorphismSequenceContainer& psc, bool gapflag) { - size_t nbMut = 0; - size_t nbGC = 0; - const size_t nbSeq = psc.getNumberOfSequences(); - vector<size_t> vect(2); + unsigned int nbMut = 0; + unsigned int nbGC = 0; + size_t nbSeq = psc.getNumberOfSequences(); + vector<unsigned int> vect(2); const Site* site = 0; - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else - si = new NoGapSiteContainerIterator(psc); + si.reset(new NoGapSiteContainerIterator(psc)); while (si->hasMoreSites()) { site = si->nextSite(); @@ -271,15 +258,14 @@ std::vector<size_t> SequenceStatistics::gcPolymorphism(const PolymorphismSequenc */ if (freqGC > 0 && freqGC < 1) { - nbMut += static_cast<size_t>(nbSeq); + nbMut += static_cast<unsigned int>(nbSeq); long double adGC = freqGC * nbSeq; - nbGC += static_cast<size_t>(adGC); + nbGC += static_cast<unsigned int>(adGC); } } } vect[0] = nbMut; vect[1] = nbGC; - delete si; return vect; } @@ -291,15 +277,15 @@ double SequenceStatistics::watterson75(const PolymorphismSequenceContainer& psc, { double ThetaW; size_t n = psc.getNumberOfSequences(); - size_t S = polymorphicSiteNumber(psc, gapflag, ignoreUnknown); - map<string, double> values = getUsefullValues_(n); - ThetaW = (double) S / values["a1"]; + unsigned int s = numberOfPolymorphicSites(psc, gapflag, ignoreUnknown); + map<string, double> values = getUsefulValues_(n); + ThetaW = static_cast<double>(s) / values["a1"]; return ThetaW; } double SequenceStatistics::tajima83(const PolymorphismSequenceContainer& psc, bool gapflag) { - size_t alphabet_size = (psc.getAlphabet())->getSize(); + size_t alphabet_size = psc.getAlphabet()->getSize(); const Site* site = 0; ConstSiteIterator* si = 0; double value2 = 0.; @@ -338,7 +324,7 @@ double SequenceStatistics::tajima83(const PolymorphismSequenceContainer& psc, bo return value2; } -double SequenceStatistics::FayWu2000(const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites) +double SequenceStatistics::fayWu2000(const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites) { if (psc.getNumberOfSites() != ancestralSites.size()) throw Exception("SequenceStatistics::FayWu2000: ancestralSites and psc don't have the same size!!!'" ); @@ -385,18 +371,18 @@ double SequenceStatistics::FayWu2000(const PolymorphismSequenceContainer& psc, c return value; } -size_t SequenceStatistics::DVK(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::dvk(const PolymorphismSequenceContainer& psc, bool gapflag) { /* * Sylvain Gaillard 17/03/2010: * This implementation uses unneeded SequenceContainer recopy and works on * string. It needs to be improved. */ - PolymorphismSequenceContainer* sc = 0; + auto_ptr<PolymorphismSequenceContainer> sc; if (gapflag) - sc = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc); + sc.reset(PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc)); else - sc = new PolymorphismSequenceContainer(psc); + sc.reset(new PolymorphismSequenceContainer(psc)); // int K = 0; vector<string> pscvector; pscvector.push_back(sc->toString(0)); @@ -419,23 +405,22 @@ size_t SequenceStatistics::DVK(const PolymorphismSequenceContainer& psc, bool ga pscvector.push_back(query); } } - delete sc; // return K; - return pscvector.size(); + return static_cast<unsigned int>(pscvector.size()); } -double SequenceStatistics::DVH(const PolymorphismSequenceContainer& psc, bool gapflag) +double SequenceStatistics::dvh(const PolymorphismSequenceContainer& psc, bool gapflag) { /* * Sylvain Gaillard 17/03/2010: * This implementation uses unneeded SequenceContainer recopy and works on * string. It needs to be improved. */ - PolymorphismSequenceContainer* sc = 0; + auto_ptr<PolymorphismSequenceContainer> sc; if (gapflag) - sc = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc); + sc.reset(PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc)); else - sc = new PolymorphismSequenceContainer(psc); + sc.reset(new PolymorphismSequenceContainer(psc)); double H = 0.; size_t nbSeq; vector<string> pscvector; @@ -468,14 +453,13 @@ double SequenceStatistics::DVH(const PolymorphismSequenceContainer& psc, bool ga H -= (static_cast<double>(effvector[i]) / static_cast<double>(nbSeq)) * ( static_cast<double>(effvector[i]) / static_cast<double>(nbSeq)); } H += 1.; - delete sc; return H; } -size_t SequenceStatistics::getNumberOfTransitions(const PolymorphismSequenceContainer& psc) +unsigned int SequenceStatistics::numberOfTransitions(const PolymorphismSequenceContainer& psc) { - size_t nbT = 0; - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); + unsigned int nbT = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); const Site* site = 0; while (si->hasMoreSites()) { @@ -500,14 +484,13 @@ size_t SequenceStatistics::getNumberOfTransitions(const PolymorphismSequenceCont nbT++; } } - delete si; return nbT; } -size_t SequenceStatistics::getNumberOfTransversions(const PolymorphismSequenceContainer& psc) +unsigned int SequenceStatistics::numberOfTransversions(const PolymorphismSequenceContainer& psc) { - size_t nbTv = 0; - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); + unsigned int nbTv = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); const Site* site = 0; while (si->hasMoreSites()) { @@ -532,18 +515,17 @@ size_t SequenceStatistics::getNumberOfTransversions(const PolymorphismSequenceCo nbTv++; } } - delete si; return nbTv; } -double SequenceStatistics::getTransitionsTransversionsRatio(const PolymorphismSequenceContainer& psc) throw (Exception) +double SequenceStatistics::ratioOfTransitionsTransversions(const PolymorphismSequenceContainer& psc) throw (Exception) { // return (double) getNumberOfTransitions(psc)/getNumberOfTransversions(psc); - size_t nbT = 0; - size_t nbTv = 0; - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); + double nbTs = 0; + double nbTv = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); const Site* site = 0; - vector< int > state(2); + vector<int> state(2); while (si->hasMoreSites()) { map<int, size_t> count; @@ -551,7 +533,7 @@ double SequenceStatistics::getTransitionsTransversionsRatio(const PolymorphismSe SymbolListTools::getCounts(*site, count); if (count.size() != 2) continue; - int i = 0; + size_t i = 0; for (map<int, size_t>::iterator it = count.begin(); it != count.end(); it++) { state[i] = it->first; @@ -560,92 +542,91 @@ double SequenceStatistics::getTransitionsTransversionsRatio(const PolymorphismSe if (((state[0] == 0 && state[1] == 2) || (state[0] == 2 && state[1] == 0)) || ((state[0] == 1 && state[1] == 3) || (state[0] == 3 && state[1] == 1))) { - nbT++; // transitions + nbTs++; // transitions } else { nbTv++; // transversion } } - delete si; if (nbTv == 0) throw ZeroDivisionException("SequenceStatistics::getTransitionsTransversionsRatio."); - return static_cast<double>(nbT) / static_cast<double>(nbTv); + return nbTs / nbTv; } // ****************************************************************************** // Synonymous and non-synonymous polymorphism // ****************************************************************************** -size_t SequenceStatistics::stopCodonSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag) +unsigned int SequenceStatistics::numberOfSitesWithStopCodon(const PolymorphismSequenceContainer& psc, const GeneticCode& gCode, bool gapflag) { /* * Sylvain Gaillard 17/03/2010 * What if the Alphabet is not a codon alphabet? */ - ConstSiteIterator* si = 0; + if (!AlphabetTools::isCodonAlphabet(psc.getAlphabet())) + throw AlphabetMismatchException("SequenceStatistics::stopCodonSiteNumber(). PolymorphismSequenceContainer must be with a codon alphabet.", psc.getAlphabet()); + + auto_ptr<ConstSiteIterator> si; if (gapflag) - si = new NoGapSiteContainerIterator(psc); + si.reset(new NoGapSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); - size_t S = 0; + si.reset(new SimpleSiteContainerIterator(psc)); + unsigned int s = 0; const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); - if (CodonSiteTools::hasStop(*site)) - S++; + if (CodonSiteTools::hasStop(*site, gCode)) + s++; } - delete si; - return S; + return s; } -size_t SequenceStatistics::monoSitePolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, bool stopflag, bool gapflag) +unsigned int SequenceStatistics::numberOfMonoSitePolymorphicCodons(const PolymorphismSequenceContainer& psc, bool stopflag, bool gapflag) { - ConstSiteIterator* si = 0; + auto_ptr<ConstSiteIterator> si; if (stopflag) - si = new CompleteSiteContainerIterator(psc); + si.reset(new CompleteSiteContainerIterator(psc)); else { if (gapflag) - si = new NoGapSiteContainerIterator(psc); + si.reset(new NoGapSiteContainerIterator(psc)); else - si = new SimpleSiteContainerIterator(psc); + si.reset(new SimpleSiteContainerIterator(psc)); } - size_t S = 0; + unsigned int s = 0; const Site* site; while (si->hasMoreSites()) { site = si->nextSite(); if (CodonSiteTools::isMonoSitePolymorphic(*site)) - S++; + s++; } - delete si; - return S; + return s; } -size_t SequenceStatistics::synonymousPolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) +unsigned int SequenceStatistics::numberOfSynonymousPolymorphicCodons(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) { - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); - size_t S = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); + unsigned int s = 0; const Site* site; while (si->hasMoreSites()) { site = si->nextSite(); if (CodonSiteTools::isSynonymousPolymorphic(*site, gc)) - S++; + s++; } - delete si; - return S; + return s; } double SequenceStatistics::watterson75Synonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) { double ThetaW = 0.; size_t n = psc.getNumberOfSequences(); - size_t S = synonymousSubstitutionsNumber(psc, gc); - map<string, double> values = getUsefullValues_(n); - ThetaW = static_cast<double>(S) / values["a1"]; + unsigned int s = numberOfSynonymousSubstitutions(psc, gc); + map<string, double> values = getUsefulValues_(n); + ThetaW = static_cast<double>(s) / values["a1"]; return ThetaW; } @@ -653,9 +634,9 @@ double SequenceStatistics::watterson75NonSynonymous(const PolymorphismSequenceCo { double ThetaW; size_t n = psc.getNumberOfSequences(); - size_t S = nonSynonymousSubstitutionsNumber(psc, gc); - map<string, double> values = getUsefullValues_(n); - ThetaW = static_cast<double>(S) / values["a1"]; + unsigned int s = numberOfNonSynonymousSubstitutions(psc, gc); + map<string, double> values = getUsefulValues_(n); + ThetaW = static_cast<double>(s) / values["a1"]; return ThetaW; } @@ -687,7 +668,7 @@ double SequenceStatistics::piNonSynonymous(const PolymorphismSequenceContainer& return S; } -double SequenceStatistics::meanSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) +double SequenceStatistics::meanNumberOfSynonymousSites(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) { double S = 0.; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); @@ -701,7 +682,7 @@ double SequenceStatistics::meanSynonymousSitesNumber(const PolymorphismSequenceC return S; } -double SequenceStatistics::meanNonSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) +double SequenceStatistics::meanNumberOfNonSynonymousSites(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) { double S = 0.; int n = 0; @@ -717,40 +698,38 @@ double SequenceStatistics::meanNonSynonymousSitesNumber(const PolymorphismSequen return static_cast<double>(n - S); } -size_t SequenceStatistics::synonymousSubstitutionsNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) +unsigned int SequenceStatistics::numberOfSynonymousSubstitutions(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) { - size_t St = 0, Sns = 0; - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); + size_t st = 0, sns = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); - St += CodonSiteTools::numberOfSubsitutions(*site, freqmin); - Sns += CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin); + st += CodonSiteTools::numberOfSubsitutions(*site, gc, freqmin); + sns += CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin); } - delete si; - return St - Sns; + return static_cast<unsigned int>(st - sns); } -size_t SequenceStatistics::nonSynonymousSubstitutionsNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) +unsigned int SequenceStatistics::numberOfNonSynonymousSubstitutions(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) { - size_t Sns = 0; - ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); + unsigned int sns = 0; + auto_ptr<ConstSiteIterator> si(new CompleteSiteContainerIterator(psc)); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); - Sns += CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin); + sns += static_cast<unsigned int>(CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin)); } - delete si; - return Sns; + return sns; } -vector<size_t> SequenceStatistics::fixedDifferences(const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, const GeneticCode& gc) +vector<unsigned int> SequenceStatistics::fixedDifferences(const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, const GeneticCode& gc) { - ConstSiteIterator* siIn = new CompleteSiteContainerIterator(pscin); - ConstSiteIterator* siOut = new CompleteSiteContainerIterator(pscout); - ConstSiteIterator* siCons = new CompleteSiteContainerIterator(psccons); + auto_ptr<ConstSiteIterator> siIn(new CompleteSiteContainerIterator(pscin)); + auto_ptr<ConstSiteIterator> siOut(new CompleteSiteContainerIterator(pscout)); + auto_ptr<ConstSiteIterator> siCons(new CompleteSiteContainerIterator(psccons)); const Site* siteIn = 0; const Site* siteOut = 0; const Site* siteCons = 0; @@ -765,16 +744,13 @@ vector<size_t> SequenceStatistics::fixedDifferences(const PolymorphismSequenceCo NfixS += v[0]; NfixA += v[1]; } - vector<size_t> v(2); - v[0] = NfixS; - v[1] = NfixA; - delete siIn; - delete siOut; - delete siCons; + vector<unsigned int> v(2); + v[0] = static_cast<unsigned int>(NfixS); + v[1] = static_cast<unsigned int>(NfixA); return v; } -vector<size_t> SequenceStatistics::MKtable(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin) +vector<unsigned int> SequenceStatistics::mkTable(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin) { PolymorphismSequenceContainer psctot(ingroup); for (size_t i = 0; i < outgroup.getNumberOfSequences(); i++) @@ -782,47 +758,26 @@ vector<size_t> SequenceStatistics::MKtable(const PolymorphismSequenceContainer& psctot.addSequence(outgroup.getSequence(i)); psctot.setAsOutgroupMember(i + ingroup.getNumberOfSequences()); } - const PolymorphismSequenceContainer* psccomplet = PolymorphismSequenceContainerTools::getCompleteSites(psctot); - const PolymorphismSequenceContainer* pscin = PolymorphismSequenceContainerTools::extractIngroup(*psccomplet); - const PolymorphismSequenceContainer* pscout = PolymorphismSequenceContainerTools::extractOutgroup(*psccomplet); - const Sequence* consensusIn = SiteContainerTools::getConsensus(*pscin, "consensusIn"); - const Sequence* consensusOut = SiteContainerTools::getConsensus(*pscout, "consensusOut"); - PolymorphismSequenceContainer* consensus = new PolymorphismSequenceContainer(ingroup.getAlphabet()); + auto_ptr<const PolymorphismSequenceContainer> psccomplet(PolymorphismSequenceContainerTools::getCompleteSites(psctot)); + auto_ptr<const PolymorphismSequenceContainer> pscin (PolymorphismSequenceContainerTools::extractIngroup(*psccomplet)); + auto_ptr<const PolymorphismSequenceContainer> pscout (PolymorphismSequenceContainerTools::extractOutgroup(*psccomplet)); + auto_ptr<const Sequence> consensusIn (SiteContainerTools::getConsensus(*pscin, "consensusIn")); + auto_ptr<const Sequence> consensusOut(SiteContainerTools::getConsensus(*pscout, "consensusOut")); + auto_ptr<PolymorphismSequenceContainer> consensus(new PolymorphismSequenceContainer(ingroup.getAlphabet())); consensus->addSequence(*consensusIn); consensus->addSequence(*consensusOut); - vector<size_t> u = SequenceStatistics::fixedDifferences(*pscin, *pscout, *consensus, gc); - vector<size_t> v(4); - v[0] = SequenceStatistics::nonSynonymousSubstitutionsNumber(*pscin, gc, freqmin); - v[1] = SequenceStatistics::synonymousSubstitutionsNumber(*pscin, gc, freqmin); + vector<unsigned int> u = SequenceStatistics::fixedDifferences(*pscin, *pscout, *consensus, gc); + vector<unsigned int> v(4); + v[0] = SequenceStatistics::numberOfNonSynonymousSubstitutions(*pscin, gc, freqmin); + v[1] = SequenceStatistics::numberOfSynonymousSubstitutions(*pscin, gc, freqmin); v[2] = u[1]; v[3] = u[0]; - delete consensus; - if (psccomplet) - { - delete psccomplet; - } - if (pscin) - { - delete pscin; - } - if (pscout) - { - delete pscout; - } - if (consensusIn) - { - delete consensusIn; - } - if (consensusOut) - { - delete consensusOut; - } return v; } double SequenceStatistics::neutralityIndex(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin) { - vector<size_t> v = SequenceStatistics::MKtable(ingroup, outgroup, gc, freqmin); + vector<unsigned int> v = SequenceStatistics::mkTable(ingroup, outgroup, gc, freqmin); if (v[1] != 0 && v[2] != 0) return static_cast<double>(v[0] * v[3]) / static_cast<double>(v[1] * v[2]); else @@ -833,61 +788,62 @@ double SequenceStatistics::neutralityIndex(const PolymorphismSequenceContainer& // Statistical tests // ****************************************************************************** -double SequenceStatistics::tajimaDSS(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) +double SequenceStatistics::tajimaDss(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) { - double S = static_cast<double>(polymorphicSiteNumber(psc, gapflag)); - if (!S) - throw ZeroDivisionException("S should not be null"); + unsigned int Sp = numberOfPolymorphicSites(psc, gapflag); + if (Sp == 0) + throw ZeroDivisionException("SequenceStatistics::tajimaDss. S should not be 0."); + double S = static_cast<double>(Sp); double tajima = tajima83(psc, gapflag); double watterson = watterson75(psc, gapflag); size_t n = psc.getNumberOfSequences(); - map<string, double> values = getUsefullValues_(n); - // if (S == 0) - // cout << "ARG S == 0" << endl; + map<string, double> values = getUsefulValues_(n); return (tajima - watterson) / sqrt((values["e1"] * S) + (values["e2"] * S * (S - 1))); } -double SequenceStatistics::tajimaDTNM(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) +double SequenceStatistics::tajimaDtnm(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) { - double eta = static_cast<double>(totNumberMutations(psc, gapflag)); - if (!eta) - throw ZeroDivisionException("eta should not be null"); + unsigned int etaP = totalNumberOfMutations(psc, gapflag); + if (etaP == 0) + throw ZeroDivisionException("SequenceStatistics::tajimaDtnm. Eta should not be 0."); + double eta = static_cast<double>(etaP); double tajima = tajima83(psc, gapflag); size_t n = psc.getNumberOfSequences(); - map<string, double> values = getUsefullValues_(n); - double eta_a1 = static_cast<double>(eta) / values["a1"]; + map<string, double> values = getUsefulValues_(n); + double eta_a1 = eta / values["a1"]; return (tajima - eta_a1) / sqrt((values["e1"] * eta) + (values["e2"] * eta * (eta - 1))); } -double SequenceStatistics::fuliD(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) +double SequenceStatistics::fuLiD(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) { size_t n = ingroup.getNumberOfSequences(); - map<string, double> values = getUsefullValues_(n); + map<string, double> values = getUsefulValues_(n); double vD = getVD_(n, values["a1"], values["a2"], values["cn"]); double uD = getUD_(values["a1"], vD); - double eta = static_cast<double>(totNumberMutations(ingroup)); - if (eta == 0.) - throw ZeroDivisionException("eta should not be null"); + unsigned int etaP = totalNumberOfMutations(ingroup); + if (etaP == 0) + throw ZeroDivisionException("SequenceStatistics::fuLiD. Eta should not be 0."); + double eta = static_cast<double>(etaP); double etae = 0.; if (original) - etae = static_cast<double>(countSingleton(outgroup)); + etae = static_cast<double>(numberOfSingletons(outgroup)); else - etae = static_cast<double>(totMutationsExternalBranchs(ingroup, outgroup)); // added by Khalid 13/07/2005 + etae = static_cast<double>(totalNumberOfMutationsOnExternalBranches(ingroup, outgroup)); // added by Khalid 13/07/2005 return (eta - (values["a1"] * etae)) / sqrt((uD * eta) + (vD * eta * eta)); } -double SequenceStatistics::fuliDstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) +double SequenceStatistics::fuLiDStar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) { size_t n = group.getNumberOfSequences(); double nn = static_cast<double>(n); double _n = nn / (nn - 1.); - map<string, double> values = getUsefullValues_(n); + map<string, double> values = getUsefulValues_(n); double vDs = getVDstar_(n, values["a1"], values["a2"], values["dn"]); double uDs = getUDstar_(n, values["a1"], vDs); - double eta = static_cast<double>(totNumberMutations(group)); + double eta = static_cast<double>(totalNumberOfMutations(group)); if (eta == 0.) throw ZeroDivisionException("eta should not be null"); - double etas = static_cast<double>(countSingleton(group)); + double etas = static_cast<double>(numberOfSingletons(group)); // Fu & Li 1993 return ((_n * eta) - (values["a1"] * etas)) / sqrt(uDs * eta + vDs * eta * eta); @@ -898,29 +854,29 @@ double SequenceStatistics::fuliDstar(const PolymorphismSequenceContainer& group) */ } -double SequenceStatistics::fuliF(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) +double SequenceStatistics::fuLiF(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) { size_t n = ingroup.getNumberOfSequences(); double nn = static_cast<double>(n); - map<string, double> values = getUsefullValues_(n); + map<string, double> values = getUsefulValues_(n); double pi = tajima83(ingroup, true); double vF = (values["cn"] + values["b2"] - 2. / (nn - 1.)) / (pow(values["a1"], 2) + values["a2"]); double uF = ((1. + values["b1"] - (4. * ((nn + 1.) / ((nn - 1.) * (nn - 1.)))) * (values["a1n"] - (2. * nn) / (nn + 1.))) / values["a1"]) - vF; - double eta = static_cast<double>(totNumberMutations(ingroup)); + double eta = static_cast<double>(totalNumberOfMutations(ingroup)); if (eta == 0.) throw ZeroDivisionException("eta should not be null"); double etae = 0.; if (original) - etae = static_cast<double>(countSingleton(outgroup)); + etae = static_cast<double>(numberOfSingletons(outgroup)); else - etae = static_cast<double>(totMutationsExternalBranchs(ingroup, outgroup)); // added by Khalid 13/07/2005 + etae = static_cast<double>(totalNumberOfMutationsOnExternalBranches(ingroup, outgroup)); // added by Khalid 13/07/2005 return (pi - etae) / sqrt(uF * eta + vF * eta * eta); } -double SequenceStatistics::fuliFstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) +double SequenceStatistics::fuLiFStar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) { double n = static_cast<double>(group.getNumberOfSequences()); - map<string, double> values = getUsefullValues_(group.getNumberOfSequences()); + map<string, double> values = getUsefulValues_(group.getNumberOfSequences()); double pi = tajima83(group, true); // Fu & Li 1993 @@ -930,16 +886,16 @@ double SequenceStatistics::fuliFstar(const PolymorphismSequenceContainer& group) // Simonsen et al. 1995 double vFs = (((2 * n * n * n + 110 * n * n - 255 * n + 153) / (9 * n * n * (n - 1))) + ((2 * (n - 1) * values["a1"]) / (n * n)) - 8 * values["a2"] / n) / (pow(values["a1"], 2) + values["a2"]); double uFs = (((4 * n * n + 19 * n + 3 - 12 * (n + 1) * values["a1n"]) / (3 * n * (n - 1))) / values["a1"]) - vFs; - double eta = static_cast<double>(totNumberMutations(group)); + double eta = static_cast<double>(totalNumberOfMutations(group)); if (eta == 0.) throw ZeroDivisionException("eta should not be null"); - double etas = static_cast<double>(countSingleton(group)); + double etas = static_cast<double>(numberOfSingletons(group)); // Fu & Li 1993 // Simonsen et al. 1995 return (pi - ((n - 1.) / n * etas)) / sqrt(uFs * eta + vFs * eta * eta); } -double SequenceStatistics::FstHudson92(const PolymorphismSequenceContainer& psc, size_t id1, size_t id2) +double SequenceStatistics::fstHudson92(const PolymorphismSequenceContainer& psc, size_t id1, size_t id2) { vector<double> vdiff; double piIntra1, piIntra2, meanPiIntra, piInter, Fst; @@ -982,7 +938,7 @@ double SequenceStatistics::FstHudson92(const PolymorphismSequenceContainer& psc, /* Preliminary method */ /**********************/ -PolymorphismSequenceContainer* SequenceStatistics::generateLDContainer(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) +PolymorphismSequenceContainer* SequenceStatistics::generateLdContainer(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) { SiteSelection ss; // Extract polymorphic site with only two alleles @@ -1188,7 +1144,7 @@ Vdouble SequenceStatistics::pairwiseDistances2(const PolymorphismSequenceContain Vdouble SequenceStatistics::pairwiseD(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { - PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); + PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLdContainer(psc, keepsingleton, freqmin); Vdouble D; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); @@ -1221,7 +1177,7 @@ Vdouble SequenceStatistics::pairwiseD(const PolymorphismSequenceContainer& psc, Vdouble SequenceStatistics::pairwiseDprime(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { - PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); + PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLdContainer(psc, keepsingleton, freqmin); Vdouble Dprime; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); @@ -1277,7 +1233,7 @@ Vdouble SequenceStatistics::pairwiseDprime(const PolymorphismSequenceContainer& Vdouble SequenceStatistics::pairwiseR2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { - PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); + PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLdContainer(psc, keepsingleton, freqmin); Vdouble R2; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); @@ -1523,7 +1479,7 @@ double SequenceStatistics::hudson87(const PolymorphismSequenceContainer& psc, do double dif = 1; double c1 = cinf; double c2 = csup; - if (SequenceStatistics::polymorphicSiteNumber(psc) < 2) + if (SequenceStatistics::numberOfPolymorphicSites(psc) < 2) return -1; if (rightHandHudson_(c1, n) < left) return cinf; @@ -1544,9 +1500,9 @@ double SequenceStatistics::hudson87(const PolymorphismSequenceContainer& psc, do /* Tests methods */ /*****************/ -void SequenceStatistics::testUsefullValues(std::ostream& s, size_t n) +void SequenceStatistics::testUsefulValues(std::ostream& s, size_t n) { - map<string, double> v = getUsefullValues_(n); + map<string, double> v = getUsefulValues_(n); double vD = getVD_(n, v["a1"], v["a2"], v["cn"]); double uD = getUD_(v["a1"], vD); double vDs = getVDstar_(n, v["a1"], v["a2"], v["dn"]); @@ -1574,9 +1530,9 @@ void SequenceStatistics::testUsefullValues(std::ostream& s, size_t n) // Private methods // ****************************************************************************** -size_t SequenceStatistics::getMutationNumber_(const Site& site) +unsigned int SequenceStatistics::getNumberOfMutations_(const Site& site) { - size_t tmp_count = 0; + unsigned int tmp_count = 0; map<int, size_t> states_count; SymbolListTools::getCounts(site, states_count); @@ -1590,9 +1546,9 @@ size_t SequenceStatistics::getMutationNumber_(const Site& site) return tmp_count; } -size_t SequenceStatistics::getSingletonNumber_(const Site& site) +unsigned int SequenceStatistics::getNumberOfSingletons_(const Site& site) { - size_t nus = 0; + unsigned int nus = 0; map<int, size_t> states_count; SymbolListTools::getCounts(site, states_count); for (map<int, size_t>::iterator it = states_count.begin(); it != states_count.end(); it++) @@ -1603,9 +1559,9 @@ size_t SequenceStatistics::getSingletonNumber_(const Site& site) return nus; } -size_t SequenceStatistics::getDerivedSingletonNumber_(const Site& site_in, const Site& site_out) +unsigned int SequenceStatistics::getNumberOfDerivedSingletons_(const Site& site_in, const Site& site_out) { - size_t nus = 0; + unsigned int nus = 0; map<int, size_t> states_count; map<int, size_t> outgroup_states_count; SymbolListTools::getCounts(site_in, states_count); @@ -1625,7 +1581,7 @@ size_t SequenceStatistics::getDerivedSingletonNumber_(const Site& site_in, const return nus; } -std::map<std::string, double> SequenceStatistics::getUsefullValues_(size_t n) +std::map<std::string, double> SequenceStatistics::getUsefulValues_(size_t n) { double nn = static_cast<double>(n); map<string, double> values; diff --git a/src/Bpp/PopGen/SequenceStatistics.h b/src/Bpp/PopGen/SequenceStatistics.h index ffc279f..4eac1a7 100644 --- a/src/Bpp/PopGen/SequenceStatistics.h +++ b/src/Bpp/PopGen/SequenceStatistics.h @@ -44,7 +44,7 @@ #ifndef _SEQUENCESTATISTICS_H_ #define _SEQUENCESTATISTICS_H_ -// From the SeqLib library +// From the bpp-seq library #include <Bpp/Seq/SymbolListTools.h> #include <Bpp/Seq/Alphabet/CodonAlphabet.h> #include <Bpp/Seq/GeneticCode/GeneticCode.h> @@ -84,7 +84,7 @@ public: * @param ignoreUnknown a boolean set by default to true to ignore * unknown states */ - static size_t polymorphicSiteNumber( + static unsigned int numberOfPolymorphicSites( const PolymorphismSequenceContainer& psc, bool gapflag = true, bool ignoreUnknown = true); @@ -96,7 +96,7 @@ public: * @param gapflag a boolean set by default to true if you don't want to * take gap into account */ - static size_t parsimonyInformativeSiteNumber( + static unsigned int numberOfParsimonyInformativeSites( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -108,7 +108,7 @@ public: * take gap into account * @author Sylvain Gaillard */ - static size_t countSingleton( + static unsigned int numberOfSingletons( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -122,7 +122,7 @@ public: * take gap into account * @author Sylvain Gaillard */ - static size_t totNumberMutations( + static unsigned int totalNumberOfMutations( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -139,7 +139,7 @@ public: * @throw Exception if ing and outg are not of the same size (site number) * @author Khalid Belkhir */ - static size_t totMutationsExternalBranchs( + static unsigned int totalNumberOfMutationsOnExternalBranches( const PolymorphismSequenceContainer& ing, const PolymorphismSequenceContainer& outg) throw (Exception); @@ -151,7 +151,7 @@ public: * @param gapflag a boolean set by default to true if you don't want to take gap into account * @author Sylvain Glémin */ - static size_t tripletNumber( + static unsigned int numberOfTriplets( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -198,7 +198,7 @@ public: * @return A std::vector of size 2 containing the number of GC alleles * and the total number of alleles. */ - static std::vector<size_t> gcPolymorphism( + static std::vector<unsigned int> gcPolymorphism( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -209,7 +209,7 @@ public: * \hat{\theta}_S=\frac{S}{a_1} * @f] * where @f$S@f$ is the number of polymorphic sites and @f$a_1@f$ is - * describe in SequenceStatistics::_getUsefullValues(). + * describe in SequenceStatistics::getUsefulValues_(). * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to @@ -227,8 +227,8 @@ public: * @brief Compute diversity estimator Theta of Tajima (1983, Genetics, 105 pp437-460) * * @f[ - * \hat{\theta}_\pi=1-\sum_{i=1}^{S}\sum_{j=1}^{4}\frac{k_{j,i}\times\left(k_{j,i}-1\right)} - * {n_i\times\left(n_i-1\right)} \qquad \textrm{with }k_{j,i}>0 + * \hat{\theta}_\pi = \sum_{i=1}^{S} \left(1-\sum_{j=1}^{4} \frac{k_{j,i}\times\left(k_{j,i}-1\right)} + * {n_i\times\left(n_i-1\right)}\right) \qquad \textrm{with }k_{j,i}>0 * @f] * where @f$k_{j,i}@f$ is the count of the j<sup>th</sup> state at the * i<sup>th</sup> site, @@ -252,7 +252,7 @@ public: * (reconstructed independently) to fold the mutation in the psc SequenceContainer. @author Benoit Nabholz */ - static double FayWu2000( + static double fayWu2000( const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites); @@ -266,9 +266,9 @@ public: * @author Éric Bazin * @todo * - remove unneeded Sequence Container recopy - * - work on Sequence rather on string + * - work on Sequence rather than string */ - static size_t DVK( + static unsigned int dvk( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -284,7 +284,7 @@ public: * - remove unneeded Sequence Container recopy * - work on Sequence rather on string */ - static double DVH( + static double dvh( const PolymorphismSequenceContainer& psc, bool gapflag = true); @@ -294,7 +294,7 @@ public: * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ - static size_t getNumberOfTransitions( + static unsigned int numberOfTransitions( const PolymorphismSequenceContainer& psc); /** @@ -303,7 +303,7 @@ public: * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ - static size_t getNumberOfTransversions( + static unsigned int numberOfTransversions( const PolymorphismSequenceContainer& psc); /** @@ -312,7 +312,7 @@ public: * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ - static double getTransitionsTransversionsRatio( + static double ratioOfTransitionsTransversions( const PolymorphismSequenceContainer& psc ) throw (Exception); @@ -320,12 +320,14 @@ public: * @brief Compute the number of codon sites with stop codon * * @param psc a PolymorphismSequenceContainer + * @param gCode the genetic code to use * @param gapflag a boolean set by default to true if you don't want to * take gaps into account * @author Sylvain Glémin */ - static size_t stopCodonSiteNumber( + static unsigned int numberOfSitesWithStopCodon( const PolymorphismSequenceContainer& psc, + const GeneticCode& gCode, bool gapflag = true); /** @@ -340,7 +342,7 @@ public: * @bug Sylvain Gaillard 17/03/2010: stopflag don't work as expected * because CompleteSiteIterator don't skip stop codon. */ - static size_t monoSitePolymorphicCodonNumber( + static unsigned int numberOfMonoSitePolymorphicCodons( const PolymorphismSequenceContainer& psc, bool stopflag = true, bool gapflag = true); @@ -355,7 +357,7 @@ public: * @author Sylvain Glémin * @author Éric Bazin */ - static size_t synonymousPolymorphicCodonNumber( + static unsigned int numberOfSynonymousPolymorphicCodons( const PolymorphismSequenceContainer& psc, const GeneticCode& gc); @@ -449,7 +451,7 @@ public: * @author Sylvain Glémin * @author Éric Bazin */ - static double meanSynonymousSitesNumber( + static double meanNumberOfSynonymousSites( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio = 1.); @@ -467,7 +469,7 @@ public: * @param ratio a double * @author Éric Bazin */ - static double meanNonSynonymousSitesNumber( + static double meanNumberOfNonSynonymousSites( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio = 1.); @@ -487,7 +489,7 @@ public: * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin */ - static size_t synonymousSubstitutionsNumber( + static unsigned int numberOfSynonymousSubstitutions( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin = 0.); @@ -507,7 +509,7 @@ public: * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin */ - static size_t nonSynonymousSubstitutionsNumber( + static unsigned int numberOfNonSynonymousSubstitutions( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin = 0.); @@ -529,7 +531,7 @@ public: * @bug Sylvain Gaillard 17.03.2010: should throw something if pscin, * pscout and psccons have different length (site number). */ - static std::vector<size_t> fixedDifferences( + static std::vector<unsigned int> fixedDifferences( const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, @@ -546,7 +548,7 @@ public: * than freqmin * @author Sylvain Glémin */ - static std::vector<size_t> MKtable( + static std::vector<unsigned int> mkTable( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, @@ -586,7 +588,7 @@ public: * @throw ZeroDivisionException if S == 0 * @author Sylvain Gaillard */ - static double tajimaDSS( + static double tajimaDss( const PolymorphismSequenceContainer& psc, bool gapflag = true) throw (ZeroDivisionException); @@ -604,7 +606,7 @@ public: * @throw ZeroDivisionException if eta == 0 * @author Sylvain Gaillard */ - static double tajimaDTNM( + static double tajimaDtnm( const PolymorphismSequenceContainer& psc, bool gapflag = true) throw (ZeroDivisionException); @@ -624,7 +626,7 @@ public: * If the outgroup contains more than one sequence the sites with more * than one variant will not be considered for external branch mutations! */ - static double fuliD( + static double fuLiD( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original = true) @@ -636,7 +638,7 @@ public: * @param group a PolymorphismSequenceContainer * @author Sylvain Gaillard */ - static double fuliDstar( + static double fuLiDStar( const PolymorphismSequenceContainer& group) throw (ZeroDivisionException); @@ -654,7 +656,7 @@ public: * If the outgroup contains more than one sequence the sites with more * than one variant will not be considered for external branch mutations! */ - static double fuliF( + static double fuLiF( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original = true) @@ -666,7 +668,7 @@ public: * @param group a PolymorphismSequenceContainer * @author Sylvain Gaillard */ - static double fuliFstar( + static double fuLiFStar( const PolymorphismSequenceContainer& group) throw (ZeroDivisionException); @@ -688,7 +690,7 @@ public: * @param id2 is the id of the population 2 * @author Benoit Nabholz */ - double FstHudson92( + double fstHudson92( const PolymorphismSequenceContainer& psc, size_t id1, size_t id2); @@ -720,7 +722,7 @@ public: * @todo * - To be moved to PolymorphismSequenceContainerTools. */ - static PolymorphismSequenceContainer* generateLDContainer( + static PolymorphismSequenceContainer* generateLdContainer( const PolymorphismSequenceContainer& psc, bool keepsingleton = true, double freqmin = 0.); @@ -1083,12 +1085,12 @@ public: double csup = 10000.); /** - * @brief Test usefull values + * @brief Test useful values * @param s a ostream where write the values * @param n then number of observed sequences * @author Sylvain Gaillard */ - static void testUsefullValues( + static void testUsefulValues( std::ostream& s, size_t n); @@ -1096,14 +1098,12 @@ private: /** * @brief Count the number of mutation for a site. */ - static size_t getMutationNumber_( - const Site& site); + static unsigned int getNumberOfMutations_(const Site& site); /** * @brief Count the number of singleton for a site. */ - static size_t getSingletonNumber_( - const Site& site); + static unsigned int getNumberOfSingletons_(const Site& site); /** * @brief Count the number of singleton for a site. @@ -1112,12 +1112,12 @@ private: * site_in is a site from an ingroup * @author Khalid Belkhir */ - static size_t getDerivedSingletonNumber_( + static unsigned getNumberOfDerivedSingletons_( const Site& site_in, const Site& site_out); /** - * @brief Get usefull values for theta estimators. + * @brief Get useful values for theta estimators. * * @param n the number of observed sequences * @@ -1149,16 +1149,16 @@ private: * * @author Sylvain Gaillard */ - static std::map<std::string, double> getUsefullValues_( + static std::map<std::string, double> getUsefulValues_( size_t n); /** * @brief Get the vD value of equation (32) in Fu & Li 1993, Genetics, 133 pp693-709) * * @param n the number of observed sequences - * @param a1 as describe in getUsefullValues - * @param a2 as describe in getUsefullValues - * @param cn as describe in getUsefullValues + * @param a1 as describe in getUsefulValues + * @param a2 as describe in getUsefulValues + * @param cn as describe in getUsefulValues * * @return the vD value as double * @@ -1173,7 +1173,7 @@ private: /** * @brief Get the uD value of equation (32) in Fu & Li 1993, Genetics, 133 pp693-709) * - * @param a1 as describe in getUsefullValues + * @param a1 as describe in getUsefulValues * @param vD as provided by getVD_ * * @return the uD value as double @@ -1188,9 +1188,9 @@ private: * @brief Get the vD* value of D* equation in Fu & Li 1993, Genetics, 133 pp693-709) * * @param n the number of observed sequences - * @param a1 as describe in getUsefullValues - * @param a2 as describe in getUsefullValues - * @param dn as describe in getUsefullValues + * @param a1 as describe in getUsefulValues + * @param a2 as describe in getUsefulValues + * @param dn as describe in getUsefulValues * * @return the vD* value as double * @@ -1206,7 +1206,7 @@ private: * @brief Get the uD* value of D* equation in Fu & Li 1993, Genetics, 133 pp693-709) * * @param n the number of observed sequences - * @param a1 as describe in getUsefullValues + * @param a1 as describe in getUsefulValues * @param vDs as provided by getVDstar_ * * @return the uD* value as double -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbpp-popgen.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
