This is an automated email from the git hooks/post-receive script. plessy pushed a commit to branch master in repository bedtools.
commit 1d31cd1fc318aa28039c66dd32011861ec2bb445 Author: Charles Plessy <[email protected]> Date: Sat Nov 5 21:45:39 2016 +0900 Sync with upstream repo at commit 6bf23c This patch contains bug fixes to upstream issues #429, #418 and #424. In particular, it repairs the groupby command, which was completely broken. Cherry-picking a single commit did not result in a buildable source, and this big patch was the easiest alternative. Closes: #831833 --- debian/patches/series | 1 + debian/patches/v2.26.0-19-g6bf23c4.patch | 6711 ++++++++++++++++++++++++++++++ 2 files changed, 6712 insertions(+) diff --git a/debian/patches/series b/debian/patches/series index e0b66fe..84290f3 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -2,3 +2,4 @@ gzstream.h.patch fix_test_script.patch remove_barski_binding_site.png.patch reproducible_build.patch +v2.26.0-19-g6bf23c4.patch diff --git a/debian/patches/v2.26.0-19-g6bf23c4.patch b/debian/patches/v2.26.0-19-g6bf23c4.patch new file mode 100644 index 0000000..150225f --- /dev/null +++ b/debian/patches/v2.26.0-19-g6bf23c4.patch @@ -0,0 +1,6711 @@ +Author: Upstream +Bug-Debian: https://bugs.debian.org/831833 +Description: Sync with upstream repo at commit 6bf23c + This patch contains bug fixes to upstream issues #429, #418 and #424. + In particular, it repairs the groupby command, which was completely + broken. Cherry-picking a single commit did not result in a buildable + source, and this big patch was the easiest alternative. 
+diff --git a/Makefile b/Makefile +index 91cbbd5..80c5656 100644 +--- a/Makefile ++++ b/Makefile +@@ -18,7 +18,7 @@ export SRC_DIR = src + export UTIL_DIR = src/utils + export CXX = g++ + ifeq ($(DEBUG),1) +-export CXXFLAGS = -Wall -O0 -g -fno-inline -fkeep-inline-functions -D_FILE_OFFSET_BITS=64 -fPIC -DDEBUG -D_DEBUG ++export CXXFLAGS = -Wall -Wextra -DDEBUG -D_DEBUG -g -O0 -D_FILE_OFFSET_BITS=64 -fPIC $(INCLUDES) + else + export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC $(INCLUDES) + endif +diff --git a/docs/content/history.rst b/docs/content/history.rst +index c59e84e..ac04a02 100644 +--- a/docs/content/history.rst ++++ b/docs/content/history.rst +@@ -4,23 +4,24 @@ Release History + + Version 2.26.0 (7-July-2016) + ============================ +-1. Fixed a major memory leak when using ``-sorted``. Thanks to Emily Tsang and Steohen Montgomery. ++1. Fixed a major memory leak when using ``-sorted``. Thanks to Emily Tsang and Stephen Montgomery. + 2. Fixed a bug for BED files containing a single record with no newline. Thanks to @jmarshall. +-3. The ``getfasta`` tool includes name, chromosome and position in fasta headers when the ``-name`` option is used. Thanks to @rishavray. +-4. Fixed a bug that now forces the ``coverage`` tool to process every record in the ``-a`` file. +-5. Fixed a bug preventing proper processing of BED files with consecutive tabs. +-6. VCF files containing structural variants now infer SV length from either the SVLEN or END INFO fields. Thanks to Zev Kronenberg. +-7. Resolve off by one bugs when intersecting GFF or VCF files with BED files. +-8. The ``shuffle`` tool now uses roulette wheel sampling to shuffle to ``-incl`` regions based upon the size of the interval. Thanks to Zev Kronenberg and Michael Imbeault. +-9. Fixed a bug in ``coverage`` that prevented correct calculation of depth when using the ``-split`` option. +-10. The ``shuffle`` tool warns when an interval exceeds the maximum chromosome length. +-11. 
The ``complement`` tool better checks intervals against the chromosome lengths. +-12. Fixes for ``stddev``, ``min``, and ``max`` operations. Thanks to @jmarshall. +-13. Enabled ``stdev``, ``sstdev``, ``freqasc``, and ``freqdesc`` options for ``groupby``. +-14. Allow ``-s`` and ``-w`` to be used in any order for ``makewindows``. +-15. Added new ``-bedOut`` option to ``getfasta``. +-16. The ``-r`` option forces the ``-F`` value for ``intersect``. +-17. Add ``-pc`` option to the ``genomecov`` tool, allowing coverage to be calculated based upon paired-end fragments. ++3. Fixed a bug in the contingency table values for the ``fisher`` tool. ++4. The ``getfasta`` tool includes name, chromosome and position in fasta headers when the ``-name`` option is used. Thanks to @rishavray. ++5. Fixed a bug that now forces the ``coverage`` tool to process every record in the ``-a`` file. ++6. Fixed a bug preventing proper processing of BED files with consecutive tabs. ++7. VCF files containing structural variants now infer SV length from either the SVLEN or END INFO fields. Thanks to Zev Kronenberg. ++8. Resolve off by one bugs when intersecting GFF or VCF files with BED files. ++9. The ``shuffle`` tool now uses roulette wheel sampling to shuffle to ``-incl`` regions based upon the size of the interval. Thanks to Zev Kronenberg and Michael Imbeault. ++10. Fixed a bug in ``coverage`` that prevented correct calculation of depth when using the ``-split`` option. ++11. The ``shuffle`` tool warns when an interval exceeds the maximum chromosome length. ++12. The ``complement`` tool better checks intervals against the chromosome lengths. ++13. Fixes for ``stddev``, ``min``, and ``max`` operations. Thanks to @jmarshall. ++14. Enabled ``stdev``, ``sstdev``, ``freqasc``, and ``freqdesc`` options for ``groupby``. ++15. Allow ``-s`` and ``-w`` to be used in any order for ``makewindows``. ++16. Added new ``-bedOut`` option to ``getfasta``. ++17. 
The ``-r`` option forces the ``-F`` value for ``intersect``. ++18. Add ``-pc`` option to the ``genomecov`` tool, allowing coverage to be calculated based upon paired-end fragments. + + + Version 2.25.0 (3-Sept-2015) +diff --git a/docs/index.rst b/docs/index.rst +index 2d67581..67cdd91 100755 +--- a/docs/index.rst ++++ b/docs/index.rst +@@ -11,6 +11,7 @@ genomic file formats such as BAM, BED, GFF/GTF, VCF. While each individual tool + *intersect* two interval files), quite sophisticated analyses can be conducted + by combining multiple bedtools operations on the UNIX command line. + ++**bedtools** is developed in the `Quinlan laboratory <http://quinlanlab.org>`_ at the `University of Utah <http://www.utah.edu/>`_ and benefits from fantastic contributions made by scientists worldwide. + + ========================== + Tutorial +diff --git a/docs/templates/sidebar-intro.html b/docs/templates/sidebar-intro.html +index 262da46..dc430e7 100644 +--- a/docs/templates/sidebar-intro.html ++++ b/docs/templates/sidebar-intro.html +@@ -8,7 +8,7 @@ + <li><a target="_blank" href="https://bedtools.googlecode.com">Old Releases @ Google Code</a></li> + <li><a target="_blank" href="http://groups.google.com/group/bedtools-discuss">Mailing list @ Google Groups</a></li> + <li><a target="_blank" href="http://www.biostars.org/show/tag/bedtools/">Queries @ Biostar</a></li> +- <li><a target="_blank" href="http://quinlanlab.org">Quinlan lab @ UVa</a></li> ++ <li><a target="_blank" href="http://quinlanlab.org">Quinlan lab @ UU</a></li> + + </ul> + +diff --git a/src/bedtools.cpp b/src/bedtools.cpp +index 088ea70..b03b072 100644 +--- a/src/bedtools.cpp ++++ b/src/bedtools.cpp +@@ -34,8 +34,8 @@ using namespace std; + // define our parameter checking macro + #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +-bool sub_main(const QuickString &subCmd); +-void showHelp(const QuickString &subCmd); ++bool 
sub_main(const string &subCmd); ++void showHelp(const string &subCmd); + + int annotate_main(int argc, char* argv[]);// + int bamtobed_main(int argc, char* argv[]);// +@@ -92,7 +92,7 @@ int main(int argc, char *argv[]) + // make sure the user at least entered a sub_command + if (argc < 2) return bedtools_help(); + +- QuickString subCmd(argv[1]); ++ string subCmd(argv[1]); + BedtoolsDriver btDriver; + if (btDriver.supports(subCmd)) { + +@@ -190,8 +190,13 @@ int main(int argc, char *argv[]) + + int bedtools_help(void) + { +- cout << PROGRAM_NAME << ": flexible tools for genome arithmetic and DNA sequence analysis.\n"; +- cout << "usage: bedtools <subcommand> [options]" << endl << endl; ++ cout << PROGRAM_NAME << " is a powerful toolset for genome arithmetic." << endl << endl; ++ cout << "Version: " << VERSION << endl; ++ cout << "About: developed in the quinlanlab.org and by many contributors worldwide." << endl; ++ cout << "Docs: http://bedtools.readthedocs.io/" << endl; ++ cout << "Code: https://github.com/arq5x/bedtools2" << endl; ++ cout << "Mail: https://groups.google.com/forum/#!forum/bedtools-discuss" << endl << endl; ++ cout << "Usage: bedtools <subcommand> [options]" << endl << endl; + + cout << "The bedtools sub-commands include:" << endl; + +@@ -287,7 +292,7 @@ int bedtools_faq(void) + return 0; + } + +-void showHelp(const QuickString &subCmd) { ++void showHelp(const string &subCmd) { + if (subCmd == "intersect") { + intersect_help(); + } else if (subCmd == "map") { +diff --git a/src/complementFile/complementFile.cpp b/src/complementFile/complementFile.cpp +index 803b7c5..5d3b384 100644 +--- a/src/complementFile/complementFile.cpp ++++ b/src/complementFile/complementFile.cpp +@@ -38,7 +38,7 @@ void ComplementFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hi + const Record *rec = hits.getKey(); + + //test for chrom change. 
+- const QuickString &newChrom = rec->getChrName(); ++ const string &newChrom = rec->getChrName(); + if (_currChrom != newChrom) { + + outPutLastRecordInPrevChrom(); +@@ -95,7 +95,7 @@ void ComplementFile::giveFinalReport(RecordOutputMgr *outputMgr) { + + void ComplementFile::outPutLastRecordInPrevChrom() + { +- const QuickString &chrom = _outRecord.getChrName(); ++ const string &chrom = _outRecord.getChrName(); + + //do nothing if triggered by first record in DB. At this point, + //there was no prev chrom, so nothing is stored in the output Record yet. +@@ -106,7 +106,7 @@ void ComplementFile::outPutLastRecordInPrevChrom() + printRecord(maxChromSize); + } + +-bool ComplementFile::fastForward(const QuickString &newChrom) { ++bool ComplementFile::fastForward(const string &newChrom) { + if (!newChrom.empty() && !_genomeFile->hasChrom(newChrom)) return false; + + int i= _currPosInGenomeList +1; +@@ -133,14 +133,14 @@ bool ComplementFile::fastForward(const QuickString &newChrom) { + void ComplementFile::printRecord(int endPos) + { + _outRecord.setStartPos(_currStartPos); +- QuickString startStr; +- startStr.append(_currStartPos); +- _outRecord.setStartPosStr(startStr); ++ stringstream startStr; ++ startStr << _currStartPos; ++ _outRecord.setStartPosStr(startStr.str()); + + _outRecord.setEndPos(endPos); +- QuickString endStr; +- endStr.append(endPos); +- _outRecord.setEndPosStr(endStr); ++ stringstream endStr; ++ endStr << endPos; ++ _outRecord.setEndPosStr(endStr.str()); + + _outputMgr->printRecord(&_outRecord); + _outputMgr->newline(); +diff --git a/src/complementFile/complementFile.h b/src/complementFile/complementFile.h +index 3382dbd..a90b6c2 100644 +--- a/src/complementFile/complementFile.h ++++ b/src/complementFile/complementFile.h +@@ -34,17 +34,17 @@ public: + protected: + FileRecordMergeMgr *_frm; + Bed3Interval _outRecord; +- QuickString _currChrom; ++ string _currChrom; + const NewGenomeFile *_genomeFile; + int _currStartPos; + RecordOutputMgr *_outputMgr; 
+- const vector<QuickString> &_chromList; ++ const vector<string> &_chromList; + int _currPosInGenomeList; + + virtual ContextComplement *upCast(ContextBase *context) { return static_cast<ContextComplement *>(context); } + + void outPutLastRecordInPrevChrom(); +- bool fastForward(const QuickString &newChrom); ++ bool fastForward(const string &newChrom); + void printRecord(int endPos); + + }; +diff --git a/src/coverageFile/coverageFile.cpp b/src/coverageFile/coverageFile.cpp +index 9473eeb..b01eda4 100644 +--- a/src/coverageFile/coverageFile.cpp ++++ b/src/coverageFile/coverageFile.cpp +@@ -6,6 +6,7 @@ + */ + + #include "coverageFile.h" ++#include <iomanip> + + CoverageFile::CoverageFile(ContextCoverage *context) + : IntersectFile(context), +@@ -13,6 +14,7 @@ CoverageFile::CoverageFile(ContextCoverage *context) + _depthArrayCapacity(0), + _queryLen(0), + _totalQueryLen(0), ++ _hitCount(0), + _queryOffset(0), + _floatValBuf(NULL) + { +@@ -34,40 +36,38 @@ CoverageFile::~CoverageFile() { + } + + +-void CoverageFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) { +- makeDepthCount(hits); +- _finalOutput.clear(); +- +- switch(upCast(_context)->getCoverageType()) { +- case ContextCoverage::COUNT: +- doCounts(outputMgr, hits); +- break; +- +- case ContextCoverage::PER_BASE: +- doPerBase(outputMgr, hits); +- break; +- +- case ContextCoverage::MEAN: +- doMean(outputMgr, hits); +- break; +- +- case ContextCoverage::HIST: +- doHist(outputMgr, hits); +- break; +- +- case ContextCoverage::DEFAULT: +- default: +- doDefault(outputMgr, hits); +- break; +- +- } +- ++void CoverageFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) ++{ ++ makeDepthCount(hits); ++ _finalOutput.clear(); ++ ++ switch(upCast(_context)->getCoverageType()) { ++ case ContextCoverage::COUNT: ++ doCounts(outputMgr, hits); ++ break; ++ ++ case ContextCoverage::PER_BASE: ++ doPerBase(outputMgr, hits); ++ break; ++ ++ case ContextCoverage::MEAN: ++ doMean(outputMgr, hits); ++ 
break; ++ ++ case ContextCoverage::HIST: ++ doHist(outputMgr, hits); ++ break; ++ ++ case ContextCoverage::DEFAULT: ++ default: ++ doDefault(outputMgr, hits); ++ break; ++ } + } + + void CoverageFile::cleanupHits(RecordKeyVector &hits) { + IntersectFile::cleanupHits(hits); + memset(_depthArray, 0, sizeof(size_t) * _queryLen); +- + } + + void CoverageFile::giveFinalReport(RecordOutputMgr *outputMgr) { +@@ -77,19 +77,25 @@ void CoverageFile::giveFinalReport(RecordOutputMgr *outputMgr) { + return; + } + ++ + for (depthMapType::iterator iter = _finalDepthMap.begin(); iter != _finalDepthMap.end(); iter++) { + size_t depth = iter->first; + size_t basesAtDepth = iter->second; ++ //cout << "x\n"; + float depthPct = (float)basesAtDepth / (float)_totalQueryLen; +- +- _finalOutput = "all\t"; +- _finalOutput.append(static_cast<uint32_t>(depth)); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(basesAtDepth)); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(_totalQueryLen)); +- _finalOutput.append("\t"); +- format(depthPct); ++ //cout << "y\n"; ++ ostringstream s; ++ s << "all\t"; ++ s << depth; ++ s << "\t"; ++ s << basesAtDepth; ++ s << "\t"; ++ s << _totalQueryLen; ++ s << "\t"; ++ char *depthPctString; ++ asprintf(&depthPctString, "%0.7f", depthPct); ++ s << depthPctString; ++ _finalOutput = s.str(); + + outputMgr->printRecord(NULL, _finalOutput); + } +@@ -101,24 +107,57 @@ void CoverageFile::makeDepthCount(RecordKeyVector &hits) { + _queryLen = (size_t)(key->getEndPos() - _queryOffset); + _totalQueryLen += _queryLen; + +- //resize depth array if needed ++ // resize depth array if needed + if (_depthArrayCapacity < _queryLen) { + _depthArray = (size_t*)realloc(_depthArray, sizeof(size_t) * _queryLen); + _depthArrayCapacity = _queryLen; + memset(_depthArray, 0, sizeof(size_t) * _depthArrayCapacity); + } +- +- //loop through hits, which may not be in sorted order, due to +- //potential multiple databases, and increment the 
depth array as needed. +- for (RecordKeyVector::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) { +- const Record *dbRec = *iter; +- int dbStart = dbRec->getStartPos(); +- int dbEnd = dbRec->getEndPos(); +- int maxStart = max(_queryOffset, dbStart); +- int minEnd = min(dbEnd, key->getEndPos()); +- +- for (int i=maxStart; i < minEnd; i++) { +- _depthArray[i - _queryOffset]++; ++ _hitCount = 0; ++ // no -split ++ if (!(_context)->getObeySplits()) ++ { ++ //loop through hits, which may not be in sorted order, due to ++ //potential multiple databases, and increment the depth array as needed. ++ for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) ++ { ++ const Record *dbRec = *iter; ++ int dbStart = dbRec->getStartPos(); ++ int dbEnd = dbRec->getEndPos(); ++ int maxStart = max(_queryOffset, dbStart); ++ int minEnd = min(dbEnd, key->getEndPos()); ++ ++ for (int i=maxStart; i < minEnd; i++) { ++ _depthArray[i - _queryOffset]++; ++ } ++ _hitCount++; ++ } ++ } ++ // -split ++ else ++ { ++ for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) { ++ const Record *dbRec = *iter; ++ bool count_hit = false; ++ for (size_t i = 0; i < dbRec->block_starts.size(); ++i) ++ { ++ int block_start = dbRec->block_starts[i]; ++ int block_end = dbRec->block_ends[i]; ++ int maxStart = max(_queryOffset, block_start); ++ int minEnd = min(block_end, key->getEndPos()); ++ if ((minEnd - maxStart) > 0) ++ { ++ for (int i = maxStart; i < minEnd; i++) ++ { ++ _depthArray[i - _queryOffset]++; ++ } ++ count_hit = true; ++ } ++ } ++ if (count_hit) ++ { ++ _hitCount++; ++ } + } + } + } +@@ -135,19 +174,23 @@ size_t CoverageFile::countBasesAtDepth(size_t depth) { + + void CoverageFile::doCounts(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { +- _finalOutput = static_cast<uint32_t>(hits.size()); ++ ostringstream s; ++ s << _hitCount; ++ _finalOutput.append(s.str()); + 
outputMgr->printRecord(hits.getKey(), _finalOutput); + } + + void CoverageFile::doPerBase(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { + //loop through all bases in query, printing full record and metrics for each +- const Record * queryRec = hits.getKey(); ++ ++ Record * queryRec = hits.getKey(); + for (size_t i= 0; i < _queryLen; i++) { +- _finalOutput = static_cast<uint32_t>(i+1); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(_depthArray[i])); +- ++ ostringstream s; ++ s << (i+1); ++ s << "\t"; ++ s << _depthArray[i]; ++ _finalOutput = s.str(); + outputMgr->printRecord(queryRec, _finalOutput); + } + } +@@ -158,7 +201,12 @@ void CoverageFile::doMean(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + for (size_t i= 0; i < _queryLen; i++) { + sum += _depthArray[i]; + } +- format((float)sum / (float)_queryLen); ++ ostringstream s; ++ float mean = ((float)sum / (float)_queryLen); ++ char *meanString; ++ asprintf(&meanString, "%0.7f", mean); ++ s << meanString; ++ _finalOutput.append(s.str()); + outputMgr->printRecord(hits.getKey(), _finalOutput); + } + +@@ -166,7 +214,6 @@ void CoverageFile::doMean(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + void CoverageFile::doHist(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { + //make a map of depths to num bases with that depth +- + _currDepthMap.clear(); + for (size_t i=0; i < _queryLen; i++) { + _currDepthMap[_depthArray[i]]++; +@@ -176,40 +223,38 @@ void CoverageFile::doHist(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + for (depthMapType::iterator iter = _currDepthMap.begin(); iter != _currDepthMap.end(); iter++) { + size_t depth = iter->first; + size_t numBasesAtDepth = iter->second; +- float coveredBases = (float)numBasesAtDepth / (float)_queryLen; +- +- _finalOutput = static_cast<uint32_t>(depth); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(numBasesAtDepth)); +- _finalOutput.append("\t"); +- 
_finalOutput.append(static_cast<uint32_t>(_queryLen)); +- _finalOutput.append("\t"); +- format(coveredBases); +- ++ float coveredFraction = (float)numBasesAtDepth / (float)_queryLen; ++ ++ ostringstream s; ++ s << depth; ++ s << "\t"; ++ s << numBasesAtDepth; ++ s << "\t"; ++ s << _queryLen; ++ s << "\t"; ++ char *coveredFractionString; ++ asprintf(&coveredFractionString, "%0.7f", coveredFraction); ++ s << coveredFractionString; ++ _finalOutput = s.str(); + outputMgr->printRecord(hits.getKey(), _finalOutput); + } +- + } + + void CoverageFile::doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { + size_t nonZeroBases = _queryLen - countBasesAtDepth(0); +- float coveredBases = (float)nonZeroBases / (float)_queryLen; +- +- _finalOutput = static_cast<uint32_t>(hits.size()); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(nonZeroBases)); +- _finalOutput.append("\t"); +- _finalOutput.append(static_cast<uint32_t>(_queryLen)); +- _finalOutput.append("\t"); +- format(coveredBases); +- ++ float coveredFraction = (float)nonZeroBases / (float)_queryLen; ++ ++ ostringstream s; ++ s << _hitCount; ++ s << "\t"; ++ s << nonZeroBases; ++ s << "\t"; ++ s << _queryLen; ++ s << "\t"; ++ char *coveredFractionString; ++ asprintf(&coveredFractionString, "%0.7f", coveredFraction); ++ s << coveredFractionString; ++ _finalOutput = s.str(); + outputMgr->printRecord(hits.getKey(), _finalOutput); + } +- +-void CoverageFile::format(float val) +-{ +- memset(_floatValBuf, 0, floatValBufLen); +- sprintf(_floatValBuf, "%0.7f", val); +- _finalOutput.append(_floatValBuf); +-} +diff --git a/src/coverageFile/coverageFile.h b/src/coverageFile/coverageFile.h +index fa2c662..691b74f 100644 +--- a/src/coverageFile/coverageFile.h ++++ b/src/coverageFile/coverageFile.h +@@ -8,6 +8,7 @@ + #ifndef COVERAGEFILE_H_ + #define COVERAGEFILE_H_ + ++#include <stdio.h> // for asprintf + #include "intersectFile.h" + #include "ContextCoverage.h" + +@@ -21,12 +22,13 @@ public: + + + 
protected: +- QuickString _finalOutput; ++ string _finalOutput; + + size_t *_depthArray; + size_t _depthArrayCapacity; + size_t _queryLen; + size_t _totalQueryLen; ++ size_t _hitCount; + int _queryOffset; + static const int DEFAULT_DEPTH_CAPACITY = 1024; + char *_floatValBuf; +@@ -47,9 +49,6 @@ protected: + void doMean(RecordOutputMgr *outputMgr, RecordKeyVector &hits); + void doHist(RecordOutputMgr *outputMgr, RecordKeyVector &hits); + void doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits); +- +- void format(float val); +- + }; + + +diff --git a/src/fisher/fisher.cpp b/src/fisher/fisher.cpp +index 7548f18..583b467 100644 +--- a/src/fisher/fisher.cpp ++++ b/src/fisher/fisher.cpp +@@ -91,7 +91,7 @@ void Fisher::giveFinalReport(RecordOutputMgr *outputMgr) + unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) + { + unsigned long intersection = 0; +- const Record *key = recList.getKey(); ++ Record *key = recList.getKey(); + int keyStart = key->getStartPos(); + int keyEnd = key->getEndPos(); + +@@ -99,7 +99,7 @@ unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) + _qsizes.push_back((keyEnd - keyStart)); + + int hitIdx = 0; +- for (RecordKeyVector::const_iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) { ++ for (RecordKeyVector::iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) { + int maxStart = max((*iter)->getStartPos(), keyStart); + int minEnd = min((*iter)->getEndPos(), keyEnd); + _qsizes.push_back((int)(minEnd - maxStart)); +diff --git a/src/groupBy/Makefile b/src/groupBy/Makefile +index 9bb141a..44cd7aa 100644 +--- a/src/groupBy/Makefile ++++ b/src/groupBy/Makefile +@@ -10,6 +10,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ + -I$(UTILITIES_DIR)/general/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ ++ -I$(UTILITIES_DIR)/stringUtilities/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + 
-I$(UTILITIES_DIR)/BamTools/include \ +diff --git a/src/groupBy/groupBy.cpp b/src/groupBy/groupBy.cpp +index 867f15f..1e1dbda 100644 +--- a/src/groupBy/groupBy.cpp ++++ b/src/groupBy/groupBy.cpp +@@ -7,6 +7,8 @@ + #include "groupBy.h" + #include "Tokenizer.h" + #include "ParseTools.h" ++#include "stringUtilities.h" ++#include <utility> + + GroupBy::GroupBy(ContextGroupBy *context) + : ToolBase(context), +@@ -29,8 +31,8 @@ bool GroupBy::init() + for (int i=0; i < numElems; i++) { + //if the item is a range, such as 3-5, + //must split that as well. +- const QuickString &elem = groupColsTokens.getElem(i); + ++ const string &elem = groupColsTokens.getElem(i); + if (strchr(elem.c_str(), '-')) { + Tokenizer rangeElems; + rangeElems.tokenize(elem, '-'); +@@ -59,14 +61,19 @@ bool GroupBy::findNext(RecordKeyVector &hits) + assignPrevFields(); + hits.setKey(_prevRecord); + hits.push_back(_prevRecord); //key should also be part of group for calculations +- while (1) { +- const Record *newRecord = getNextRecord(); +- if (newRecord == NULL) { ++ while (1) ++ { ++ Record *newRecord = getNextRecord(); ++ if (newRecord == NULL) ++ { + _prevRecord = NULL; + break; +- } else if (canGroup(newRecord)) { ++ } else if (canGroup(newRecord)) ++ { + hits.push_back(newRecord); +- } else { ++ } ++ else ++ { + _prevRecord = newRecord; + break; + } +@@ -77,15 +84,19 @@ bool GroupBy::findNext(RecordKeyVector &hits) + void GroupBy::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { + +- const Record *rec = hits.getKey(); +- const QuickString &opVal = _context->getColumnOpsVal(hits); +- if (upCast(_context)->printFullCols()) { ++ Record *rec = hits.getKey(); ++ const string &opVal = _context->getColumnOpsVal(hits); ++ if (upCast(_context)->printFullCols()) ++ { + outputMgr->printRecord(rec, opVal); +- } else { +- QuickString outBuf; +- for (int i=0; i < (int)_groupCols.size(); i++) { ++ } ++ else ++ { ++ string outBuf; ++ for (int i = 0; i < (int)_groupCols.size(); i++) ++ { + 
outBuf.append(rec->getField(_groupCols[i])); +- outBuf.append('\t'); ++ outBuf.append("\t"); + } + outBuf.append(opVal); + outputMgr->printRecord(NULL, outBuf); +@@ -95,7 +106,7 @@ void GroupBy::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + + void GroupBy::cleanupHits(RecordKeyVector &hits) + { +- RecordKeyVector::const_iterator_type iter = hits.begin(); ++ RecordKeyVector::iterator_type iter = hits.begin(); + for (; iter != hits.end(); iter = hits.next()) + { + _queryFRM->deleteRecord(*iter); +@@ -103,12 +114,16 @@ void GroupBy::cleanupHits(RecordKeyVector &hits) + hits.clearAll(); + } + +-const Record *GroupBy::getNextRecord() { +- while (!_queryFRM->eof()) { ++Record *GroupBy::getNextRecord() { ++ while (!_queryFRM->eof()) ++ { + Record *queryRecord = _queryFRM->getNextRecord(); +- if (queryRecord == NULL) { ++ if (queryRecord == NULL) ++ { + continue; +- } else { ++ } ++ else ++ { + return queryRecord; + } + } +@@ -121,19 +136,22 @@ void GroupBy::assignPrevFields() { + } + } + +-bool GroupBy::canGroup(const Record *newRecord) { +- +- for (int i=0; i < (int)_groupCols.size(); i++) { ++bool GroupBy::canGroup(Record *newRecord) ++{ ++ for (int i = 0; i < (int)_groupCols.size(); i++) ++ { + int fieldNum = _groupCols[i]; +- const QuickString &newField = newRecord->getField(fieldNum); +- const QuickString &oldField = _prevFields[i]; +- if (upCast(_context)->ignoreCase()) { +- if (oldField.stricmp(newField)) return false; +- } else { ++ const string &newField = newRecord->getField(fieldNum); ++ const string &oldField = _prevFields[i]; ++ if (upCast(_context)->ignoreCase()) ++ { ++ if (toLower(oldField) != toLower(newField)) return false; ++ } ++ else ++ { + if (oldField != newField) return false; + } + } + return true; +- + } + +diff --git a/src/groupBy/groupBy.h b/src/groupBy/groupBy.h +index 2c96dc9..c44ba3c 100644 +--- a/src/groupBy/groupBy.h ++++ b/src/groupBy/groupBy.h +@@ -27,11 +27,11 @@ protected: + virtual ContextGroupBy *upCast(ContextBase 
*context) { return static_cast<ContextGroupBy *>(context); } + + vector<int> _groupCols; +- vector<QuickString> _prevFields; ++ vector<string> _prevFields; + FileRecordMgr *_queryFRM; +- const Record *_prevRecord; +- const Record *getNextRecord(); +- bool canGroup(const Record *); ++ Record *_prevRecord; ++ Record *getNextRecord(); ++ bool canGroup(Record *); + void assignPrevFields(); + }; + +diff --git a/src/intersectFile/intersectFile.cpp b/src/intersectFile/intersectFile.cpp +index 01bb222..45977fc 100644 +--- a/src/intersectFile/intersectFile.cpp ++++ b/src/intersectFile/intersectFile.cpp +@@ -67,6 +67,7 @@ bool IntersectFile::findNext(RecordKeyVector &hits) + + void IntersectFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) + { ++ RecordKeyVector::iterator_type hitListIter = hits.begin(); + outputMgr->printRecord(hits); + } + +@@ -120,19 +121,16 @@ void IntersectFile::makeSweep() { + void IntersectFile::checkSplits(RecordKeyVector &hitSet) + { + if (upCast(_context)->getObeySplits()) { +- RecordKeyVector keySet(hitSet.getKey()); +- RecordKeyVector resultSet(hitSet.getKey()); +- RecordKeyVector overlapSet(hitSet.getKey()); +- upCast(_context)->getSplitBlockInfo()->findBlockedOverlaps(keySet, hitSet, resultSet, overlapSet); +- ++ + // when using coverage, we need a list of the sub-intervals of coverage + // so that per-base depth can be properly calculated when obeying splits + if (_context->getProgram() == ContextBase::COVERAGE) + { +- hitSet.swap(overlapSet); ++ upCast(_context)->getSplitBlockInfo()->findBlockedOverlaps(hitSet, true); + } +- else { +- hitSet.swap(resultSet); ++ else ++ { ++ upCast(_context)->getSplitBlockInfo()->findBlockedOverlaps(hitSet, false); + } + } + } +diff --git a/src/intersectFile/intersectFile.h b/src/intersectFile/intersectFile.h +index f40e750..3c85f93 100644 +--- a/src/intersectFile/intersectFile.h ++++ b/src/intersectFile/intersectFile.h +@@ -26,11 +26,11 @@ public: + IntersectFile(ContextIntersect *context); 
+ virtual ~IntersectFile(); + virtual bool init(); +- virtual bool findNext(RecordKeyVector &hits); +- virtual void processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits); +- virtual void cleanupHits(RecordKeyVector &hits); ++ virtual bool findNext(RecordKeyVector &); ++ virtual void processHits(RecordOutputMgr *, RecordKeyVector &); ++ virtual void cleanupHits(RecordKeyVector &); + virtual bool finalizeCalculations(); +- virtual void giveFinalReport(RecordOutputMgr *outputMgr) {} ++ virtual void giveFinalReport(RecordOutputMgr *) {} + + + protected: +diff --git a/src/jaccard/jaccard.cpp b/src/jaccard/jaccard.cpp +index c99c117..2f61b86 100644 +--- a/src/jaccard/jaccard.cpp ++++ b/src/jaccard/jaccard.cpp +@@ -57,13 +57,13 @@ void Jaccard::giveFinalReport(RecordOutputMgr *outputMgr) { + unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits) + { + unsigned long intersection = 0; +- const Record *key = hits.getKey(); ++ Record *key = hits.getKey(); + int keyStart = key->getStartPos(); + int keyEnd = key->getEndPos(); + + int hitIdx = 0; +- for (RecordKeyVector::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) { +- const Record *currRec = *iter; ++ for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) { ++ Record *currRec = *iter; + int maxStart = max(currRec->getStartPos(), keyStart); + int minEnd = min(currRec->getEndPos(), keyEnd); + if (_context->getObeySplits()) { +diff --git a/src/jaccard/jaccard.h b/src/jaccard/jaccard.h +index e23ad9c..8c4f6db 100644 +--- a/src/jaccard/jaccard.h ++++ b/src/jaccard/jaccard.h +@@ -15,11 +15,11 @@ class Jaccard : public IntersectFile { + + public: + Jaccard(ContextJaccard *context); +- virtual bool findNext(RecordKeyVector &hits); +- virtual void processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {} +- virtual void cleanupHits(RecordKeyVector &hits); ++ virtual bool findNext(RecordKeyVector &); ++ virtual void 
processHits(RecordOutputMgr *, RecordKeyVector &) {} ++ virtual void cleanupHits(RecordKeyVector &); + virtual bool finalizeCalculations(); +- virtual void giveFinalReport(RecordOutputMgr *outputMgr); ++ virtual void giveFinalReport(RecordOutputMgr *); + + + protected: +diff --git a/src/nekSandbox1/nekSandboxMain.cpp b/src/nekSandbox1/nekSandboxMain.cpp +index d228540..d9b298a 100644 +--- a/src/nekSandbox1/nekSandboxMain.cpp ++++ b/src/nekSandbox1/nekSandboxMain.cpp +@@ -67,7 +67,7 @@ int nek_sandbox1_main(int argc,char** argv) + // printf("%s", sLine); + // } + // return 0; +-// QuickString filename(argv[1]); ++// string filename(argv[1]); + // istream *inputStream = NULL; + // if (filename == "-") { + // inputStream = &cin; +@@ -91,7 +91,7 @@ int nek_sandbox1_main(int argc,char** argv) + //// exit(1); + //// } + //// } +-// QuickString _bamHeader = _bamReader.GetHeaderText(); ++// string _bamHeader = _bamReader.GetHeaderText(); + // BamTools::RefVector _references = _bamReader.GetReferenceData(); + // + // if (_bamHeader.empty() || _references.empty()) { +@@ -107,10 +107,10 @@ int nek_sandbox1_main(int argc,char** argv) + // exit(1); + // } + // string sLine; +-// vector<QuickString> fields; +-// QuickString chrName; ++// vector<string> fields; ++// string chrName; + // +-// vector<QuickString> chroms; ++// vector<string> chroms; + // chroms.push_back("1"); + // chroms.push_back("2"); + // chroms.push_back("10"); +@@ -127,7 +127,7 @@ int nek_sandbox1_main(int argc,char** argv) + // continue; + // } + // Tokenize(sLine.c_str(), fields); +-// const QuickString &currChrom = fields[2]; ++// const string &currChrom = fields[2]; + // if (currChrom == chroms[chromIdx]) { + // cout << sLine << endl; + // chromCounts[chromIdx]++; +@@ -157,7 +157,7 @@ int nek_sandbox1_main(int argc,char** argv) + // cout << "RecordType is : " << frm.getRecordType() << ", " << frm.getRecordTypeName() << "." 
<< endl; + // + // bool headerFound = false; +-// QuickString outbuf; ++// string outbuf; + // while (!frm.eof()) { + // Record *record = frm.getNextRecord(); + // if (!headerFound && frm.hasHeader()) { +diff --git a/src/regressTest/regressTestMain.cpp b/src/regressTest/regressTestMain.cpp +index 0377ca6..01a2816 100644 +--- a/src/regressTest/regressTestMain.cpp ++++ b/src/regressTest/regressTestMain.cpp +@@ -3,7 +3,7 @@ + #include <cstring> + #include <cstdlib> + #include <cstdio> +-#include "QuickString.h" ++#include "string.h" + + void usage() { + printf("Usage: bedtools regressTest sub-prog targetVersion configFile [optionsToTest]\n"); +@@ -31,7 +31,7 @@ int regress_test_main(int argc, char **argv) { + usage(); + exit(1); + } +- QuickString program(argv[2]); ++ string program(argv[2]); + + RegressTest *regressTest = new RegressTest(); + +diff --git a/src/shiftBed/shiftBed.cpp b/src/shiftBed/shiftBed.cpp +index 81022f8..50724ce 100644 +--- a/src/shiftBed/shiftBed.cpp ++++ b/src/shiftBed/shiftBed.cpp +@@ -51,7 +51,7 @@ void BedShift::AddShift(BED &bed) { + + CHRPOS chromSize = (CHRPOS)_genome->getChromSize(bed.chrom); + +- float shift; ++ double shift; + + if (bed.strand == "-") { + shift = _shiftMinus; +@@ -59,7 +59,7 @@ void BedShift::AddShift(BED &bed) { + shift = _shiftPlus; + } + if (_fractional == true) +- shift = shift * (float)bed.size(); ++ shift = shift * (double)bed.size(); + + if ((bed.start + shift) < 0) + bed.start = 0; +diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp +index 9b71125..2478ccc 100644 +--- a/src/shuffleBed/shuffleBed.cpp ++++ b/src/shuffleBed/shuffleBed.cpp +@@ -73,14 +73,15 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, + _haveExclude = true; + } + +- if (_haveInclude) { ++ if (_haveInclude) ++ { + _include = new BedFile(includeFile); +- _include->loadBedFileIntoVector(); +- +- for(std::vector<BED>::iterator it = _include->bedList.begin(); ... 5743 lines suppressed ... 
-- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bedtools.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
