This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository bwa.
commit b93d335740137413db21e8136ce17f6c1ec9c0bc Author: Andreas Tille <[email protected]> Date: Tue Mar 18 13:47:03 2014 +0100 Imported Upstream version 0.7.7 --- NEWS | 17 +++++++++++++++++ README.md | 3 ++- bwa.1 | 2 +- bwa.c | 19 ++++++++++++------- bwamem.c | 4 ++++ fastmap.c | 4 +++- main.c | 22 +++++++++++++++++++++- 7 files changed, 60 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index eb9c37a..a7c64ed 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,20 @@ +Release 0.7.7 (25 February, 2014) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This release fixes incorrect MD tags in the BWA-MEM output. + +A note about short-read mapping to GRCh38. The new human reference genome +GRCh38 contains 60Mbp of program-generated alpha repeat arrays, some of which are +hard masked as they cannot be localized. These highly repetitive arrays make +BWA-MEM ~50% slower. If you are concerned with the performance of BWA-MEM, you +may consider using option "-c2000 -m50". On simulated data, this setting helps +the performance at a very minor cost to accuracy. I may consider changing the +default in future releases. + +(0.7.7: 25 February 2014, r441) + + + Release 0.7.6 (31 January, 2014) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.md b/README.md index 009a4ca..ac1e57e 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ different sub-commands: **aln/samse/sampe** for BWA-backtrack, ###Availability BWA is released under [GPLv3][1]. The latest source code is [freely -available][2] at github. Released packages can [be downloaded ][3] at +available][2] at github. Released packages can [be downloaded][3] at SourceForge. After you acquire the source code, simply use `make` to compile and copy the single executable `bwa` to the destination you want. The only dependency of BWA is [zlib][14]. @@ -73,3 +73,4 @@ do not have plan to submit it to a peer-reviewed journal in the near future.
[12]: http://arxiv.org/abs/1303.3997 [13]: http://arxiv.org/ [14]: http://zlib.net/ +[15]: https://github.com/lh3/bwa/tree/mem diff --git a/bwa.1 b/bwa.1 index 5949a1b..601a529 100644 --- a/bwa.1 +++ b/bwa.1 @@ -1,4 +1,4 @@ -.TH bwa 1 "31 January 2014" "bwa-0.7.6" "Bioinformatics tools" +.TH bwa 1 "25 Feburary 2014" "bwa-0.7.7" "Bioinformatics tools" .SH NAME .PP bwa - Burrows-Wheeler Alignment Tool diff --git a/bwa.c b/bwa.c index aec04d8..140d57e 100644 --- a/bwa.c +++ b/bwa.c @@ -93,6 +93,7 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa int i; int64_t rlen; kstring_t str; + const char *int2base; *n_cigar = 0; *NM = -1; if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand @@ -124,9 +125,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa // NW alignment *score = ksw_global(l_query, query, rlen, rseq, 5, mat, q, r, w, n_cigar, &cigar); } - {// compute NM + {// compute NM and MD int k, x, y, u, n_mm = 0, n_gap = 0; str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR + int2base = rb < l_pac? 
"ACGTN" : "TGCAN"; for (k = 0, x = y = u = 0; k < *n_cigar; ++k) { int op, len; cigar = (uint32_t*)str.s; @@ -134,17 +136,20 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa if (op == 0) { // match for (i = 0; i < len; ++i) { if (query[x + i] != rseq[y + i]) { - kputw(u, &str); kputc("ACGTN"[rseq[y+i]], &str); + kputw(u, &str); + kputc(int2base[rseq[y+i]], &str); ++n_mm; u = 0; } else ++u; } x += len; y += len; } else if (op == 2) { // deletion - kputw(u, &str); kputc('^', &str); - for (i = 0; i < len; ++i) - kputc("ACGTN"[rseq[y+i]], &str); - u = 0; - y += len, n_gap += len; + if (k > 0 && k < *n_cigar - 1) { // don't do the following if D is the first or the last CIGAR + kputw(u, &str); kputc('^', &str); + for (i = 0; i < len; ++i) + kputc(int2base[rseq[y+i]], &str); + u = 0; n_gap += len; + } + y += len; } else if (op == 1) x += len, n_gap += len; // insertion } kputw(u, &str); kputc(0, &str); diff --git a/bwamem.c b/bwamem.c index 6f77064..19ca561 100644 --- a/bwamem.c +++ b/bwamem.c @@ -1024,7 +1024,9 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn worker_t w; mem_alnreg_v *regs; mem_pestat_t pes[4]; + double ctime, rtime; + ctime = cputime(); rtime = realtime(); regs = malloc(n * sizeof(mem_alnreg_v)); w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac; w.seqs = seqs; w.regs = regs; w.n_processed = n_processed; @@ -1036,4 +1038,6 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn } kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? 
n>>1 : n); // generate alignment free(regs); + if (bwa_verbose >= 3) + fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); } diff --git a/fastmap.c b/fastmap.c index 40cea8c..72d850c 100644 --- a/fastmap.c +++ b/fastmap.c @@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[]) int64_t n_processed = 0; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:")) >= 0) { + while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 'w') opt->w = atoi(optarg); else if (c == 'A') opt->a = atoi(optarg); @@ -50,6 +50,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'v') bwa_verbose = atoi(optarg); else if (c == 'r') opt->split_factor = atof(optarg); else if (c == 'D') opt->chain_drop_ratio = atof(optarg); + else if (c == 'm') opt->max_matesw = atoi(optarg); else if (c == 'C') copy_comment = 1; else if (c == 'Q') { opt->mapQ_coef_len = atoi(optarg); @@ -77,6 +78,7 @@ int main_mem(int argc, char *argv[]) // fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->chain_drop_ratio); + fprintf(stderr, " -m INT perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw); fprintf(stderr, " -S skip mate rescue\n"); fprintf(stderr, " -P skip pairing; mate rescue performed unless -S also in use\n"); fprintf(stderr, " -A INT score for a sequence match [%d]\n", opt->a); diff --git a/main.c b/main.c index f872917..a8df9c0 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.6a-r433" +#define PACKAGE_VERSION 
"0.7.7-r441" #endif int bwa_fa2pac(int argc, char *argv[]); @@ -14,9 +14,17 @@ int bwa_bwt2sa(int argc, char *argv[]); int bwa_index(int argc, char *argv[]); int bwt_bwtgen_main(int argc, char *argv[]); +int bwa_aln(int argc, char *argv[]); +int bwa_sai2sam_se(int argc, char *argv[]); +int bwa_sai2sam_pe(int argc, char *argv[]); + +int bwa_bwtsw2(int argc, char *argv[]); + int main_fastmap(int argc, char *argv[]); int main_mem(int argc, char *argv[]); +int main_pemerge(int argc, char *argv[]); + char *bwa_pg; static int usage() @@ -29,6 +37,11 @@ static int usage() fprintf(stderr, "Command: index index sequences in the FASTA format\n"); fprintf(stderr, " mem BWA-MEM algorithm\n"); fprintf(stderr, " fastmap identify super-maximal exact matches\n"); + fprintf(stderr, " pemerge merge overlapping paired ends (EXPERIMENTAL)\n"); + fprintf(stderr, " aln gapped/ungapped alignment\n"); + fprintf(stderr, " samse generate alignment (single ended)\n"); + fprintf(stderr, " sampe generate alignment (paired ended)\n"); + fprintf(stderr, " bwasw BWA-SW for long queries\n"); fprintf(stderr, "\n"); fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); fprintf(stderr, " pac2bwt generate BWT from PAC\n"); @@ -60,8 +73,15 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1); else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1); + else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1); + else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1); + else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1); + else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1); + else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); + else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); else if (strcmp(argv[1], "fastmap") == 0) ret = 
main_fastmap(argc-1, argv+1); else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1); + else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1); else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bwa.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
