This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch debian/unstable
in repository samtools.

commit 399b28378ebd22623ab4b7a48b519b1f3a1da02b
Author: Petr Danecek <[email protected]>
Date:   Thu Nov 28 11:23:43 2013 +0000

    New "flags" command plus moved filtering from htslib/pileup to BAM reading 
callbacks
---
 Makefile     |  3 ++-
 bam2depth.c  | 15 +++++++----
 bam_flags.c  | 43 +++++++++++++++++++++++++++++++
 bam_plcmd.c  | 83 ++++++++++++++++++++++++++++++++++--------------------------
 bamtk.c      |  5 +++-
 bedcov.c     | 13 +++++++---
 cut_target.c | 27 ++++++++++++--------
 phase.c      | 23 ++++++++++-------
 8 files changed, 146 insertions(+), 66 deletions(-)

diff --git a/Makefile b/Makefile
index bf5557f..e208ac5 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ AOBJS=                bam_index.o bam_plcmd.o sam_view.o \
                        bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o 
bam_color.o \
                        bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o 
sample.o \
                        cut_target.o phase.o bam2depth.o padding.o bedcov.o 
bamshuf.o \
-            faidx.o stats.o 
+            faidx.o stats.o bam_flags.o
             # tview todo: bam_tview.o bam_tview_curses.o bam_tview_html.o 
bam_lpileup.o
 INCLUDES=      -I. -I$(HTSDIR)
 LIBCURSES=     -lcurses # -lXCurses
@@ -121,6 +121,7 @@ bam_stat.o: bam_stat.c $(bam_h)
 bam_tview.o: bam_tview.c $(bam_tview_h)
 bam_tview_curses.o: bam_tview_curses.c $(bam_tview_h)
 bam_tview_html.o: bam_tview_html.c $(bam_tview_h)
+bam_flags.o: bam_flags.c $(sam_h)
 bamshuf.o: bamshuf.c $(htslib_sam_h) $(HTSDIR)/htslib/ksort.h
 bamtk.o: bamtk.c $(bam_h) version.h samtools.h
 bedcov.o: bedcov.c $(htslib_bgzf_h) $(bam_h) $(HTSDIR)/htslib/kseq.h
diff --git a/bam2depth.c b/bam2depth.c
index 6fd023a..c884741 100644
--- a/bam2depth.c
+++ b/bam2depth.c
@@ -24,11 +24,16 @@ int bed_overlap(const void *_h, const char *chr, int beg, 
int end); // test if c
 static int read_bam(void *data, bam1_t *b) // read level filters better go 
here to avoid pileup
 {
        aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary 
structure
-       int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : 
bam_read1(aux->fp, b);
-       if (!(b->core.flag&BAM_FUNMAP)) {
-               if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= 
BAM_FUNMAP;
-               else if (aux->min_len && bam_cigar2qlen(&b->core, 
bam1_cigar(b)) < aux->min_len) b->core.flag |= BAM_FUNMAP;
-       }
+    int ret;
+    while (1)
+    {
+        ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : 
bam_read1(aux->fp, b);
+        if ( ret<0 ) break;
+        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | 
BAM_FDUP) ) continue;
+        if ( (int)b->core.qual < aux->min_mapQ ) continue;
+        if ( aux->min_len && bam_cigar2qlen(&b->core, bam1_cigar(b)) < 
aux->min_len ) continue;
+        break;
+    }
        return ret;
 }
 
diff --git a/bam_flags.c b/bam_flags.c
new file mode 100644
index 0000000..f2d0be3
--- /dev/null
+++ b/bam_flags.c
@@ -0,0 +1,43 @@
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <htslib/sam.h>
+
+static void usage(void)
+{
+    fprintf(stderr, "\n");
+    fprintf(stderr, "About: Convert between textual and numeric flag 
representation\n");
+    fprintf(stderr, "Usage: samtools flags [INT|STR]\n");
+    fprintf(stderr, "Flags:\n");
+    fprintf(stderr, "\t0x%x\tPAIRED        .. paired-end (or multiple-segment) 
sequencing technology\n", BAM_FPAIRED);
+    fprintf(stderr, "\t0x%x\tPROPER_PAIR   .. each segment properly aligned 
according to the alig\n", BAM_FPROPER_PAIR);
+    fprintf(stderr, "\t0x%x\tUNMAP         .. segment unmapped\n", BAM_FUNMAP);
+    fprintf(stderr, "\t0x%x\tMUNMAP        .. next segment in the template 
unmapped\n", BAM_FMUNMAP);
+    fprintf(stderr, "\t0x%x\tREVERSE       .. SEQ is reverse complemented\n", 
BAM_FREVERSE);
+    fprintf(stderr, "\t0x%x\tMREVERSE      .. SEQ of the next segment in the 
template is reversed\n", BAM_FMREVERSE);
+    fprintf(stderr, "\t0x%x\tREAD1         .. the first segment in the 
template\n", BAM_FREAD1);
+    fprintf(stderr, "\t0x%x\tREAD2         .. the last segment in the 
template\n", BAM_FREAD2);
+    fprintf(stderr, "\t0x%x\tSECONDARY     .. secondary alignment\n", 
BAM_FSECONDARY);
+    fprintf(stderr, "\t0x%x\tQCFAIL        .. not passing quality controls\n", 
BAM_FQCFAIL);
+    fprintf(stderr, "\t0x%x\tDUP           .. PCR or optical duplicate\n", 
BAM_FDUP);
+    fprintf(stderr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", 
BAM_FSUPPLEMENTARY);
+    fprintf(stderr, "\n");
+}
+
+
+int main_flags(int argc, char *argv[])
+{
+    if ( argc!=2 ) usage();
+    else
+    {
+        int mask = bam_str2flag(argv[1]);
+        if ( mask<0 ) { fprintf(stderr,"Could not parse \"%s\"\n", argv[1]); 
return 1; }
+        printf("0x%x\t%s\n", mask, bam_flag2str(mask));
+    }
+       return 0;
+}
+
diff --git a/bam_plcmd.c b/bam_plcmd.c
index eb56253..efdf4ab 100644
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -592,6 +592,7 @@ int bam_mpileup(int argc, char *argv[])
        mplp.min_frac = 0.002; mplp.min_support = 1;
        mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
     mplp.argc = argc; mplp.argv = argv;
+    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
     static struct option lopts[] = 
     {
         {"rf",1,0,1},   // require flag
@@ -601,8 +602,14 @@ int bam_mpileup(int argc, char *argv[])
        while ((c = getopt_long(argc, argv, 
"Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsV1:2:vx",lopts,NULL)) >= 0) {
                switch (c) {
         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
-        case  1 : mplp.rflag_require = strtol(optarg,0,0); break;
-        case  2 : mplp.rflag_filter  = strtol(optarg,0,0); break;
+        case  1 : 
+            mplp.rflag_require = bam_str2flag(optarg); 
+            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf 
%s\n", optarg); return 1; }
+            break;
+        case  2 : 
+            mplp.rflag_filter = bam_str2flag(optarg); 
+            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff 
%s\n", optarg); return 1; }
+            break;
                case 'f':
                        mplp.fai = fai_load(optarg);
                        if (mplp.fai == 0) return 1;
@@ -657,47 +664,51 @@ int bam_mpileup(int argc, char *argv[])
                }
        }
        if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
-       if (argc == 1) {
+       if (argc == 1) 
+    {
+        char *tmp_require = bam_flag2str(mplp.rflag_require);
+        char *tmp_filter  = bam_flag2str(mplp.rflag_filter);
                fprintf(stderr, "\n");
                fprintf(stderr, "Usage: samtools mpileup [options] in1.bam 
[in2.bam [...]]\n\n");
                fprintf(stderr, "Input options:\n\n");
-               fprintf(stderr, "       -6           assume the quality is in 
the Illumina-1.3+ encoding\n");
-               fprintf(stderr, "       -A           count anomalous read 
pairs\n");
-               fprintf(stderr, "       -B           disable BAQ 
computation\n");
-               fprintf(stderr, "       -b FILE      list of input BAM 
filenames, one per line [null]\n");
-               fprintf(stderr, "       -C INT       parameter for adjusting 
mapQ; 0 to disable [0]\n");
-               fprintf(stderr, "       -d INT       max per-BAM depth to avoid 
excessive memory usage [%d]\n", mplp.max_depth);
-               fprintf(stderr, "       -E           recalculate extended BAQ 
on the fly thus ignoring existing BQs\n");
-               fprintf(stderr, "       -f FILE      faidx indexed reference 
sequence file [null]\n");
-               fprintf(stderr, "       -G FILE      exclude read groups listed 
in FILE [null]\n");
-               fprintf(stderr, "       -l FILE      list of positions (chr 
pos) or regions (BED) [null]\n");
-               fprintf(stderr, "       -M INT       cap mapping quality at INT 
[%d]\n", mplp.max_mq);
-               fprintf(stderr, "       -r STR       region in which pileup is 
generated [null]\n");
-               fprintf(stderr, "       -R           ignore RG tags\n");
-               fprintf(stderr, "       -q INT       skip alignments with mapQ 
smaller than INT [%d]\n", mplp.min_mq);
-               fprintf(stderr, "       -Q INT       skip bases with baseQ/BAQ 
smaller than INT [%d]\n", mplp.min_baseQ);
-               fprintf(stderr, "       --rf INT     required flags: skip reads 
with mask bits unset []\n");
-               fprintf(stderr, "       --ff INT     filter flags: skip reads 
with mask bits set []\n");
-               fprintf(stderr, "       -x           disable read-pair overlap 
detection\n");
+               fprintf(stderr, "       -6              assume the quality is 
in the Illumina-1.3+ encoding\n");
+               fprintf(stderr, "       -A              count anomalous read 
pairs\n");
+               fprintf(stderr, "       -B              disable BAQ 
computation\n");
+               fprintf(stderr, "       -b FILE         list of input BAM 
filenames, one per line [null]\n");
+               fprintf(stderr, "       -C INT          parameter for adjusting 
mapQ; 0 to disable [0]\n");
+               fprintf(stderr, "       -d INT          max per-BAM depth to 
avoid excessive memory usage [%d]\n", mplp.max_depth);
+               fprintf(stderr, "       -E              recalculate extended 
BAQ on the fly thus ignoring existing BQs\n");
+               fprintf(stderr, "       -f FILE         faidx indexed reference 
sequence file [null]\n");
+               fprintf(stderr, "       -G FILE         exclude read groups 
listed in FILE [null]\n");
+               fprintf(stderr, "       -l FILE         list of positions (chr 
pos) or regions (BED) [null]\n");
+               fprintf(stderr, "       -M INT          cap mapping quality at 
INT [%d]\n", mplp.max_mq);
+               fprintf(stderr, "       -r STR          region in which pileup 
is generated [null]\n");
+               fprintf(stderr, "       -R              ignore RG tags\n");
+               fprintf(stderr, "       -q INT          skip alignments with 
mapQ smaller than INT [%d]\n", mplp.min_mq);
+               fprintf(stderr, "       -Q INT          skip bases with 
baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ);
+               fprintf(stderr, "       --rf STR|INT    required flags: skip 
reads with mask bits unset [%s]\n", tmp_require);
+               fprintf(stderr, "       --ff STR|INT    filter flags: skip 
reads with mask bits set [%s]\n", tmp_filter);
+               fprintf(stderr, "       -x              disable read-pair 
overlap detection\n");
                fprintf(stderr, "\nOutput options:\n\n");
-               fprintf(stderr, "       -D/V         output per-sample DP/DV in 
BCF (requires -g/-v)\n");
-               fprintf(stderr, "       -g/v         generate BCF/VCF output 
(genotype likelihoods)\n");
-               fprintf(stderr, "       -O           output base positions on 
reads (disabled by -g/-u)\n");
-               fprintf(stderr, "       -s           output mapping quality 
(disabled by -g/-u)\n");
-               fprintf(stderr, "       -S           output per-sample strand 
bias P-value in BCF (require -g/-u)\n");
-               fprintf(stderr, "       -u           generate uncompressed 
BCF/VCF output\n");
+               fprintf(stderr, "       -D/V            output per-sample DP/DV 
in BCF (requires -g/-v)\n");
+               fprintf(stderr, "       -g/v            generate BCF/VCF output 
(genotype likelihoods)\n");
+               fprintf(stderr, "       -O              output base positions 
on reads (disabled by -g/-u)\n");
+               fprintf(stderr, "       -s              output mapping quality 
(disabled by -g/-u)\n");
+               fprintf(stderr, "       -S              output per-sample 
strand bias P-value in BCF (require -g/-u)\n");
+               fprintf(stderr, "       -u              generate uncompressed 
BCF/VCF output\n");
                fprintf(stderr, "\nSNP/INDEL genotype likelihoods options 
(effective with `-g' or `-u'):\n\n");
-               fprintf(stderr, "       -e INT       Phred-scaled gap extension 
seq error probability [%d]\n", mplp.extQ);
-               fprintf(stderr, "       -F FLOAT     minimum fraction of gapped 
reads for candidates [%g]\n", mplp.min_frac);
-               fprintf(stderr, "       -h INT       coefficient for 
homopolymer errors [%d]\n", mplp.tandemQ);
-               fprintf(stderr, "       -I           do not perform indel 
calling\n");
-               fprintf(stderr, "       -L INT       max per-sample depth for 
INDEL calling [%d]\n", mplp.max_indel_depth);
-               fprintf(stderr, "       -m INT       minimum gapped reads for 
indel candidates [%d]\n", mplp.min_support);
-               fprintf(stderr, "       -o INT       Phred-scaled gap open 
sequencing error probability [%d]\n", mplp.openQ);
-               fprintf(stderr, "       -p           apply -m and -F per-sample 
to increase sensitivity\n");
-               fprintf(stderr, "       -P STR       comma separated list of 
platforms for indels [all]\n");
+               fprintf(stderr, "       -e INT          Phred-scaled gap 
extension seq error probability [%d]\n", mplp.extQ);
+               fprintf(stderr, "       -F FLOAT        minimum fraction of 
gapped reads for candidates [%g]\n", mplp.min_frac);
+               fprintf(stderr, "       -h INT          coefficient for 
homopolymer errors [%d]\n", mplp.tandemQ);
+               fprintf(stderr, "       -I              do not perform indel 
calling\n");
+               fprintf(stderr, "       -L INT          max per-sample depth 
for INDEL calling [%d]\n", mplp.max_indel_depth);
+               fprintf(stderr, "       -m INT          minimum gapped reads 
for indel candidates [%d]\n", mplp.min_support);
+               fprintf(stderr, "       -o INT          Phred-scaled gap open 
sequencing error probability [%d]\n", mplp.openQ);
+               fprintf(stderr, "       -p              apply -m and -F 
per-sample to increase sensitivity\n");
+               fprintf(stderr, "       -P STR          comma separated list of 
platforms for indels [all]\n");
                fprintf(stderr, "\n");
                fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
+        free(tmp_require); free(tmp_filter);
                return 1;
        }
     int ret;
diff --git a/bamtk.c b/bamtk.c
index cdf69a0..9c0dbe7 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -29,6 +29,7 @@ int main_pad2unpad(int argc, char *argv[]);
 int main_bedcov(int argc, char *argv[]);
 int main_bamshuf(int argc, char *argv[]);
 int main_stats(int argc, char *argv[]);
+int main_flags(int argc, char *argv[]);
 
 int faidx_main(int argc, char *argv[]);
 
@@ -71,7 +72,8 @@ static void usage(FILE *fp)
 "         phase       phase heterozygotes\n"
 "         stats       generate stats (former bamcheck)\n"
 "  -- viewing\n"
-"         tview       text alignment viewer (todo)\n"
+"         flags       explain BAM flags\n"
+"         tview       text alignment viewer\n"
 "         view        SAM<->BAM conversion\n"
 //"         depad       convert padded BAM to unpadded BAM\n" // not stable
 "\n");
@@ -127,6 +129,7 @@ int main(int argc, char *argv[])
        else if (strcmp(argv[1], "bedcov") == 0)    ret = main_bedcov(argc-1, 
argv+1);
        else if (strcmp(argv[1], "bamshuf") == 0)   ret = main_bamshuf(argc-1, 
argv+1);
        else if (strcmp(argv[1], "stats") == 0)     ret = main_stats(argc-1, 
argv+1);
+       else if (strcmp(argv[1], "flags") == 0)     ret = main_flags(argc-1, 
argv+1);
        else if (strcmp(argv[1], "pileup") == 0) {
                fprintf(stderr, "[main] The `pileup' command has been removed. 
Please use `mpileup' instead.\n");
                return 1;
diff --git a/bedcov.c b/bedcov.c
index 5fb9b37..8c143c1 100644
--- a/bedcov.c
+++ b/bedcov.c
@@ -19,9 +19,16 @@ typedef struct {
 
 static int read_bam(void *data, bam1_t *b)
 {
-       aux_t *aux = (aux_t*)data;
-       int ret = bam_iter_read(aux->fp, aux->iter, b);
-       if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
+       aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary 
structure
+    int ret;
+    while (1)
+    {
+        ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : 
bam_read1(aux->fp, b);
+        if ( ret<0 ) break;
+        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | 
BAM_FDUP) ) continue;
+        if ( (int)b->core.qual < aux->min_mapQ ) continue;
+        break;
+    }
        return ret;
 }
 
diff --git a/cut_target.c b/cut_target.c
index 8c56f33..cd266a5 100644
--- a/cut_target.c
+++ b/cut_target.c
@@ -121,21 +121,27 @@ static int read_aln(void *data, bam1_t *b)
        extern int bam_prob_realn_core(bam1_t *b, const char *ref, int flag);
        ct_t *g = (ct_t*)data;
        int ret, len;
-       ret = bam_read1(g->fp, b);
-       if (ret >= 0 && g->fai && b->core.tid >= 0 && (b->core.flag&4) == 0) {
-               if (b->core.tid != g->tid) { // then load the sequence
-                       free(g->ref);
-                       g->ref = fai_fetch(g->fai, 
g->h->target_name[b->core.tid], &len);
-                       g->tid = b->core.tid;
-               }
-               bam_prob_realn_core(b, g->ref, 1<<1|1);
-       }
+    while (1)
+    {
+        ret = bam_read1(g->fp, b);
+        if ( ret<0 ) break;
+        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | 
BAM_FDUP) ) continue;
+        if ( g->fai && b->core.tid >= 0 ) {
+            if (b->core.tid != g->tid) { // then load the sequence
+                free(g->ref);
+                g->ref = fai_fetch(g->fai, g->h->target_name[b->core.tid], 
&len);
+                g->tid = b->core.tid;
+            }
+            bam_prob_realn_core(b, g->ref, 1<<1|1);
+        }
+        break;
+    }
        return ret;
 }
 
 int main_cut_target(int argc, char *argv[])
 {
-       int c, tid, pos, n, lasttid = -1, lastpos = -1, l, max_l;
+       int c, tid, pos, n, lasttid = -1, l, max_l;
        const bam_pileup1_t *p;
        bam_plp_t plp;
        uint16_t *cns;
@@ -178,7 +184,6 @@ int main_cut_target(int argc, char *argv[])
                        lasttid = tid;
                }
                cns[pos] = gencns(&g, n, p);
-               lastpos = pos;
        }
        process_cns(g.h, lasttid, l, cns);
        free(cns);
diff --git a/phase.c b/phase.c
index bf8d6d3..b086b5f 100644
--- a/phase.c
+++ b/phase.c
@@ -450,15 +450,20 @@ static int readaln(void *data, bam1_t *b)
 {
        phaseg_t *g = (phaseg_t*)data;
        int ret;
-       ret = bam_read1(g->fp, b);
-       if (ret < 0) return ret;
-       if (!(b->core.flag & (BAM_FUNMAP|BAM_FSECONDARY|BAM_FQCFAIL|BAM_FDUP)) 
&& g->pre) {
-               if (g->n == g->m) {
-                       g->m = g->m? g->m<<1 : 16;
-                       g->b = realloc(g->b, g->m * sizeof(bam1_t*));
-               }
-               g->b[g->n++] = bam_dup1(b);
-       }
+    while (1)
+    {
+        ret = bam_read1(g->fp, b);
+        if (ret < 0) break;
+        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | 
BAM_FDUP) ) continue;
+        if ( g->pre ) {
+            if (g->n == g->m) {
+                g->m = g->m? g->m<<1 : 16;
+                g->b = realloc(g->b, g->m * sizeof(bam1_t*));
+            }
+            g->b[g->n++] = bam_dup1(b);
+        }
+        break;
+    }
        return ret;
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/debian-med/samtools.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to