Changeset: b736e8998988 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b736e8998988
Modified Files:
        sql/backends/monet5/bam/bam_lib.c
        sql/backends/monet5/bam/bam_lib.h
Branch: default
Log Message:

Backport the bam_lib functions
and make it more robust against exceptions.
More work needed to get the speed.


diffs (247 lines):

diff --git a/sql/backends/monet5/bam/bam_lib.c 
b/sql/backends/monet5/bam/bam_lib.c
--- a/sql/backends/monet5/bam/bam_lib.c
+++ b/sql/backends/monet5/bam/bam_lib.c
@@ -54,6 +54,7 @@ bam_flag(bit * ret, sht * flag, str * na
        return MAL_SUCCEED;
 }
 
+// use a simple lookup table for these mappings
 str
 reverse_seq(str * ret, str * seq)
 {
@@ -146,7 +147,8 @@ seq_length(int *ret, str * cigar)
        int result = 0;
        str cigar_consumable = *cigar;
 
-       if (cigar_consumable[0] == '*' && cigar_consumable[1] == '\0') {
+       if (cigar_consumable[0] == '\0' || 
+                       (cigar_consumable[0] == '*' && cigar_consumable[1] == 
'\0')) {
                *ret = -1;
                return MAL_SUCCEED;
        }
@@ -169,6 +171,66 @@ seq_length(int *ret, str * cigar)
        return MAL_SUCCEED;
 }
 
+str
+seq_char(str * ret, int * ref_pos, str * alg_seq, int * alg_pos, str * 
alg_cigar) 
+{
+       str cigar_consumable = *alg_cigar;
+       int seq_pos = -1;
+       int cur_ref_pos = *alg_pos - 1;
+       
+       if (cigar_consumable[0] == '\0' || 
+                       (cigar_consumable[0] == '*' && cigar_consumable[1] == 
'\0')) {
+               *ret = GDKstrdup(str_nil);
+               return MAL_SUCCEED;
+       }
+       while(TRUE) {
+               int cnt;
+               char op;
+               int nr_chars_read;
+               bit advance_ref_pos;
+               bit advance_seq_pos;
+
+               if (sscanf
+                       (cigar_consumable, "%d%c%n", &cnt, &op,
+                        &nr_chars_read) != 2)
+                       throw(MAL, "seq_char",
+                                 "Error parsing CIGAR string '%s'\n", 
*alg_cigar);
+               advance_ref_pos = (op == 'M' || op == 'D' || 
+                       op == 'N' || op == '=' || op == 'X');
+               advance_seq_pos = (op == 'M' || op == 'I'); // TODO: Find out 
which chars advance the seq pos
+               if(advance_seq_pos) {
+                       seq_pos += cnt;
+               }
+               if (advance_ref_pos) {
+                       cur_ref_pos += cnt;
+                       if(cur_ref_pos >= *ref_pos) {
+                               if(!advance_seq_pos) {
+                                       seq_pos = -1;
+                               } else {
+                                       seq_pos -= (cur_ref_pos - *ref_pos);
+                               }
+                               break;
+                       }
+               }
+               cigar_consumable += nr_chars_read;
+               if(cigar_consumable[0] == '\0') {
+                       seq_pos = -1;
+                       break;
+               }
+       }
+       if(seq_pos < 0 || seq_pos >= (int)strlen(*alg_seq)) {
+               *ret = GDKstrdup(str_nil);
+               return MAL_SUCCEED;
+       }
+       if(((*ret) = GDKmalloc(2 * sizeof(char))) == NULL) {
+               throw(MAL, "seq_char", MAL_MALLOC_FAIL);
+       }
+       (*ret)[0] = (*alg_seq)[seq_pos];
+       (*ret)[1] = '\0';
+       return MAL_SUCCEED;
+}
+
+
 
 
 str
@@ -192,6 +254,7 @@ bam_flag_bat(bat * ret, bat * bid, str *
        /* allocate result BAT */
        result = BATnew(TYPE_void, TYPE_bit, BATcount(flags), TRANSIENT);
        if (result == NULL) {
+               BBPreleaseref(flags->batCacheid);
                throw(MAL, "bam_flag_bat", MAL_MALLOC_FAIL);
        }
        BATseqbase(result, flags->hseqbase);
@@ -228,6 +291,7 @@ reverse_seq_bat(bat * ret, bat * bid)
        /* allocate result BAT */
        result = BATnew(TYPE_void, TYPE_str, BATcount(seqs), TRANSIENT);
        if (result == NULL) {
+               BBPreleaseref(seqs->batCacheid);
                throw(MAL, "reverse_seq_bat", MAL_MALLOC_FAIL);
        }
        BATseqbase(result, seqs->hseqbase);
@@ -240,6 +304,7 @@ reverse_seq_bat(bat * ret, bat * bid)
 
                if ((msg = reverse_seq(&r, &t)) != MAL_SUCCEED) {
                        BBPreleaseref(result->batCacheid);
+                       BBPreleaseref(seqs->batCacheid);
                        return msg;
                }
                BUNappend(result, (ptr) r, FALSE);
@@ -269,6 +334,7 @@ reverse_qual_bat(bat * ret, bat * bid)
        /* allocate result BAT */
        result = BATnew(TYPE_void, TYPE_str, BATcount(quals), TRANSIENT);
        if (result == NULL) {
+               BBPreleaseref(quals->batCacheid);
                throw(MAL, "reverse_qual_bat", MAL_MALLOC_FAIL);
        }
        BATseqbase(result, quals->hseqbase);
@@ -280,6 +346,7 @@ reverse_qual_bat(bat * ret, bat * bid)
                str r, msg;
 
                if ((msg = reverse_qual(&r, &t)) != MAL_SUCCEED) {
+                       BBPreleaseref(quals->batCacheid);
                        BBPreleaseref(result->batCacheid);
                        return msg;
                }
@@ -310,6 +377,7 @@ seq_length_bat(bat * ret, bat * bid)
        /* allocate result BAT */
        result = BATnew(TYPE_void, TYPE_int, BATcount(cigars), TRANSIENT);
        if (result == NULL) {
+               BBPreleaseref(cigars->batCacheid);
                throw(MAL, "seq_length_bat", MAL_MALLOC_FAIL);
        }
        BATseqbase(result, cigars->hseqbase);
@@ -323,6 +391,7 @@ seq_length_bat(bat * ret, bat * bid)
 
                if ((msg = seq_length(&r, &t)) != MAL_SUCCEED) {
                        BBPreleaseref(result->batCacheid);
+                       BBPreleaseref(cigars->batCacheid);
                        return msg;
                }
                BUNappend(result, (ptr) &r, FALSE);
@@ -335,3 +404,86 @@ seq_length_bat(bat * ret, bat * bid)
 
        return MAL_SUCCEED;
 }
+
+
+str
+seq_char_bat(bat * ret, int * ref_pos, bat * alg_seq, bat * alg_pos, bat * 
alg_cigar)
+{
+       BAT *seqs, *poss, *refs, *cigars, *result;
+       BUN ref= 0, seq = 0, pos = 0, cigar = 0, seq_end = 0;
+       BATiter ref_it, seq_it, pos_it, cigar_it;
+
+       assert(ret != NULL && ref_pos != NULL && alg_seq != NULL && alg_pos != 
NULL && alg_cigar != NULL);
+
+       if ((seqs = BATdescriptor(*alg_seq)) == NULL ||
+           (poss = BATdescriptor(*alg_pos)) == NULL ||
+           (refs = BATdescriptor(*ref_pos)) == NULL ||
+               (cigars = BATdescriptor(*alg_cigar)) == NULL) {
+                       if( seqs) BBPreleaseref(seqs->batCacheid);
+                       if( poss) BBPreleaseref(poss->batCacheid);
+                       if( refs) BBPreleaseref(refs->batCacheid);
+                       throw(MAL, "seq_char_bat", RUNTIME_OBJECT_MISSING);
+       }
+
+       if(BATcount(seqs) != BATcount(poss) || BATcount(seqs) != 
BATcount(cigars)) {
+               BBPreleaseref(seqs->batCacheid);
+               BBPreleaseref(poss->batCacheid);
+               BBPreleaseref(refs->batCacheid);
+               throw(MAL, "seq_char_bat", 
+                       "Misalignment in input BATs: "BUNFMT"/"BUNFMT"/"BUNFMT, 
+                       BATcount(poss), BATcount(seqs), BATcount(cigars));
+       }
+
+       /* allocate result BAT */
+       result = BATnew(TYPE_void, TYPE_str, BATcount(cigars), TRANSIENT);
+       if (result == NULL) {
+               BBPreleaseref(seqs->batCacheid);
+               BBPreleaseref(poss->batCacheid);
+               BBPreleaseref(refs->batCacheid);
+               throw(MAL, "seq_char_bat", MAL_MALLOC_FAIL);
+       }
+       BATseqbase(result, seqs->hseqbase);
+
+       ref = BUNfirst(refs);
+       seq = BUNfirst(seqs);
+       pos = BUNfirst(poss);
+       cigar = BUNfirst(cigars);
+       seq_end = BUNlast(seqs);
+
+       ref_it = bat_iterator(refs);
+       seq_it = bat_iterator(seqs);
+       pos_it = bat_iterator(poss);
+       cigar_it = bat_iterator(cigars);
+
+       while(seq < seq_end) {
+               str seq_val = (str) BUNtail(seq_it, seq);
+               int * ref_val = (int *) BUNtail(ref_it, ref);
+               int * pos_val = (int *) BUNtail(pos_it, pos);
+               str cigar_val = (str) BUNtail(cigar_it, cigar);
+               str r;
+               str msg;
+
+               if ((msg = seq_char(&r, ref_val, &seq_val, pos_val, 
&cigar_val)) != MAL_SUCCEED) {
+                       BBPreleaseref(refs->batCacheid);
+                       BBPreleaseref(seqs->batCacheid);
+                       BBPreleaseref(poss->batCacheid);
+                       BBPreleaseref(cigars->batCacheid);
+                       BBPreleaseref(result->batCacheid);
+                       return msg;
+               }
+               BUNappend(result, (ptr) r, FALSE);
+               ++seq;
+               ++pos;
+               ++cigar;
+       }
+
+       /* release input BAT-descriptors */
+       BBPreleaseref(refs->batCacheid);
+       BBPreleaseref(seqs->batCacheid);
+       BBPreleaseref(poss->batCacheid);
+       BBPreleaseref(cigars->batCacheid);
+
+       BBPkeepref((*ret = result->batCacheid));
+
+       return MAL_SUCCEED;
+}
diff --git a/sql/backends/monet5/bam/bam_lib.h 
b/sql/backends/monet5/bam/bam_lib.h
--- a/sql/backends/monet5/bam/bam_lib.h
+++ b/sql/backends/monet5/bam/bam_lib.h
@@ -42,10 +42,12 @@ bam_export str bam_flag(bit * ret, sht *
 bam_export str reverse_seq(str * ret, str * seq);
 bam_export str reverse_qual(str * ret, str * qual);
 bam_export str seq_length(int *ret, str * cigar);
+bam_export str seq_char(str * ret, int * ref_pos, str * alg_seq, int * 
alg_pos, str * alg_cigar);
 
 bam_export str bam_flag_bat(bat * ret, bat * bid, str * name);
 bam_export str reverse_seq_bat(bat * ret, bat * bid);
 bam_export str reverse_qual_bat(bat * ret, bat * bid);
 bam_export str seq_length_bat(bat * ret, bat * bid);
+bam_export str seq_char_bat(bat * ret, int * ref_pos, bat * alg_seq, bat * 
alg_pos, bat * alg_cigar);
 
 #endif
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to