Changeset: 3cbc81a925eb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cbc81a925eb
Modified Files:
sql/backends/monet5/bam/bam_loader.c
Branch: DVframework_bam
Log Message:
Replaced BAM header parsing functions with C macros. Cleaned up the code and
added comments for every function.
diffs (truncated from 607 to 300 lines):
diff --git a/sql/backends/monet5/bam/bam_loader.c
b/sql/backends/monet5/bam/bam_loader.c
--- a/sql/backends/monet5/bam/bam_loader.c
+++ b/sql/backends/monet5/bam/bam_loader.c
@@ -3,17 +3,44 @@
#include "kstring.h"
#include "bam_loader.h"
-/*
-static int CREATED MACRO, ORIGINAL DEFINITION COMMENTED OUT
-_append_option_to_bat_cond_str(_temp_container *ret_tc, _bam_header_option
*opt, str cmp, int table, int col, int *appendErr, int *flag)
-{
- return (strcmp(opt->tag, cmp) == 0 && _append_option_to_bat(ret_tc,
(ptr)opt->value, table, col, appendErr, flag));
-}*/
- #define _append_option_to_bat_cond_str(ret_tc, opt, cmp, table, col,
appendErr, flag) \
- (strcmp((opt)->tag, cmp) == 0 && _append_option_to_bat(ret_tc,
(ptr)((opt)->value), table, col, appendErr, flag))
+
+/* Macro's for parsing BAM header */
+
+#define _append_option_to_bat_cond_str(ret_tc, opt, cmp, table, col,
appendErr, flag) \
+ if(strcmp((opt)->tag, cmp) == 0) \
+ { \
+ _append_option_to_bat((ret_tc), (ptr)((opt)->value), (table), (col),
(appendErr), (flag)); \
+ continue; \
+ }
+#define _append_option_to_bat_cond_lng(ret_tc, opt, cmp, table, col,
appendErr, flag, l, s) \
+ if(strcmp((opt)->tag, (cmp)) == 0) \
+ { \
+ (l) = strtol((opt)->value, &s, 10); \
+ if((s) == ((opt)->value) || (l) == LONG_MIN || (l) == LONG_MAX) \
+ l = -1; \
+ _append_option_to_bat((ret_tc), (ptr)&l, (table), (col), (appendErr),
(flag)); \
+ continue; \
+ }
+
+#define _append_option_to_bat_cond_flt(ret_tc, opt, cmp, table, col,
appendErr, flag, f) \
+ if(strcmp((opt)->tag, (cmp)) == 0) \
+ { \
+ (f) = strtof((opt)->value, NULL); \
+ _append_option_to_bat((ret_tc), (ptr)&f, (table), (col), (appendErr),
(flag)); \
+ continue; \
+ } \
+
+#define _append_option_to_bat(ret_tc, value, table, col, appendErr, flag) \
+ if(BUNappend((ret_tc)->tables_columns[(table)].column_bats[(col)],
(value), TRUE) == NULL) \
+ *appendErr = TRUE; \
+ else \
+ *flag = TRUE;
+
+
+
/*
* NOTE: Copied directly from miniseed/registrar.c
* TODO: Make both miniseed/registrar.c and this file include some generic
library for these kind of structures
@@ -37,6 +64,7 @@ typedef struct {
sht num_tables;
} _temp_container;
+
/*
* File format specific structures
*/
@@ -87,27 +115,19 @@ static str _next_file_id(Client cntxt, M
static str _init_temp_container(_temp_container *ret_tc, sht dbschema);
static str _init_temp_container_simple(_temp_container *ret_tc);
static str _loadfile(str filepath, _temp_container *ret_tc, sht dbschema, sht
file_id); /* load file and add contents to ret_tc */
-static str _process_bam_header(sht file_id, str header, _temp_container
*ret_tc);
-static bit _append_option_to_bat_cond_lng(_temp_container *ret_tc,
_bam_header_option *opt, str cmp, sht table, sht col, bit *appendErr, bit
*flag);
-static bit _append_option_to_bat_cond_flt(_temp_container *ret_tc,
_bam_header_option *opt, str cmp, sht table, sht col, bit *appendErr, bit
*flag);
-static bit _append_option_to_bat(_temp_container *ret_tc, ptr value, sht
table, sht col, bit *appendErr, bit *flag);
-static str _read_bam_header_line(str *header, _bam_header_line *ret_hl, bit
*eof);
+static str _parse_bam_header(sht file_id, str header, _temp_container *ret_tc);
+static str _parse_bam_header_line(str *header, _bam_header_line *ret_hl, bit
*eof);
static void _free_bam_header_line(_bam_header_line *hl);
static str _process_bam_alignment(sht file_id, lng virtual_offset,
bam_header_t *header, bam1_t *alignment, _temp_container *ret_tc, sht schema);
-/*static bit _parse_alignment_str(str *sam_alig, str *dest);
-static bit _parse_alignment_lng(str *sam_alig, lng *dest);*/
/* Generic functions */
static str _init_temp_subcontainer(_temp_subcontainer *ret_tsc, str
*col_names, int *col_types, sht num_cols);
static str _insert_into_vault(Client cntxt, _temp_container* tc);
static int _read_string_until_delim(str *src, str *ret, char *delims, sht
num_delims);
-static bit _parse_lng(str *src, lng *i);
static void _append_to_log(str mssg);
static void _free_temp_container(_temp_container* tc);
-
-
/* File format specific functions */
str
@@ -135,7 +155,7 @@ bam_loader(Client cntxt, MalBlkPtr mb, M
err2 = "Error while loading BAM file: %s\n";
else if((err1 = _insert_into_vault(cntxt, tc)) != MAL_SUCCEED)
err2 = "Error inserting data into database: %s\n";
-
+
_free_temp_container(tc);
if(err2 != NULL)
@@ -180,6 +200,9 @@ static str
return MAL_SUCCEED;
}
+/*
+* The dbschema argument will determine how the _temp_container structure will
be filled
+*/
static str
_init_temp_container(_temp_container *ret_tc, sht dbschema)
{
@@ -188,7 +211,9 @@ static str
throw(MAL, "_init_temp_container", "No temp container initialization
method exists for dbschema %d", dbschema);
}
-
+/*
+* Fill the _temp_container with the simple schema
+*/
static str
_init_temp_container_simple(_temp_container *ret_tc)
{
@@ -245,6 +270,12 @@ static str
return MAL_SUCCEED;
}
+/*
+* Given a filepath, the BAM file at this location will be tried to read. If
this succeeds, all data contained
+* in it will be stored in the _temp_container structure.
+* file_id will be used both as a primary key in the files table for this file
and as a foreign key in all other
+* tables that hold foreign key relations to the files table.
+*/
static str
_loadfile(str filepath, _temp_container *ret_tc, sht dbschema, sht file_id)
{
@@ -285,7 +316,7 @@ static str
if(header == NULL)
throw(MAL, "loadfile", "Error reading header of %s \n", filepath);
headertext_consumable = header->text;
- if((err = _process_bam_header(file_id, headertext_consumable, ret_tc))
!= MAL_SUCCEED)
+ if((err = _parse_bam_header(file_id, headertext_consumable, ret_tc)) !=
MAL_SUCCEED)
{
bam_header_destroy(header);
throw(MAL, "loadfile", "Error processing bam header: %s\n", err);
@@ -316,8 +347,11 @@ static str
return MAL_SUCCEED;
}
+/*
+* Process the ASCII BAM header provided in the header argument. Store the
parsed contents of the header in the _temp_container structure
+*/
static str
-_process_bam_header(sht file_id, str header, _temp_container *ret_tc)
+_parse_bam_header(sht file_id, str header, _temp_container *ret_tc)
{
/* TODO: consider checking whether or not floating point conversions
succeeded */
_bam_header_line *hl;
@@ -335,6 +369,9 @@ static str
bit rg_fields_found[12];
bit pg_fields_found[5];
bit eof = FALSE;
+ str s;
+ lng l;
+ flt f;
/* loop will run until no more header lines are found */
while(TRUE)
@@ -343,18 +380,18 @@ static str
hl = (_bam_header_line *)GDKmalloc(sizeof(_bam_header_line));
if(hl == NULL)
- throw(MAL, "_process_bam_header", MAL_MALLOC_FAIL);
+ throw(MAL, "_parse_bam_header", MAL_MALLOC_FAIL);
hl->num_options = 0;
/* try to read the next header line */
- if((err = _read_bam_header_line(&header, hl, &eof)) != MAL_SUCCEED)
+ if((err = _parse_bam_header_line(&header, hl, &eof)) != MAL_SUCCEED)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "Error while reading header
line: %s\n", err);
+ throw(MAL, "_parse_bam_header", "Error while reading header line:
%s\n", err);
}
- /* if eof is set to TRUE by _read_bam_header_line, this indicates that
we reached the end of the header */
+ /* if eof is set to TRUE by _parse_bam_header_line, this indicates
that we reached the end of the header */
if(eof)
break;
@@ -365,7 +402,7 @@ static str
if(num_hd_lines > 1)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "More than one HD line found
in header\n");
+ throw(MAL, "_parse_bam_header", "More than one HD line found
in header\n");
}
hd_fields_found[0] = FALSE;
@@ -373,24 +410,24 @@ static str
for(o = 0; o < hl->num_options; ++o)
{
- if(_append_option_to_bat_cond_flt(ret_tc, &hl->options[o],
"VN", 0, 2, &appendErr, &hd_fields_found[0])) continue;
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"SO", 0, 3, &appendErr, &hd_fields_found[1])) continue;
+ _append_option_to_bat_cond_flt(ret_tc, &hl->options[o], "VN",
0, 2, &appendErr, &hd_fields_found[0], f);
+ _append_option_to_bat_cond_str(ret_tc, &hl->options[o], "SO",
0, 3, &appendErr, &hd_fields_found[1]);
/* if this point is reached, option either wasn't recognized
or couldn't be appended to BAT */
_free_bam_header_line(hl);
if(appendErr)
- throw(MAL, "_process_bam_header", "Error appending header
tag HD to database");
- throw(MAL, "_process_bam_header", "Unknown option found in
header tag HD");
+ throw(MAL, "_parse_bam_header", "Error appending header
tag HD to database");
+ throw(MAL, "_parse_bam_header", "Unknown option found in
header tag HD");
}
if(!hd_fields_found[0])
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "VN tag not found in HD
header line\n");
+ throw(MAL, "_parse_bam_header", "VN tag not found in HD header
line\n");
}
if(!hd_fields_found[1] &&
BUNappend(ret_tc->tables_columns[0].column_bats[2], (ptr) str_nil, TRUE) ==
NULL)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "Error appending NULL to
column of files table\n");
+ throw(MAL, "_parse_bam_header", "Error appending NULL to
column of files table\n");
}
}
else if(strcmp(hl->header_tag, "SQ") == 0)
@@ -398,7 +435,7 @@ static str
if(BUNappend(ret_tc->tables_columns[1].column_bats[1], (ptr)
&file_id, TRUE) == NULL)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "Appending file_id to SQ BAT
failed\n");
+ throw(MAL, "_parse_bam_header", "Appending file_id to SQ BAT
failed\n");
}
for(i=0; i<6; ++i)
@@ -406,18 +443,18 @@ static str
for(o = 0; o < hl->num_options; ++o)
{
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"SN", 1, 0, &appendErr, &sq_fields_found[0])) continue;
- if(_append_option_to_bat_cond_lng(ret_tc, &hl->options[o],
"LN", 1, 2, &appendErr, &sq_fields_found[1])) continue;
- if(_append_option_to_bat_cond_lng(ret_tc, &hl->options[o],
"AS", 1, 3, &appendErr, &sq_fields_found[2])) continue;
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"M5", 1, 4, &appendErr, &sq_fields_found[3])) continue;
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"SP", 1, 5, &appendErr, &sq_fields_found[4])) continue;
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"UR", 1, 6, &appendErr, &sq_fields_found[5])) continue;
+ _append_option_to_bat_cond_str(ret_tc, &hl->options[o], "SN",
1, 0, &appendErr, &sq_fields_found[0]);
+ _append_option_to_bat_cond_lng(ret_tc, &hl->options[o], "LN",
1, 2, &appendErr, &sq_fields_found[1], l, s);
+ _append_option_to_bat_cond_lng(ret_tc, &hl->options[o], "AS",
1, 3, &appendErr, &sq_fields_found[2], l, s);
+ _append_option_to_bat_cond_str(ret_tc, &hl->options[o], "M5",
1, 4, &appendErr, &sq_fields_found[3]);
+ _append_option_to_bat_cond_str(ret_tc, &hl->options[o], "SP",
1, 5, &appendErr, &sq_fields_found[4]);
+ _append_option_to_bat_cond_str(ret_tc, &hl->options[o], "UR",
1, 6, &appendErr, &sq_fields_found[5]);
/* if this point is reached, option either wasn't recognized
or couldn't be appended to BAT */
_free_bam_header_line(hl);
if(appendErr)
- throw(MAL, "_process_bam_header", "Error appending header
tag SQ to database");
- throw(MAL, "_process_bam_header", "Unknown option found in
header tag SQ");
+ throw(MAL, "_parse_bam_header", "Error appending header
tag SQ to database");
+ throw(MAL, "_parse_bam_header", "Unknown option found in
header tag SQ");
}
if(!sq_fields_found[0])
err = "SN tag not found in SQ header line\n";
@@ -427,7 +464,7 @@ static str
if(err != NULL)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "%s", err);
+ throw(MAL, "_parse_bam_header", "%s", err);
}
if((!sq_fields_found[2] &&
BUNappend(ret_tc->tables_columns[1].column_bats[3], (ptr) &int_nil, TRUE) ==
NULL)
@@ -437,7 +474,7 @@ static str
)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "Error appending NULL to
column of sq table\n");
+ throw(MAL, "_parse_bam_header", "Error appending NULL to
column of sq table\n");
}
}
else if(strcmp(hl->header_tag, "RG") == 0)
@@ -445,7 +482,7 @@ static str
if(BUNappend(ret_tc->tables_columns[2].column_bats[1], (ptr)
&file_id, TRUE) == NULL)
{
_free_bam_header_line(hl);
- throw(MAL, "_process_bam_header", "Appending file_id to RG BAT
failed\n");
+ throw(MAL, "_parse_bam_header", "Appending file_id to RG BAT
failed\n");
}
for(i=0; i<12; ++i)
@@ -453,29 +490,29 @@ static str
for(o = 0; o < hl->num_options; ++o)
{
- if(_append_option_to_bat_cond_str(ret_tc, &hl->options[o],
"ID", 2, 0, &appendErr, &rg_fields_found[0])) continue;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list