Changeset: 9ea2b0a95b36 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9ea2b0a95b36
Added Files:
        sql/backends/monet5/bam/bam_clear_1.sql
        sql/backends/monet5/bam/bam_schema_1.sql
Removed Files:
        sql/backends/monet5/bam/bam_clear.sql
        sql/backends/monet5/bam/bam_schema.sql
Modified Files:
        sql/backends/monet5/bam/bamloader.c
        sql/backends/monet5/bam/bamloader.h
Branch: DVframework_bam
Log Message:

Made adjustments to enable testing with different DB schemas


diffs (truncated from 702 to 300 lines):

diff --git a/sql/backends/monet5/bam/bam_clear.sql b/sql/backends/monet5/bam/bam_clear.sql
deleted file mode 100644
--- a/sql/backends/monet5/bam/bam_clear.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-DROP FUNCTION bam.bam_files_reg;
-DROP FUNCTION bam.bam_sq_reg;
-DROP FUNCTION bam.bam_rg_reg;
-DROP FUNCTION bam.bam_pg_reg;
-DROP FUNCTION bam.bam_alignments_reg;
-DROP FUNCTION bam.bam_alignments_extra_reg;
-
-DROP TABLE bam.alignments_extra;
-DROP TABLE bam.alignments;
-DROP TABLE bam.pg;
-DROP TABLE bam.rg;
-DROP TABLE bam.sq;
-DROP TABLE bam.files;
-DROP SCHEMA bam;
diff --git a/sql/backends/monet5/bam/bam_clear_1.sql b/sql/backends/monet5/bam/bam_clear_1.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/bam/bam_clear_1.sql
@@ -0,0 +1,14 @@
+-- Removes the objects of the "bam" schema: the six *_reg functions first,
+-- then the six tables, and finally the schema itself.
+DROP FUNCTION bam.bam_files_reg;
+DROP FUNCTION bam.bam_sq_reg;
+DROP FUNCTION bam.bam_rg_reg;
+DROP FUNCTION bam.bam_pg_reg;
+DROP FUNCTION bam.bam_alignments_reg;
+DROP FUNCTION bam.bam_alignments_extra_reg;
+
+-- Tables go in reverse dependency order: alignments_extra references
+-- alignments, and every other table references bam.files, so files is last.
+DROP TABLE bam.alignments_extra;
+DROP TABLE bam.alignments;
+DROP TABLE bam.pg;
+DROP TABLE bam.rg;
+DROP TABLE bam.sq;
+DROP TABLE bam.files;
+DROP SCHEMA bam;
diff --git a/sql/backends/monet5/bam/bam_schema.sql b/sql/backends/monet5/bam/bam_schema.sql
deleted file mode 100644
--- a/sql/backends/monet5/bam/bam_schema.sql
+++ /dev/null
@@ -1,98 +0,0 @@
-CREATE SCHEMA bam;
-
-CREATE TABLE "bam"."files" (
-    "file_location"                 STRING      NOT NULL      UNIQUE,
-    "format_version"                REAL,
-    "sorting_order"                 VARCHAR(10),
-    "comments"                      STRING,
-    CONSTRAINT "files_pkey_file_location" PRIMARY KEY (file_location)
-);
-
-CREATE TABLE "bam"."sq" (
-    "sn"                            STRING      NOT NULL,
-    "file_location"                 STRING      NOT NULL,
-    "ln"                            INT         NOT NULL,
-    "as"                            INT,
-    "m5"                            STRING,
-    "sp"                            STRING,
-    "ur"                            STRING,
-    CONSTRAINT "sq_pkey_sn_file_location" PRIMARY KEY (sn, file_location),
-    CONSTRAINT "sq_fkey_file_location" FOREIGN KEY (file_location) REFERENCES 
bam.files (file_location)
-);
-
-CREATE TABLE "bam"."rg" (
-    "id"                            STRING      NOT NULL,
-    "file_location"                 STRING      NOT NULL,
-    "cn"                            STRING,
-    "ds"                            STRING,
-    "dt"                            TIMESTAMP,
-    "fo"                            STRING,
-    "ks"                            STRING,
-    "lb"                            STRING,
-    "pg"                            STRING,
-    "pi"                            INT,
-    "PL"                            STRING,
-    "PU"                            STRING,
-    "SM"                            STRING,
-    CONSTRAINT "rg_pkey_id_file_location" PRIMARY KEY (id, file_location),
-    CONSTRAINT "rg_fkey_file_location" FOREIGN KEY (file_location) REFERENCES 
bam.files (file_location)
-);
-
-CREATE TABLE "bam"."pg" (
-    "id"                            STRING      NOT NULL,
-    "file_location"                 STRING      NOT NULL,
-    "pn"                            STRING,
-    "cl"                            STRING,
-    "pp"                            STRING,
-    "vn"                            STRING,
-    CONSTRAINT "pg_pkey_id_file_location" PRIMARY KEY (id, file_location),
-    CONSTRAINT "pg_fkey_file_location" FOREIGN KEY (file_location) REFERENCES 
bam.files (file_location)
-);
-
-CREATE TABLE "bam"."alignments" (
-    "file_location"                 STRING      NOT NULL,
-    "virtual_offset"                BIGINT      NOT NULL,
-    "qname"                         STRING      NOT NULL,
-    "flag_temp_mult_segm"           BOOLEAN     NOT NULL,
-    "flag_each_segm_prop_alig"      BOOLEAN     NOT NULL,
-    "flag_segm_unma"                BOOLEAN     NOT NULL,
-    "flag_next_segm_unma"           BOOLEAN     NOT NULL,
-    "flag_seq_reve_comp"            BOOLEAN     NOT NULL,
-    "flag_seq_next_segm_reve"       BOOLEAN     NOT NULL,
-    "flag_first_segm"               BOOLEAN     NOT NULL,
-    "flag_last_segm"                BOOLEAN     NOT NULL,
-    "flag_seco_alig"                BOOLEAN     NOT NULL,
-    "flag_not_pass_qual_cont"       BOOLEAN     NOT NULL,
-    "flag_pcr_opti_dupl"            BOOLEAN     NOT NULL,
-    "rname"                         STRING      NOT NULL,
-    "pos"                           INT         NOT NULL,
-    "mapq"                          INT         NOT NULL,
-    "cigar"                         STRING      NOT NULL,
-    "rnext"                         STRING      NOT NULL,
-    "pnext"                         INT         NOT NULL,
-    "tlen"                          INT         NOT NULL,
-    "seq"                           STRING      NOT NULL,
-    "qual"                          STRING      NOT NULL,
-    CONSTRAINT "alignments_pkey_file_location_virtual_offset" PRIMARY KEY 
(file_location, virtual_offset),
-    CONSTRAINT "alignments_fkey_file_location" FOREIGN KEY (file_location) 
REFERENCES bam.files (file_location)
-);
-
-CREATE TABLE "bam"."alignments_extra" (
-    "tag"                           CHAR(2)     NOT NULL,
-    "file_location"                 STRING      NOT NULL,
-    "virtual_offset"                BIGINT      NOT NULL,
-    "type"                          CHAR(1)     NOT NULL,
-    "value"                         STRING,
-    CONSTRAINT "alignments_extra_pkey_tag_file_location_virtual_offset" 
PRIMARY KEY (tag, file_location, virtual_offset),
-    CONSTRAINT "alignments_extra_fkey_file_location_virtual_offset" FOREIGN 
KEY (file_location, virtual_offset) REFERENCES bam.alignments (file_location, 
virtual_offset)
-);
-
-
-CREATE FUNCTION bam.bam_files_reg(ticket bigint, table_idx int) RETURNS 
table("file_location" string, "format_version" real, "sorting_order" string, 
"comments" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_sq_reg(ticket bigint, table_idx int) RETURNS 
table("sn" string, "file_location" string, "ln" int, "as" int, "m5" string, 
"sp" string, "ur" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_rg_reg(ticket bigint, table_idx int) RETURNS 
table("id" string, "file_location" string, "cn" string, "ds" string, "dt" 
timestamp, "fo" string, "ks" string, "lb" string, "pg" string, "pi" int, "pl" 
string, "pu" string, "sm" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_pg_reg(ticket bigint, table_idx int) RETURNS 
table("id" string, "file_location" string, "pn" string, "cl" string, "pp" 
string, "vn" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_alignments_reg(ticket bigint, table_idx int) RETURNS 
table("file_location" string, "virtual_offset" bigint, "qname" string, 
"flag_temp_mult_segm" boolean, "flag_each_segm_prop_alig" boolean, 
"flag_segm_unma" boolean, "flag_next_segm_unma" boolean, "flag_seq_reve_comp" 
boolean, "flag_seq_next_segm_reve" boolean, "flag_first_segm" boolean, 
"flag_last_segm" boolean, "flag_seco_alig" boolean, "flag_not_pass_qual_cont" 
boolean, "flag_pcr_opti_dupl" boolean, "rname" string, "pos" int, "mapq" int, 
"cigar" string, "rnext" string, "pnext" int, "tlen" int, "seq" string, "qual" 
string) external name bam.register_table;
-CREATE FUNCTION bam.bam_alignments_extra_reg(ticket bigint, table_idx int) 
RETURNS table("tag" string, "file_location" string, "virtual_offset" bigint, 
"type" char, "value" string) external name bam.register_table;
-
-
diff --git a/sql/backends/monet5/bam/bam_schema_1.sql b/sql/backends/monet5/bam/bam_schema_1.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/bam/bam_schema_1.sql
@@ -0,0 +1,89 @@
+CREATE SCHEMA bam;
+
+-- One row per loaded file. "file_id" is the primary key that the other
+-- bam tables reference; the primary key already enforces uniqueness, so no
+-- separate UNIQUE constraint is declared on it. "file_location" keeps its own
+-- UNIQUE constraint because it is not part of the primary key.
+CREATE TABLE "bam"."files" (
+    "file_id"                       SMALLINT    NOT NULL,
+    "file_location"                 STRING      NOT NULL      UNIQUE,
+    "format_version"                REAL,
+    "sorting_order"                 VARCHAR(10),
+    "comments"                      STRING,
+    CONSTRAINT "files_pkey_file_id" PRIMARY KEY (file_id)
+);
+
+-- "sq" table: rows are identified by (sn, file_id), and file_id must refer
+-- to an existing row of bam.files.
+-- NOTE(review): presumably holds the @SQ lines of a BAM header -- confirm
+-- against the loader code.
+CREATE TABLE "bam"."sq" (
+    "sn"                            STRING      NOT NULL,
+    "file_id"                       SMALLINT    NOT NULL,
+    "ln"                            INT         NOT NULL,
+    "as"                            INT,
+    "m5"                            STRING,
+    "sp"                            STRING,
+    "ur"                            STRING,
+    CONSTRAINT "sq_pkey_sn_file_id" PRIMARY KEY (sn, file_id),
+    CONSTRAINT "sq_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files 
(file_id)
+);
+
+-- "rg" table: rows are identified by (id, file_id), and file_id must refer
+-- to an existing row of bam.files.
+-- All column names are lower-case. Quoted identifiers keep their case, so
+-- "pl", "pu" and "sm" are spelled exactly like the columns returned by
+-- bam.bam_rg_reg and like the names in the loader's coln_rg array.
+CREATE TABLE "bam"."rg" (
+    "id"                            STRING      NOT NULL,
+    "file_id"                       SMALLINT    NOT NULL,
+    "cn"                            STRING,
+    "ds"                            STRING,
+    "dt"                            TIMESTAMP,
+    "fo"                            STRING,
+    "ks"                            STRING,
+    "lb"                            STRING,
+    "pg"                            STRING,
+    "pi"                            INT,
+    "pl"                            STRING,
+    "pu"                            STRING,
+    "sm"                            STRING,
+    CONSTRAINT "rg_pkey_id_file_id" PRIMARY KEY (id, file_id),
+    CONSTRAINT "rg_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+-- "pg" table: rows are identified by (id, file_id), and file_id must refer
+-- to an existing row of bam.files.
+-- NOTE(review): presumably holds the @PG lines of a BAM header -- confirm
+-- against the loader code.
+CREATE TABLE "bam"."pg" (
+    "id"                            STRING      NOT NULL,
+    "file_id"                       SMALLINT    NOT NULL,
+    "pn"                            STRING,
+    "cl"                            STRING,
+    "pp"                            STRING,
+    "vn"                            STRING,
+    CONSTRAINT "pg_pkey_id_file_id" PRIMARY KEY (id, file_id),
+    CONSTRAINT "pg_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files 
(file_id)
+);
+
+-- Alignment records. A row is identified by (virtual_offset, file_id), and
+-- file_id must refer to an existing row of bam.files. Every column is
+-- mandatory.
+CREATE TABLE "bam"."alignments" (
+    "virtual_offset"                BIGINT      NOT NULL,
+    "file_id"                       SMALLINT    NOT NULL,
+    "qname"                         STRING      NOT NULL,
+    "flag"                          SMALLINT    NOT NULL,
+    "rname"                         STRING      NOT NULL,
+    "pos"                           INT         NOT NULL,
+    "mapq"                          SMALLINT    NOT NULL,
+    "cigar"                         STRING      NOT NULL,
+    "rnext"                         STRING      NOT NULL,
+    "pnext"                         INT         NOT NULL,
+    "tlen"                          INT         NOT NULL,
+    "seq"                           STRING      NOT NULL,
+    "qual"                          STRING      NOT NULL,
+    CONSTRAINT "alignments_pkey_virtual_offset_file_id" PRIMARY KEY (virtual_offset, file_id),
+    CONSTRAINT "alignments_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+-- Per-alignment tagged values. A row is identified by
+-- (tag, virtual_offset, file_id), and (virtual_offset, file_id) must refer to
+-- an existing row of bam.alignments. Only "value" may be NULL.
+CREATE TABLE "bam"."alignments_extra" (
+    "tag"                           CHAR(2)     NOT NULL,
+    "virtual_offset"                BIGINT      NOT NULL,
+    "file_id"                       SMALLINT    NOT NULL,
+    "type"                          CHAR(1)     NOT NULL,
+    "value"                         STRING,
+    CONSTRAINT "alignments_extra_pkey_tag_virtual_offset_file_id" PRIMARY KEY 
(tag, virtual_offset, file_id),
+    CONSTRAINT "alignments_extra_fkey_virtual_offset_file_id" FOREIGN KEY 
(virtual_offset, file_id) REFERENCES bam.alignments (virtual_offset, file_id)
+);
+
+
+-- Table-returning function bound to the external routine bam.register_table;
+-- its result columns mirror bam.files.
+-- "file_id" is declared smallint so that it agrees with bam.files.file_id,
+-- with the TYPE_sht entry in the loader's colt_files array, and with the
+-- file_id columns of the other *_reg functions.
+CREATE FUNCTION bam.bam_files_reg(ticket bigint, table_idx int) RETURNS table("file_id" smallint, "file_location" string, "format_version" real, "sorting_order" string, "comments" string) external name bam.register_table;
+-- Table-returning functions for the sq, rg and pg tables. All three are bound
+-- to the same external routine, bam.register_table, and list their result
+-- columns in the same order as the corresponding CREATE TABLE statement.
+-- NOTE(review): bam_rg_reg declares "dt" as timestamp, while the loader's
+-- colt_rg array uses TYPE_int for that column -- confirm which is intended.
+CREATE FUNCTION bam.bam_sq_reg(ticket bigint, table_idx int) RETURNS 
table("sn" string, "file_id" smallint, "ln" int, "as" int, "m5" string, "sp" 
string, "ur" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_rg_reg(ticket bigint, table_idx int) RETURNS 
table("id" string, "file_id" smallint, "cn" string, "ds" string, "dt" 
timestamp, "fo" string, "ks" string, "lb" string, "pg" string, "pi" int, "pl" 
string, "pu" string, "sm" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_pg_reg(ticket bigint, table_idx int) RETURNS 
table("id" string, "file_id" smallint, "pn" string, "cl" string, "pp" string, 
"vn" string) external name bam.register_table;
+-- Table-returning function for the alignments table, bound to the external
+-- routine bam.register_table.
+-- NOTE(review): the result lists "file_id" before "virtual_offset", whereas
+-- bam.alignments declares virtual_offset first; "flag" and "mapq" are int
+-- here but SMALLINT in the table. Confirm against the column arrays the
+-- loader uses for this table.
+CREATE FUNCTION bam.bam_alignments_reg(ticket bigint, table_idx int) RETURNS 
table("file_id" smallint, "virtual_offset" bigint, "qname" string, "flag" int, 
"rname" string, "pos" int, "mapq" int, "cigar" string, "rnext" string, "pnext" 
int, "tlen" int, "seq" string, "qual" string) external name bam.register_table;
+-- Table-returning function for the alignments_extra table, bound to the
+-- external routine bam.register_table.
+-- The result columns are listed as (tag, virtual_offset, file_id, type,
+-- value): the same order as bam.alignments_extra and as the loader's
+-- coln_alignments_extra / colt_alignments_extra arrays, so each declared
+-- type lines up with the column at that position.
+CREATE FUNCTION bam.bam_alignments_extra_reg(ticket bigint, table_idx int) RETURNS table("tag" string, "virtual_offset" bigint, "file_id" smallint, "type" char, "value" string) external name bam.register_table;
+
+
diff --git a/sql/backends/monet5/bam/bamloader.c b/sql/backends/monet5/bam/bamloader.c
--- a/sql/backends/monet5/bam/bamloader.c
+++ b/sql/backends/monet5/bam/bamloader.c
@@ -10,7 +10,6 @@
 typedef struct {
        bat *column_bats; /* keeps bats of the columns: lower array */
        str *column_names; /* names of columns that are kept in the higher 
array */
-    str *column_types_strs; /* type strings of columns */
 } temp_subcontainer;
 
 /*
@@ -45,12 +44,35 @@ typedef struct {
 FILE *logfile = NULL; /* keep logfile file opened in this global var while the 
bam code runs,  */
                         /* since opening and closing every time something has 
to be written turned out to be very slow */
 
+/*
+ * SQL schema details that are common to every schema.
+ *
+ * Each table is described by two parallel arrays: coln_<table> holds the
+ * column names and colt_<table> holds the type constant of the column at the
+ * same index. num_col_<table> is computed from the name array, so the count
+ * cannot drift from the arrays when a column is added or removed.
+ */
+str coln_files[] = {"file_id", "file_location", "format_version", "sorting_order", "comments"};
+int colt_files[] = {TYPE_sht , TYPE_str       , TYPE_flt        , TYPE_str       , TYPE_str  };
+int num_col_files = (int) (sizeof(coln_files) / sizeof(coln_files[0]));
+
+str coln_sq[] = {"sn"    , "file_id", "ln"    , "as"    , "m5"    , "sp"    , "ur"    };
+int colt_sq[] = {TYPE_str, TYPE_sht , TYPE_int, TYPE_int, TYPE_str, TYPE_str, TYPE_str};
+int num_col_sq = (int) (sizeof(coln_sq) / sizeof(coln_sq[0]));
+
+/* NOTE(review): "dt" is TYPE_int here, while the SQL schema declares rg.dt as TIMESTAMP -- confirm which is intended. */
+str coln_rg[] = {"id"    , "file_id", "cn"    , "ds"    , "dt"    , "fo"    , "ks"    , "lb"    , "pg"    , "pi"    , "pl"    , "pu"    , "sm"    };
+int colt_rg[] = {TYPE_str, TYPE_sht , TYPE_str, TYPE_str, TYPE_int, TYPE_str, TYPE_str, TYPE_str, TYPE_str, TYPE_int, TYPE_str, TYPE_str, TYPE_str};
+int num_col_rg = (int) (sizeof(coln_rg) / sizeof(coln_rg[0]));
+
+str coln_pg[] = {"id"    , "file_id", "pn"    , "cl"    , "pp"    , "vn"    };
+int colt_pg[] = {TYPE_str, TYPE_sht , TYPE_str, TYPE_str, TYPE_str, TYPE_str};
+int num_col_pg = (int) (sizeof(coln_pg) / sizeof(coln_pg[0]));
+
+str coln_alignments_extra[] = {"tag"   , "virtual_offset", "file_id", "type"  , "value" };
+int colt_alignments_extra[] = {TYPE_str, TYPE_lng        , TYPE_sht , TYPE_str, TYPE_str};
+int num_col_alignments_extra = (int) (sizeof(coln_alignments_extra) / sizeof(coln_alignments_extra[0]));
+
+
 
 
 /* File format specific functions */
-static str init_temp_container(temp_container *ret_tc);
+static str init_temp_container_simple(temp_container *ret_tc);
 static str loadfile(str filepath, temp_container *ret_tc); /* load file and 
add contents to tc */
-static str process_bam_header(str filepath, str header, temp_container 
*ret_tc);
+static str process_bam_header(int file_id, str header, temp_container *ret_tc);
 static int append_option_to_bat_cond_str(temp_container *ret_tc, 
bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
 static int append_option_to_bat_cond_lng(temp_container *ret_tc, 
bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
 static int append_option_to_bat_cond_flt(temp_container *ret_tc, 
bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
@@ -58,18 +80,16 @@ static int append_option_to_bat_cond_tms
 static int append_option_to_bat(temp_container *ret_tc, ptr value, int table, 
int col, int *appendErr, int *flag);
 static str read_bam_header_line(str *header, bam_header_line *ret_hl, int 
*eof);
 static void free_bam_header_line(bam_header_line *hl);
-static str process_bam_alignment(str filepath, lng virtual_offset, 
bam_header_t *header, bam1_t *alignment, temp_container *ret_tc);
+static str process_bam_alignment(int file_id, lng virtual_offset, bam_header_t 
*header, bam1_t *alignment, temp_container *ret_tc, int schema);
 static int parse_alignment_str(str *sam_alig, str *dest);
 static int parse_alignment_lng(str *sam_alig, lng *dest);
 
 /* Generic functions */
-static str init_temp_subcontainer(temp_subcontainer *ret_tsc,
-    str *col_names, str *col_types_strs, int *col_types, int num_cols);
+static str init_temp_subcontainer(temp_subcontainer *ret_tsc, str *col_names, 
int *col_types, int num_cols);
 static str append_to_bat(bat cb, ptr val);
 static str insert_into_vault(Client cntxt, temp_container* tc);
 static int read_string_until_delim(str *src, str *ret, char *delims, int 
num_delims);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to