Changeset: 9ea2b0a95b36 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9ea2b0a95b36
Added Files:
sql/backends/monet5/bam/bam_clear_1.sql
sql/backends/monet5/bam/bam_schema_1.sql
Removed Files:
sql/backends/monet5/bam/bam_clear.sql
sql/backends/monet5/bam/bam_schema.sql
Modified Files:
sql/backends/monet5/bam/bamloader.c
sql/backends/monet5/bam/bamloader.h
Branch: DVframework_bam
Log Message:
Made adjustments to enable testing with different DB schemas
diffs (truncated from 702 to 300 lines):
diff --git a/sql/backends/monet5/bam/bam_clear.sql b/sql/backends/monet5/bam/bam_clear.sql
deleted file mode 100644
--- a/sql/backends/monet5/bam/bam_clear.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-DROP FUNCTION bam.bam_files_reg;
-DROP FUNCTION bam.bam_sq_reg;
-DROP FUNCTION bam.bam_rg_reg;
-DROP FUNCTION bam.bam_pg_reg;
-DROP FUNCTION bam.bam_alignments_reg;
-DROP FUNCTION bam.bam_alignments_extra_reg;
-
-DROP TABLE bam.alignments_extra;
-DROP TABLE bam.alignments;
-DROP TABLE bam.pg;
-DROP TABLE bam.rg;
-DROP TABLE bam.sq;
-DROP TABLE bam.files;
-DROP SCHEMA bam;
diff --git a/sql/backends/monet5/bam/bam_clear_1.sql b/sql/backends/monet5/bam/bam_clear_1.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/bam/bam_clear_1.sql
@@ -0,0 +1,14 @@
+DROP FUNCTION bam.bam_files_reg;
+DROP FUNCTION bam.bam_sq_reg;
+DROP FUNCTION bam.bam_rg_reg;
+DROP FUNCTION bam.bam_pg_reg;
+DROP FUNCTION bam.bam_alignments_reg;
+DROP FUNCTION bam.bam_alignments_extra_reg;
+
+DROP TABLE bam.alignments_extra;
+DROP TABLE bam.alignments;
+DROP TABLE bam.pg;
+DROP TABLE bam.rg;
+DROP TABLE bam.sq;
+DROP TABLE bam.files;
+DROP SCHEMA bam;
diff --git a/sql/backends/monet5/bam/bam_schema.sql b/sql/backends/monet5/bam/bam_schema.sql
deleted file mode 100644
--- a/sql/backends/monet5/bam/bam_schema.sql
+++ /dev/null
@@ -1,98 +0,0 @@
-CREATE SCHEMA bam;
-
-CREATE TABLE "bam"."files" (
- "file_location" STRING NOT NULL UNIQUE,
- "format_version" REAL,
- "sorting_order" VARCHAR(10),
- "comments" STRING,
- CONSTRAINT "files_pkey_file_location" PRIMARY KEY (file_location)
-);
-
-CREATE TABLE "bam"."sq" (
- "sn" STRING NOT NULL,
- "file_location" STRING NOT NULL,
- "ln" INT NOT NULL,
- "as" INT,
- "m5" STRING,
- "sp" STRING,
- "ur" STRING,
- CONSTRAINT "sq_pkey_sn_file_location" PRIMARY KEY (sn, file_location),
- CONSTRAINT "sq_fkey_file_location" FOREIGN KEY (file_location) REFERENCES bam.files (file_location)
-);
-
-CREATE TABLE "bam"."rg" (
- "id" STRING NOT NULL,
- "file_location" STRING NOT NULL,
- "cn" STRING,
- "ds" STRING,
- "dt" TIMESTAMP,
- "fo" STRING,
- "ks" STRING,
- "lb" STRING,
- "pg" STRING,
- "pi" INT,
- "PL" STRING,
- "PU" STRING,
- "SM" STRING,
- CONSTRAINT "rg_pkey_id_file_location" PRIMARY KEY (id, file_location),
- CONSTRAINT "rg_fkey_file_location" FOREIGN KEY (file_location) REFERENCES bam.files (file_location)
-);
-
-CREATE TABLE "bam"."pg" (
- "id" STRING NOT NULL,
- "file_location" STRING NOT NULL,
- "pn" STRING,
- "cl" STRING,
- "pp" STRING,
- "vn" STRING,
- CONSTRAINT "pg_pkey_id_file_location" PRIMARY KEY (id, file_location),
- CONSTRAINT "pg_fkey_file_location" FOREIGN KEY (file_location) REFERENCES bam.files (file_location)
-);
-
-CREATE TABLE "bam"."alignments" (
- "file_location" STRING NOT NULL,
- "virtual_offset" BIGINT NOT NULL,
- "qname" STRING NOT NULL,
- "flag_temp_mult_segm" BOOLEAN NOT NULL,
- "flag_each_segm_prop_alig" BOOLEAN NOT NULL,
- "flag_segm_unma" BOOLEAN NOT NULL,
- "flag_next_segm_unma" BOOLEAN NOT NULL,
- "flag_seq_reve_comp" BOOLEAN NOT NULL,
- "flag_seq_next_segm_reve" BOOLEAN NOT NULL,
- "flag_first_segm" BOOLEAN NOT NULL,
- "flag_last_segm" BOOLEAN NOT NULL,
- "flag_seco_alig" BOOLEAN NOT NULL,
- "flag_not_pass_qual_cont" BOOLEAN NOT NULL,
- "flag_pcr_opti_dupl" BOOLEAN NOT NULL,
- "rname" STRING NOT NULL,
- "pos" INT NOT NULL,
- "mapq" INT NOT NULL,
- "cigar" STRING NOT NULL,
- "rnext" STRING NOT NULL,
- "pnext" INT NOT NULL,
- "tlen" INT NOT NULL,
- "seq" STRING NOT NULL,
- "qual" STRING NOT NULL,
- CONSTRAINT "alignments_pkey_file_location_virtual_offset" PRIMARY KEY (file_location, virtual_offset),
- CONSTRAINT "alignments_fkey_file_location" FOREIGN KEY (file_location) REFERENCES bam.files (file_location)
-);
-
-CREATE TABLE "bam"."alignments_extra" (
- "tag" CHAR(2) NOT NULL,
- "file_location" STRING NOT NULL,
- "virtual_offset" BIGINT NOT NULL,
- "type" CHAR(1) NOT NULL,
- "value" STRING,
- CONSTRAINT "alignments_extra_pkey_tag_file_location_virtual_offset" PRIMARY KEY (tag, file_location, virtual_offset),
- CONSTRAINT "alignments_extra_fkey_file_location_virtual_offset" FOREIGN KEY (file_location, virtual_offset) REFERENCES bam.alignments (file_location, virtual_offset)
-);
-
-
-CREATE FUNCTION bam.bam_files_reg(ticket bigint, table_idx int) RETURNS table("file_location" string, "format_version" real, "sorting_order" string, "comments" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_sq_reg(ticket bigint, table_idx int) RETURNS table("sn" string, "file_location" string, "ln" int, "as" int, "m5" string, "sp" string, "ur" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_rg_reg(ticket bigint, table_idx int) RETURNS table("id" string, "file_location" string, "cn" string, "ds" string, "dt" timestamp, "fo" string, "ks" string, "lb" string, "pg" string, "pi" int, "pl" string, "pu" string, "sm" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_pg_reg(ticket bigint, table_idx int) RETURNS table("id" string, "file_location" string, "pn" string, "cl" string, "pp" string, "vn" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_alignments_reg(ticket bigint, table_idx int) RETURNS table("file_location" string, "virtual_offset" bigint, "qname" string, "flag_temp_mult_segm" boolean, "flag_each_segm_prop_alig" boolean, "flag_segm_unma" boolean, "flag_next_segm_unma" boolean, "flag_seq_reve_comp" boolean, "flag_seq_next_segm_reve" boolean, "flag_first_segm" boolean, "flag_last_segm" boolean, "flag_seco_alig" boolean, "flag_not_pass_qual_cont" boolean, "flag_pcr_opti_dupl" boolean, "rname" string, "pos" int, "mapq" int, "cigar" string, "rnext" string, "pnext" int, "tlen" int, "seq" string, "qual" string) external name bam.register_table;
-CREATE FUNCTION bam.bam_alignments_extra_reg(ticket bigint, table_idx int) RETURNS table("tag" string, "file_location" string, "virtual_offset" bigint, "type" char, "value" string) external name bam.register_table;
-
-
diff --git a/sql/backends/monet5/bam/bam_schema_1.sql b/sql/backends/monet5/bam/bam_schema_1.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/bam/bam_schema_1.sql
@@ -0,0 +1,89 @@
+CREATE SCHEMA bam;
+
+CREATE TABLE "bam"."files" (
+ "file_id" SMALLINT NOT NULL UNIQUE,
+ "file_location" STRING NOT NULL UNIQUE,
+ "format_version" REAL,
+ "sorting_order" VARCHAR(10),
+ "comments" STRING,
+ CONSTRAINT "files_pkey_file_id" PRIMARY KEY (file_id)
+);
+
+CREATE TABLE "bam"."sq" (
+ "sn" STRING NOT NULL,
+ "file_id" SMALLINT NOT NULL,
+ "ln" INT NOT NULL,
+ "as" INT,
+ "m5" STRING,
+ "sp" STRING,
+ "ur" STRING,
+ CONSTRAINT "sq_pkey_sn_file_id" PRIMARY KEY (sn, file_id),
+ CONSTRAINT "sq_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+CREATE TABLE "bam"."rg" (
+ "id" STRING NOT NULL,
+ "file_id" SMALLINT NOT NULL,
+ "cn" STRING,
+ "ds" STRING,
+ "dt" TIMESTAMP,
+ "fo" STRING,
+ "ks" STRING,
+ "lb" STRING,
+ "pg" STRING,
+ "pi" INT,
+ "PL" STRING,
+ "PU" STRING,
+ "SM" STRING,
+ CONSTRAINT "rg_pkey_id_file_id" PRIMARY KEY (id, file_id),
+ CONSTRAINT "rg_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+CREATE TABLE "bam"."pg" (
+ "id" STRING NOT NULL,
+ "file_id" SMALLINT NOT NULL,
+ "pn" STRING,
+ "cl" STRING,
+ "pp" STRING,
+ "vn" STRING,
+ CONSTRAINT "pg_pkey_id_file_id" PRIMARY KEY (id, file_id),
+ CONSTRAINT "pg_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+CREATE TABLE "bam"."alignments" (
+ "virtual_offset" BIGINT NOT NULL,
+ "file_id" SMALLINT NOT NULL,
+ "qname" STRING NOT NULL,
+ "flag" SMALLINT NOT NULL,
+ "rname" STRING NOT NULL,
+ "pos" INT NOT NULL,
+ "mapq" SMALLINT NOT NULL,
+ "cigar" STRING NOT NULL,
+ "rnext" STRING NOT NULL,
+ "pnext" INT NOT NULL,
+ "tlen" INT NOT NULL,
+ "seq" STRING NOT NULL,
+ "qual" STRING NOT NULL,
+ CONSTRAINT "alignments_pkey_virtual_offset_file_id" PRIMARY KEY (virtual_offset, file_id),
+ CONSTRAINT "alignments_fkey_file_id" FOREIGN KEY (file_id) REFERENCES bam.files (file_id)
+);
+
+CREATE TABLE "bam"."alignments_extra" (
+ "tag" CHAR(2) NOT NULL,
+ "virtual_offset" BIGINT NOT NULL,
+ "file_id" SMALLINT NOT NULL,
+ "type" CHAR(1) NOT NULL,
+ "value" STRING,
+ CONSTRAINT "alignments_extra_pkey_tag_virtual_offset_file_id" PRIMARY KEY (tag, virtual_offset, file_id),
+ CONSTRAINT "alignments_extra_fkey_virtual_offset_file_id" FOREIGN KEY (virtual_offset, file_id) REFERENCES bam.alignments (virtual_offset, file_id)
+);
+
+
+CREATE FUNCTION bam.bam_files_reg(ticket bigint, table_idx int) RETURNS table("file_id" int, "file_location" string, "format_version" real, "sorting_order" string, "comments" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_sq_reg(ticket bigint, table_idx int) RETURNS table("sn" string, "file_id" smallint, "ln" int, "as" int, "m5" string, "sp" string, "ur" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_rg_reg(ticket bigint, table_idx int) RETURNS table("id" string, "file_id" smallint, "cn" string, "ds" string, "dt" timestamp, "fo" string, "ks" string, "lb" string, "pg" string, "pi" int, "pl" string, "pu" string, "sm" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_pg_reg(ticket bigint, table_idx int) RETURNS table("id" string, "file_id" smallint, "pn" string, "cl" string, "pp" string, "vn" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_alignments_reg(ticket bigint, table_idx int) RETURNS table("file_id" smallint, "virtual_offset" bigint, "qname" string, "flag" int, "rname" string, "pos" int, "mapq" int, "cigar" string, "rnext" string, "pnext" int, "tlen" int, "seq" string, "qual" string) external name bam.register_table;
+CREATE FUNCTION bam.bam_alignments_extra_reg(ticket bigint, table_idx int) RETURNS table("tag" string, "file_id" smallint, "virtual_offset" bigint, "type" char, "value" string) external name bam.register_table;
+
+
diff --git a/sql/backends/monet5/bam/bamloader.c b/sql/backends/monet5/bam/bamloader.c
--- a/sql/backends/monet5/bam/bamloader.c
+++ b/sql/backends/monet5/bam/bamloader.c
@@ -10,7 +10,6 @@
typedef struct {
bat *column_bats; /* keeps bats of the columns: lower array */
str *column_names; /* names of columns that are kept in the higher array */
- str *column_types_strs; /* type strings of columns */
} temp_subcontainer;
/*
@@ -45,12 +44,35 @@ typedef struct {
FILE *logfile = NULL; /* keep logfile file opened in this global var while the bam code runs, */
/* since opening and closing every time something has to be written turned out to be very slow */
+/* SQL schema details that are common to every schema */
+int num_col_files = 5;
+int num_col_sq = 7;
+int num_col_rg = 13;
+int num_col_pg = 6;
+int num_col_alignments_extra = 5;
+
+str coln_files[] = {"file_id", "file_location", "format_version", "sorting_order", "comments"};
+int colt_files[] = {TYPE_sht , TYPE_str , TYPE_flt , TYPE_str , TYPE_str };
+
+str coln_sq[] = {"sn" , "file_id", "ln" , "as" , "m5" , "sp" , "ur" };
+int colt_sq[] = {TYPE_str, TYPE_sht , TYPE_int, TYPE_int, TYPE_str, TYPE_str, TYPE_str};
+
+str coln_rg[] = {"id" , "file_id", "cn" , "ds" , "dt" , "fo" , "ks" , "lb" , "pg" , "pi" , "pl" , "pu" , "sm" };
+int colt_rg[] = {TYPE_str, TYPE_sht , TYPE_str, TYPE_str, TYPE_int, TYPE_str, TYPE_str, TYPE_str, TYPE_str, TYPE_int, TYPE_str, TYPE_str, TYPE_str};
+
+str coln_pg[] = {"id" , "file_id", "pn" , "cl" , "pp" , "vn" };
+int colt_pg[] = {TYPE_str, TYPE_sht , TYPE_str, TYPE_str, TYPE_str, TYPE_str };
+
+str coln_alignments_extra[] = {"tag" , "virtual_offset", "file_id", "type" , "value" };
+int colt_alignments_extra[] = {TYPE_str, TYPE_lng , TYPE_sht , TYPE_str, TYPE_str};
+
+
/* File format specific functions */
-static str init_temp_container(temp_container *ret_tc);
+static str init_temp_container_simple(temp_container *ret_tc);
static str loadfile(str filepath, temp_container *ret_tc); /* load file and add contents to tc */
-static str process_bam_header(str filepath, str header, temp_container *ret_tc);
+static str process_bam_header(int file_id, str header, temp_container *ret_tc);
static int append_option_to_bat_cond_str(temp_container *ret_tc, bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
static int append_option_to_bat_cond_lng(temp_container *ret_tc, bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
static int append_option_to_bat_cond_flt(temp_container *ret_tc, bam_header_option *opt, str cmp, int table, int col, int *appendErr, int *flag);
@@ -58,18 +80,16 @@ static int append_option_to_bat_cond_tms
static int append_option_to_bat(temp_container *ret_tc, ptr value, int table, int col, int *appendErr, int *flag);
static str read_bam_header_line(str *header, bam_header_line *ret_hl, int *eof);
static void free_bam_header_line(bam_header_line *hl);
-static str process_bam_alignment(str filepath, lng virtual_offset, bam_header_t *header, bam1_t *alignment, temp_container *ret_tc);
+static str process_bam_alignment(int file_id, lng virtual_offset, bam_header_t *header, bam1_t *alignment, temp_container *ret_tc, int schema);
static int parse_alignment_str(str *sam_alig, str *dest);
static int parse_alignment_lng(str *sam_alig, lng *dest);
/* Generic functions */
-static str init_temp_subcontainer(temp_subcontainer *ret_tsc,
- str *col_names, str *col_types_strs, int *col_types, int num_cols);
+static str init_temp_subcontainer(temp_subcontainer *ret_tsc, str *col_names, int *col_types, int num_cols);
static str append_to_bat(bat cb, ptr val);
static str insert_into_vault(Client cntxt, temp_container* tc);
static int read_string_until_delim(str *src, str *ret, char *delims, int num_delims);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list