Changeset: a02498cf33c0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a02498cf33c0
Added Files:
sql/backends/monet5/bamloader/bam_db_interface.c
sql/backends/monet5/bamloader/bam_db_interface.h
sql/backends/monet5/bamloader/bam_wrapper.c
sql/backends/monet5/bamloader/bam_wrapper.h
Removed Files:
sql/backends/monet5/bamloader/bam_sql.c
sql/backends/monet5/bamloader/bam_sql.h
sql/backends/monet5/bamloader/sql/bam_clear.sql
sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_0.sql
sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_1.sql
sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_0.sql
sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_1.sql
sql/backends/monet5/bamloader/sql/bam_schema.sql
Modified Files:
sql/backends/monet5/bamloader/85_bam.sql
sql/backends/monet5/bamloader/Makefile.ag
sql/backends/monet5/bamloader/bam.mal
sql/backends/monet5/bamloader/bam_globals.h
sql/backends/monet5/bamloader/bam_loader.c
sql/backends/monet5/bamloader/bam_loader.h
Branch: bamloader
Log Message:
Finished basic bam loader functionality. Divided code nicely over multiple
files so it will be relatively easy to separate the code between client-side
code and server-side code at some later point.
diffs (truncated from 4985 to 300 lines):
diff --git a/sql/backends/monet5/bamloader/85_bam.sql
b/sql/backends/monet5/bamloader/85_bam.sql
--- a/sql/backends/monet5/bamloader/85_bam.sql
+++ b/sql/backends/monet5/bamloader/85_bam.sql
@@ -1,10 +1,13 @@
-CREATE PROCEDURE bam_loader_repos(bam_repos STRING, dbschema INT, storage_mask
STRING, nr_threads INT)
+CREATE PROCEDURE bam_loader_repos(bam_repos STRING, dbschema SMALLINT,
nr_threads SMALLINT)
EXTERNAL NAME bam.bam_loader_repos;
-CREATE PROCEDURE bam_loader_file(bam_file STRING, dbschema INT, storage_mask
STRING)
+CREATE PROCEDURE bam_loader_files(bam_files STRING, dbschema SMALLINT,
nr_threads SMALLINT)
+EXTERNAL NAME bam.bam_loader_files;
+
+CREATE PROCEDURE bam_loader_file(bam_file STRING, dbschema SMALLINT)
EXTERNAL NAME bam.bam_loader_file;
-CREATE PROCEDURE bam_drop_file(file_id SMALLINT, dbschema SMALLINT)
+CREATE PROCEDURE bam_drop_file(file_id BIGINT, dbschema SMALLINT)
EXTERNAL NAME bam.bam_drop_file;
diff --git a/sql/backends/monet5/bamloader/Makefile.ag
b/sql/backends/monet5/bamloader/Makefile.ag
--- a/sql/backends/monet5/bamloader/Makefile.ag
+++ b/sql/backends/monet5/bamloader/Makefile.ag
@@ -29,12 +29,14 @@ INCLUDES = .. \
../../../../common/options \
../../../../common/stream \
../../../../gdk \
+ ../../../../tools/merovingian \
+ ../../../../tools/merovingian/daemon \
$(SAMTOOLS_CFLAGS)
lib__bam = {
MODULE
DIR = libdir/monetdb5
- SOURCES = bam_loader.c bam_loader.h bam_lib.h bam_lib.c bam_sql.h
bam_sql.c
+ SOURCES = bam_loader.c bam_loader.h bam_wrapper.c bam_wrapper.h
bam_db_interface.c bam_db_interface.h bam_globals.h bam_lib.h bam_lib.c
LIBS = ../../../../monetdb5/tools/libmonetdb5 \
../../../../gdk/libbat \
$(SAMTOOLS_LIBS)
diff --git a/sql/backends/monet5/bamloader/bam.mal
b/sql/backends/monet5/bamloader/bam.mal
--- a/sql/backends/monet5/bamloader/bam.mal
+++ b/sql/backends/monet5/bamloader/bam.mal
@@ -1,19 +1,23 @@
module bam;
-# Bam_loader related signatures
+# Bam loader related signatures
-pattern bam_loader_repos(bam_repos_list:str, dbschema:int, storage_mask:str,
nr_threads:int):void
+pattern bam_loader_repos(bam_repos:str, dbschema:sht, nr_threads:sht):void
address bam_loader_repos
-comment "Read the files in the given list of bam files and store them in the
database";
+comment "Read all bam files in the given bam_repos directory (non-recursive)
and store them in the given dbschema";
-pattern bam_loader_file(bam_file:str, dbschema:int, storage_mask:str):void
+pattern bam_loader_files(bam_files:str, dbschema:sht, nr_threads:sht):void
+address bam_loader_files
+comment "Read all bam files in the file list stored in the file bam_files
(separated by a newline) and store them in the given dbschema";
+
+pattern bam_loader_file(bam_file:str, dbschema:sht):void
address bam_loader_file
-comment "Read the bam file given as the first parameter and store it in the
database";
+comment "Read bam_file and store it in the given dbschema";
-pattern bam_drop_file(file_id:sht, dbschema:sht):void
+pattern bam_drop_file(file_id:lng, dbschema:sht):void
address bam_drop_file
-comment "Drop alignment tables and header data for the given file"
+comment "Drop alignment tables and header data for the bam file with the given
file_id"
# Scalar signatures for bam_lib
diff --git a/sql/backends/monet5/bamloader/bam_db_interface.c
b/sql/backends/monet5/bamloader/bam_db_interface.c
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/bamloader/bam_db_interface.c
@@ -0,0 +1,482 @@
+#include "monetdb_config.h"
+#include "bam_globals.h"
+#include "bam_db_interface.h"
+/*
+ * SQL template that creates two tables:
+ *  - bam.alignments_<n>, with primary key (virtual_offset);
+ *  - bam.alignments_extra_<n>, with primary key (tag, virtual_offset) and a
+ *    foreign key on virtual_offset that references bam.alignments_<n>.
+ * The template contains six LLFMT placeholders, one for every occurrence of
+ * the numeric suffix <n>. Presumably all six receive the same file id --
+ * TODO confirm against the caller.
+ */
+#define SQL_CREATE_STORAGE_0 \
+ "CREATE TABLE bam.alignments_"LLFMT" ( \n\
+ virtual_offset BIGINT NOT NULL, \n\
+ qname STRING NOT NULL, \n\
+ flag SMALLINT NOT NULL, \n\
+ rname STRING NOT NULL, \n\
+ pos INT NOT NULL, \n\
+ mapq SMALLINT NOT NULL, \n\
+ cigar STRING NOT NULL, \n\
+ rnext STRING NOT NULL, \n\
+ pnext INT NOT NULL, \n\
+ tlen INT NOT NULL, \n\
+ seq STRING NOT NULL, \n\
+ qual STRING NOT NULL, \n\
+ CONSTRAINT alignments_"LLFMT"_pkey_virtual_offset PRIMARY KEY
(virtual_offset) \n\
+ ); \n\
+ \n\
+ CREATE TABLE bam.alignments_extra_"LLFMT" ( \n\
+ tag CHAR(2) NOT NULL, \n\
+ virtual_offset BIGINT NOT NULL, \n\
+ type CHAR(1) NOT NULL, \n\
+ value STRING, \n\
+ CONSTRAINT alignments_extra_"LLFMT"_pkey_tag_virtual_offset PRIMARY
KEY (tag, virtual_offset), \n\
+ CONSTRAINT alignments_extra_"LLFMT"_fkey_virtual_offset FOREIGN KEY
(virtual_offset) \n\
+ REFERENCES bam.alignments_"LLFMT" (virtual_offset) \n\
+ );"
+/*
+ * SQL template that creates four tables and three views:
+ *  - bam.paired_primary_alignments_<n> and bam.paired_secondary_alignments_<n>:
+ *    one row holds an l_* and an r_* set of alignment columns plus a shared
+ *    qname; primary key (l_virtual_offset, r_virtual_offset);
+ *  - bam.unpaired_alignments_<n>: primary key (virtual_offset);
+ *  - bam.alignments_extra_<n>: primary key (tag, virtual_offset). Unlike in
+ *    SQL_CREATE_STORAGE_0 it has no foreign key;
+ *  - views bam.unpaired_primary_alignments_<n> and
+ *    bam.unpaired_secondary_alignments_<n>: UNION ALL of the l_* columns and
+ *    the r_* columns of the corresponding paired table, renamed to the
+ *    unprefixed column names;
+ *  - view bam.unpaired_all_alignments_<n>: UNION ALL of the two views above
+ *    and bam.unpaired_alignments_<n>.
+ * The template contains eighteen LLFMT placeholders, one for every occurrence
+ * of the numeric suffix <n>. Presumably all receive the same file id --
+ * TODO confirm against the caller.
+ */
+#define SQL_CREATE_STORAGE_1 \
+ "CREATE TABLE bam.paired_primary_alignments_"LLFMT" ( \n\
+ l_virtual_offset BIGINT NOT NULL, \n\
+ r_virtual_offset BIGINT NOT NULL, \n\
+ qname STRING NOT NULL, \n\
+ l_flag SMALLINT NOT NULL, \n\
+ l_rname STRING NOT NULL, \n\
+ l_pos INT NOT NULL, \n\
+ l_mapq SMALLINT NOT NULL, \n\
+ l_cigar STRING NOT NULL, \n\
+ l_rnext STRING NOT NULL, \n\
+ l_pnext INT NOT NULL, \n\
+ l_tlen INT NOT NULL, \n\
+ l_seq STRING NOT NULL, \n\
+ l_qual STRING NOT NULL, \n\
+ r_flag SMALLINT NOT NULL, \n\
+ r_rname STRING NOT NULL, \n\
+ r_pos INT NOT NULL, \n\
+ r_mapq SMALLINT NOT NULL, \n\
+ r_cigar STRING NOT NULL, \n\
+ r_rnext STRING NOT NULL, \n\
+ r_pnext INT NOT NULL, \n\
+ r_tlen INT NOT NULL, \n\
+ r_seq STRING NOT NULL, \n\
+ r_qual STRING NOT NULL, \n\
+ CONSTRAINT
paired_primary_alignments_"LLFMT"_pkey_l_virtual_offset_r_virtual_offset \n\
+ PRIMARY KEY (l_virtual_offset, r_virtual_offset) \n\
+ ); \n\
+ \n\
+ CREATE TABLE bam.paired_secondary_alignments_"LLFMT" ( \n\
+ l_virtual_offset BIGINT NOT NULL, \n\
+ r_virtual_offset BIGINT NOT NULL, \n\
+ qname STRING NOT NULL, \n\
+ l_flag SMALLINT NOT NULL, \n\
+ l_rname STRING NOT NULL, \n\
+ l_pos INT NOT NULL, \n\
+ l_mapq SMALLINT NOT NULL, \n\
+ l_cigar STRING NOT NULL, \n\
+ l_rnext STRING NOT NULL, \n\
+ l_pnext INT NOT NULL, \n\
+ l_tlen INT NOT NULL, \n\
+ l_seq STRING NOT NULL, \n\
+ l_qual STRING NOT NULL, \n\
+ r_flag SMALLINT NOT NULL, \n\
+ r_rname STRING NOT NULL, \n\
+ r_pos INT NOT NULL, \n\
+ r_mapq SMALLINT NOT NULL, \n\
+ r_cigar STRING NOT NULL, \n\
+ r_rnext STRING NOT NULL, \n\
+ r_pnext INT NOT NULL, \n\
+ r_tlen INT NOT NULL, \n\
+ r_seq STRING NOT NULL, \n\
+ r_qual STRING NOT NULL, \n\
+ CONSTRAINT
paired_secondary_alignments_"LLFMT"_pkey_l_virtual_offset_r_virtual_offset \n\
+ PRIMARY KEY (l_virtual_offset, r_virtual_offset) \n\
+ ); \n\
+ \n\
+ CREATE TABLE bam.unpaired_alignments_"LLFMT" ( \n\
+ virtual_offset BIGINT NOT NULL, \n\
+ qname STRING NOT NULL, \n\
+ flag SMALLINT NOT NULL, \n\
+ rname STRING NOT NULL, \n\
+ pos INT NOT NULL, \n\
+ mapq SMALLINT NOT NULL, \n\
+ cigar STRING NOT NULL, \n\
+ rnext STRING NOT NULL, \n\
+ pnext INT NOT NULL, \n\
+ tlen INT NOT NULL, \n\
+ seq STRING NOT NULL, \n\
+ qual STRING NOT NULL, \n\
+ CONSTRAINT unpaired_alignments_"LLFMT"_pkey_virtual_offset PRIMARY KEY
(virtual_offset) \n\
+ ); \n\
+ \n\
+ CREATE TABLE bam.alignments_extra_"LLFMT" ( \n\
+ tag CHAR(2) NOT NULL, \n\
+ virtual_offset BIGINT NOT NULL, \n\
+ type CHAR(1) NOT NULL, \n\
+ value STRING, \n\
+ CONSTRAINT alignments_extra_"LLFMT"_pkey_tag_virtual_offset PRIMARY KEY
(tag, virtual_offset) \n\
+ ); \n\
+ \n\
+ CREATE VIEW bam.unpaired_primary_alignments_"LLFMT" AS \n\
+ SELECT l_virtual_offset AS virtual_offset, qname, l_flag AS flag,
l_rname AS rname, l_pos AS pos, l_mapq AS mapq, \n\
+ l_cigar AS cigar, l_rnext AS rnext, l_pnext AS pnext, l_tlen AS
tlen, l_seq AS seq, l_qual AS qual \n\
+ FROM bam.paired_primary_alignments_"LLFMT" \n\
+ UNION ALL \n\
+ SELECT r_virtual_offset AS virtual_offset, qname, r_flag AS flag,
r_rname AS rname, r_pos AS pos, r_mapq AS mapq, \n\
+ r_cigar AS cigar, r_rnext AS rnext, r_pnext AS pnext, r_tlen AS
tlen, r_seq AS seq, r_qual AS qual \n\
+ FROM bam.paired_primary_alignments_"LLFMT"; \n\
+ \n\
+ CREATE VIEW bam.unpaired_secondary_alignments_"LLFMT" AS \n\
+ SELECT l_virtual_offset AS virtual_offset, qname, l_flag AS flag,
l_rname AS rname, l_pos AS pos, l_mapq AS mapq, \n\
+ l_cigar AS cigar, l_rnext AS rnext, l_pnext AS pnext, l_tlen AS
tlen, l_seq AS seq, l_qual AS qual \n\
+ FROM bam.paired_secondary_alignments_"LLFMT" \n\
+ UNION ALL \n\
+ SELECT r_virtual_offset AS virtual_offset, qname, r_flag AS flag,
r_rname AS rname, r_pos AS pos, r_mapq AS mapq, \n\
+ r_cigar AS cigar, r_rnext AS rnext, r_pnext AS pnext, r_tlen AS
tlen, r_seq AS seq, r_qual AS qual \n\
+ FROM bam.paired_secondary_alignments_"LLFMT"; \n\
+ \n\
+ CREATE VIEW bam.unpaired_all_alignments_"LLFMT" AS \n\
+ SELECT * \n\
+ FROM bam.unpaired_primary_alignments_"LLFMT" \n\
+ UNION ALL \n\
+ SELECT * \n\
+ FROM bam.unpaired_secondary_alignments_"LLFMT" \n\
+ UNION ALL \n\
+ SELECT * \n\
+ FROM bam.unpaired_alignments_"LLFMT";"
+/*
+ * SQL template that deletes the rows with a given file_id from bam.pg,
+ * bam.rg, bam.sq and bam.files, in that order.
+ * Contains four LLFMT placeholders, each compared against file_id, so the
+ * same id has to be supplied four times when formatting.
+ */
+#define SQL_DROP_HEADER \
+ "DELETE FROM bam.pg WHERE file_id = "LLFMT";\n" \
+ "DELETE FROM bam.rg WHERE file_id = "LLFMT";\n" \
+ "DELETE FROM bam.sq WHERE file_id = "LLFMT";\n" \
+ "DELETE FROM bam.files WHERE file_id = "LLFMT";\n"
+
+/*
+ * SQL template that drops the two tables created by SQL_CREATE_STORAGE_0.
+ * Contains two LLFMT placeholders, one per table-name suffix.
+ *
+ * Order matters: bam.alignments_extra_<n> carries a foreign key that
+ * references bam.alignments_<n>, so the referencing table has to go first.
+ * Dropping the referenced table first is rejected because of the dependency.
+ */
+#define SQL_DROP_STORAGE_0 \
+ "DROP TABLE bam.alignments_extra_"LLFMT";\n" \
+ "DROP TABLE bam.alignments_"LLFMT";\n"
+/*
+ * SQL template that drops everything created by SQL_CREATE_STORAGE_1: the
+ * three views first, then the four tables.
+ * bam.unpaired_all_alignments_<n> is dropped before the other two views
+ * because SQL_CREATE_STORAGE_1 defines it on top of them; all views go before
+ * the tables they select from.
+ * Contains seven LLFMT placeholders, one per object-name suffix.
+ */
+#define SQL_DROP_STORAGE_1 \
+ "DROP VIEW bam.unpaired_all_alignments_"LLFMT";\n"\
+ "DROP VIEW bam.unpaired_secondary_alignments_"LLFMT";\n"\
+ "DROP VIEW bam.unpaired_primary_alignments_"LLFMT";\n"\
+ "DROP TABLE bam.paired_primary_alignments_"LLFMT";\n" \
+ "DROP TABLE bam.paired_secondary_alignments_"LLFMT";\n" \
+ "DROP TABLE bam.unpaired_alignments_"LLFMT";\n" \
+ "DROP TABLE bam.alignments_extra_"LLFMT";\n"
+
+
+/*
+ * COPY BINARY INTO templates for the header tables. Every '%s' in a FROM
+ * list is a placeholder for one quoted string:
+ *   SQL_COPY_INTO_FILES  6 placeholders
+ *   SQL_COPY_INTO_SQ     7 placeholders
+ *   SQL_COPY_INTO_RG    13 placeholders
+ *   SQL_COPY_INTO_PG     6 placeholders
+ * NOTE(review): presumably each string is the path of one per-column binary
+ * file -- confirm against the caller.
+ */
+#define SQL_COPY_INTO_FILES "COPY BINARY INTO bam.files FROM ('%s', '%s',
'%s', '%s', '%s', '%s');\n"
+#define SQL_COPY_INTO_SQ "COPY BINARY INTO bam.sq FROM ('%s', '%s',
'%s', '%s', '%s', '%s', '%s');\n"
+#define SQL_COPY_INTO_RG "COPY BINARY INTO bam.rg FROM ('%s', '%s',
'%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');\n"
+#define SQL_COPY_INTO_PG "COPY BINARY INTO bam.pg FROM ('%s', '%s',
'%s', '%s', '%s', '%s');\n"
+/*
+ * COPY BINARY INTO templates for the per-file alignment tables. Arguments
+ * are consumed in the order they appear in each template:
+ *   SQL_COPY_INTO_ALIGNMENTS         %s spliced directly in front of
+ *                                    "alignments_", LLFMT table suffix,
+ *                                    then 12 '%s' in the FROM list
+ *   SQL_COPY_INTO_PAIRED_ALIGNMENTS  %s spliced between "paired_" and
+ *                                    "_alignments_", LLFMT table suffix,
+ *                                    then 23 '%s' in the FROM list
+ *   SQL_COPY_INTO_ALIGNMENTS_EXTRA   LLFMT table suffix, then 4 '%s' in the
+ *                                    FROM list
+ * The FROM-list counts equal the column counts of the matching tables in
+ * SQL_CREATE_STORAGE_0 / SQL_CREATE_STORAGE_1.
+ * NOTE(review): judging from those table names the leading %s is presumably
+ * "" or "unpaired_" for the first template and "primary" or "secondary" for
+ * the second -- confirm against the caller.
+ */
+#define SQL_COPY_INTO_ALIGNMENTS "COPY BINARY INTO bam.%salignments_"LLFMT"
FROM \
+ ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s',
'%s');\n"
+#define SQL_COPY_INTO_PAIRED_ALIGNMENTS "COPY BINARY INTO
bam.paired_%s_alignments_"LLFMT" FROM \
+ ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', \
+ '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');\n"
+
+#define SQL_COPY_INTO_ALIGNMENTS_EXTRA "COPY BINARY INTO
bam.alignments_extra_"LLFMT" FROM ('%s', '%s', '%s', '%s');\n"
+/* Sizes, in chars, of the file-scope statement buffers declared below. */
+#define BUF_SIZE_CREATE_STORAGE_0 2048
+#define BUF_SIZE_CREATE_STORAGE_1 8192
+#define BUF_SIZE_DROP_FILE 1024
+#define BUF_SIZE_COPY_INTO 8192
+
+
+
+
+/*
+ * File-scope character buffers, one per statement family, sized by the
+ * BUF_SIZE_* constants. Presumably the SQL templates above are formatted
+ * into them before execution -- the code doing so is not visible here.
+ * NOTE(review): these are non-static globals shared by every caller; confirm
+ * that they are never written from several loader threads at once.
+ */
+char buf_sql_create_storage_0[BUF_SIZE_CREATE_STORAGE_0];
+char buf_sql_create_storage_1[BUF_SIZE_CREATE_STORAGE_1];
+char buf_sql_drop_file[BUF_SIZE_DROP_FILE];
+char buf_sql_copy_into[BUF_SIZE_COPY_INTO];
+
+
+
+
+/* TODO Find out if executed SQL queries get logged somewhere else already,
since in that case we shouldn't log it again */
+
+/**
+ * Binds to the schema called schemaname, creating it first when it cannot be
+ * bound.
+ *
+ * cntxt      client context handed to RUN_SQL
+ * m          mvc used for the mvc_bind_schema lookups
+ * schemaname name of the schema; pasted verbatim into the SQL statement
+ * descr      passed through to RUN_SQL
+ * ret        optional; when non-NULL it receives the bound sql_schema
+ *
+ * Returns MAL_SUCCEED on success. Returns an exception when the CREATE SCHEMA
+ * statement does not fit in the local buffer, when executing it fails, or
+ * when the schema still cannot be bound after it was created.
+ */
+str
+create_schema_if_not_exists(Client cntxt, mvc *m, str schemaname, str descr, sql_schema **ret) {
+    sql_schema *result;
+    if((result = mvc_bind_schema(m, schemaname)) == NULL) {
+        char buf_sql_create_schema[64];
+        /* RUN_SQL is handed the address of a str, hence the extra pointer variable */
+        str sql_create_schema = buf_sql_create_schema;
+        str msg;
+        int len;
+
+        len = snprintf(buf_sql_create_schema, sizeof(buf_sql_create_schema), "CREATE SCHEMA %s;", schemaname);
+        /* snprintf truncates silently; never execute a cut-off statement */
+        if(len < 0 || len >= (int) sizeof(buf_sql_create_schema)) {
+            throw(MAL, "create_schema_if_not_exists", "Schema name '%s' is too long", schemaname);
+        }
+
+        TO_LOG("<bam_loader> Creating schema '%s'...", schemaname);
+        RUN_SQL(cntxt, &sql_create_schema, descr, msg);
+        if(msg != MAL_SUCCEED) {
+            /* NOTE(review): REUSE_EXCEPTION presumably rewrites msg to embed the original text -- confirm in bam_globals.h */
+            REUSE_EXCEPTION(msg, MAL, "create_schema_if_not_exists", "Could not create bam schema: %s", msg);
+            return msg;
+        }
+        /* The statement ran without error; the schema must be bindable now */
+        if((result = mvc_bind_schema(m, schemaname)) == NULL) {
+            throw(MAL, "create_schema_if_not_exists", "Could not create bam schema");
+        }
+    }
+    if(ret) *ret = result;
+    return MAL_SUCCEED;
+}
+
+
+/**
+ * Function tries to bind to a table with the given name. If it fails (==
NULL), it attempts to create the table.
+ * The function fails if a binding to the table is impossible, even after
creation.
+ * If the function succeeds, it adjusts the optionally given pointer to point
to the bound sql_table.
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list