Changeset: 6a45c22ca31d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6a45c22ca31d
Modified Files:
sql/backends/monet5/bam/Tests/bam_lib.stable.err
sql/backends/monet5/bam/Tests/bam_lib.stable.out
sql/backends/monet5/bam/bam_lib.c
Branch: bamloader
Log Message:
Replaced the linear-time switch statement in the reverse_seq UDF with a
constant-time lookup table; invalid characters now yield '?' instead of an error
diffs (296 lines):
diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.err
b/sql/backends/monet5/bam/Tests/bam_lib.stable.err
--- a/sql/backends/monet5/bam/Tests/bam_lib.stable.err
+++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.err
@@ -1,9 +1,9 @@
stderr of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself:
-# 09:28:35 >
-# 09:28:35 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"mapi_open=true" "--set" "mapi_port=33399" "--set"
"mapi_usock=/var/tmp/mtest-22883/.s.monetdb.33399" "--set" "monet_prompt="
"--forcemito" "--set" "mal_listing=2"
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
"--set" "mal_listing=0" "--set" "embedded_r=yes"
-# 09:28:35 >
+# 14:48:31 >
+# 14:48:31 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"mapi_open=true" "--set" "mapi_port=35990" "--set"
"mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt="
"--forcemito" "--set" "mal_listing=2"
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
"--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 14:48:31 >
# builtin opt gdk_dbpath = /home/robin/MonetDB/PREFIX/var/monetdb5/dbfarm/demo
# builtin opt gdk_debug = 0
@@ -17,71 +17,23 @@ stderr of test 'bam_lib` in directory 's
# builtin opt sql_debug = 0
# cmdline opt gdk_nr_threads = 0
# cmdline opt mapi_open = true
-# cmdline opt mapi_port = 33399
-# cmdline opt mapi_usock = /var/tmp/mtest-22883/.s.monetdb.33399
+# cmdline opt mapi_port = 35990
+# cmdline opt mapi_usock = /var/tmp/mtest-20228/.s.monetdb.35990
# cmdline opt monet_prompt =
# cmdline opt mal_listing = 2
# cmdline opt gdk_dbpath =
/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam
# cmdline opt mal_listing = 0
# cmdline opt embedded_r = yes
# cmdline opt gdk_debug = 536870922
-# <bam_loader>: Loader started for 1 BAM file...
-# # <bam_loader> Retrieving next file id...
-# # <bam_loader> Initializing BAM wrapper for file
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # <bam_loader> Parsing header for file
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # <bam_loader> Creating alignment tables for file
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # CREATE TABLE bam.alignments_1 (
-# virtual_offset BIGINT NOT NULL,
-# qname STRING NOT
NULL,
-# flag SMALLINT
NOT NULL,
-# rname STRING NOT
NULL,
-# pos INT
NOT NULL,
-# mapq SMALLINT
NOT NULL,
-# cigar STRING NOT
NULL,
-# rnext STRING NOT
NULL,
-# pnext INT
NOT NULL,
-# tlen INT
NOT NULL,
-# seq STRING
NOT NULL,
-# qual STRING
NOT NULL,
-# CONSTRAINT alignments_1_pkey_virtual_offset PRIMARY KEY
(virtual_offset)
-# );
-#
-# CREATE TABLE bam.alignments_extra_1 (
-# tag CHAR(2)
NOT NULL,
-# virtual_offset BIGINT NOT NULL,
-# type CHAR(1)
NOT NULL,
-# value STRING,
-# CONSTRAINT alignments_extra_1_pkey_tag_virtual_offset PRIMARY
KEY (tag, virtual_offset),
-# CONSTRAINT alignments_extra_1_fkey_virtual_offset FOREIGN KEY
(virtual_offset)
-# REFERENCES bam.alignments_1 (virtual_offset)
-# );
-# # <bam_loader> Creating reader threads...
-# # <bam_loader> Waiting for reader threads to finish...
-# # <Thread 0> Starting on next file...
-# # <Thread 0> Processing alignments of file
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'
(file id 1)...
-# # <Thread 0> All alignments in file
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'
(file id 1) processed!
-# # <Thread 0> Starting on next file...
-# # <Thread 0> No files left to work on; thread done
-# # <bam_loader> Copying data into DB...
-# # COPY BINARY INTO bam.files FROM ('bam_binaries/1/files_0',
'bam_binaries/1/files_1', 'bam_binaries/1/files_2', 'bam_binaries/1/files_3',
'bam_binaries/1/files_4', 'bam_binaries/1/files_5');
-# COPY BINARY INTO bam.sq FROM ('bam_binaries/1/sq_0',
'bam_binaries/1/sq_1', 'bam_binaries/1/sq_2', 'bam_binaries/1/sq_3',
'bam_binaries/1/sq_4', 'bam_binaries/1/sq_5', 'bam_binaries/1/sq_6');
-# COPY BINARY INTO bam.pg FROM ('bam_binaries/1/pg_0',
'bam_binaries/1/pg_1', 'bam_binaries/1/pg_2', 'bam_binaries/1/pg_3',
'bam_binaries/1/pg_4', 'bam_binaries/1/pg_5');
-# COPY BINARY INTO bam.alignments_1 FROM ('bam_binaries/1/alignments_0',
'bam_binaries/1/alignments_1', 'bam_binaries/1/alignments_2',
'bam_binaries/1/alignments_3', 'bam_binaries/1/alignments_4',
'bam_binaries/1/alignments_5', 'bam_binaries/1/alignments_6',
'bam_binaries/1/alignments_7', 'bam_binaries/1/alignments_8',
'bam_binaries/1/alignments_9', 'bam_binaries/1/alignments_10',
'bam_binaries/1/alignments_11');
-# COPY BINARY INTO bam.alignments_extra_1 FROM
('bam_binaries/1/alignments_extra_0', 'bam_binaries/1/alignments_extra_1',
'bam_binaries/1/alignments_extra_2', 'bam_binaries/1/alignments_extra_3');
-#
-# # <bam_loader>: Loader finished processing 1 BAM file...
-#
-# 09:28:35 >
-# 09:28:35 > "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib"
-# 09:28:35 >
-MAPI = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399
+# 14:48:31 >
+# 14:48:31 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e"
"--host=/var/tmp/mtest-20228" "--port=35990"
+# 14:48:31 >
+
+MAPI = (monetdb) /var/tmp/mtest-20228/.s.monetdb.35990
QUERY = SELECT bam_flag(111, 'Fail-hard');
ERROR = !Unknown flag name given: Fail-hard
-MAPI = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399
-QUERY = SELECT reverse_seq('invalidchars');
-ERROR = !Invalid character found in sequence: 'i'
-# 09:28:36 >
-# 09:28:36 > "Done."
-# 09:28:36 >
+# 14:48:32 >
+# 14:48:32 > "Done."
+# 14:48:32 >
diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.out
b/sql/backends/monet5/bam/Tests/bam_lib.stable.out
--- a/sql/backends/monet5/bam/Tests/bam_lib.stable.out
+++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.out
@@ -1,9 +1,9 @@
stdout of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself:
-# 08:43:53 >
-# 08:43:53 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"mapi_open=true" "--set" "mapi_port=35352" "--set"
"mapi_usock=/var/tmp/mtest-9342/.s.monetdb.35352" "--set" "monet_prompt="
"--forcemito" "--set" "mal_listing=2"
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
"--set" "mal_listing=0" "--set" "embedded_r=yes"
-# 08:43:53 >
+# 14:48:31 >
+# 14:48:31 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"mapi_open=true" "--set" "mapi_port=35990" "--set"
"mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt="
"--forcemito" "--set" "mal_listing=2"
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
"--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 14:48:31 >
# MonetDB 5 server v11.20.0
# This is an unreleased version
@@ -13,47 +13,16 @@ stdout of test 'bam_lib` in directory 's
# Copyright (c) 1993-July 2008 CWI.
# Copyright (c) August 2008-2014 MonetDB B.V., all rights reserved
# Visit http://www.monetdb.org/ for further information
-# Listening for connection requests on mapi:monetdb://robin-xps13:35352/
-# Listening for UNIX domain connection requests on
mapi:monetdb:///var/tmp/mtest-9342/.s.monetdb.35352
+# Listening for connection requests on mapi:monetdb://robin-xps13:35990/
+# Listening for UNIX domain connection requests on
mapi:monetdb:///var/tmp/mtest-20228/.s.monetdb.35990
# MonetDB/SQL module loaded
# MonetDB/R module loaded
Ready.
-# SQL catalog created, loading sql scripts once
-# loading sql script: 09_like.sql
-# loading sql script: 10_math.sql
-# loading sql script: 11_times.sql
-# loading sql script: 12_url.sql
-# loading sql script: 13_date.sql
-# loading sql script: 14_inet.sql
-# loading sql script: 15_querylog.sql
-# loading sql script: 16_tracelog.sql
-# loading sql script: 19_cluster.sql
-# loading sql script: 20_vacuum.sql
-# loading sql script: 21_dependency_functions.sql
-# loading sql script: 22_clients.sql
-# loading sql script: 23_skyserver.sql
-# loading sql script: 24_zorder.sql
-# loading sql script: 25_debug.sql
-# loading sql script: 26_sysmon.sql
-# loading sql script: 39_analytics.sql
-# loading sql script: 39_analytics_hge.sql
-# loading sql script: 40_json.sql
-# loading sql script: 40_json_hge.sql
-# loading sql script: 41_jsonstore.sql
-# loading sql script: 45_uuid.sql
-# loading sql script: 75_storagemodel.sql
-# loading sql script: 80_statistics.sql
-# loading sql script: 80_udf.sql
-# loading sql script: 80_udf_hge.sql
-# loading sql script: 85_bam.sql
-# loading sql script: 89_generator_hge.sql
-# loading sql script: 90_generator.sql
-# loading sql script: 99_system.sql
-# 08:43:54 >
-# 08:43:54 > "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib"
-# 08:43:54 >
+# 14:48:31 >
+# 14:48:31 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e"
"--host=/var/tmp/mtest-20228" "--port=35990"
+# 14:48:31 >
#SET SCHEMA bam;
#SELECT bam_flag(1, 'mult_segm');
@@ -456,6 +425,12 @@ Ready.
% clob # type
% 15 # length
[ "NVBHDKMWSRYCGAT" ]
+#SELECT reverse_seq('invalidchars');
+% .L # table_name
+% reverse_seq_single_value # name
+% clob # type
+% 12 # length
+[ "????????????" ]
#SELECT seq, reverse_seq(seq) AS reverse_seq
#FROM bam.alignments_1;
% bam.alignments_1, bam.L # table_name
@@ -780,6 +755,6 @@ Ready.
% 8, 100, 8, 21, 1 # length
[ 17922987,
"TATACTTATAGAACAAATGAACCCAAAACCACATAAGGTAAACAACAAAGCTACTGGTTCAAAATTAAGCCTAACTTCAACAGTACCAGGCAAAAACCAT",
17922987, "3=1X1=1X43=1X16=1X33=", "T" ]
-# 08:43:54 >
-# 08:43:54 > "Done."
-# 08:43:54 >
+# 14:48:32 >
+# 14:48:32 > "Done."
+# 14:48:32 >
diff --git a/sql/backends/monet5/bam/bam_lib.c
b/sql/backends/monet5/bam/bam_lib.c
--- a/sql/backends/monet5/bam/bam_lib.c
+++ b/sql/backends/monet5/bam/bam_lib.c
@@ -54,68 +54,51 @@ bam_flag(bit * ret, sht * flag, str * na
return MAL_SUCCEED;
}
+char reverse_seq_map[] = {
+ 'T', //A
+ 'V', //B
+ 'G', //C
+ 'H', //D
+ 0 , //E
+ 0 , //F
+ 'C', //G
+ 'D', //H
+ 0 , //I
+ 0 , //J
+ 'M', //K
+ 0 , //L
+ 'K', //M
+ 'N', //N
+ 0 , //O
+ 0 , //P
+ 0 , //Q
+ 'Y', //R
+ 'S', //S
+ 'A', //T
+ 0 , //U
+ 'B', //V
+ 'W', //W
+ 0 , //X
+ 'R' //Y
+};
+
str
reverse_seq(str * ret, str * seq)
{
str result;
unsigned int i;
unsigned int len = strlen(*seq);
+ sht map_index;
result = GDKmalloc((len + 1) * sizeof(char));
if (result == NULL)
throw(MAL, "reverse_seq", MAL_MALLOC_FAIL);
+
for (i = 0; i < len; ++i) {
- switch ((*seq)[i]) {
- case 'A':
- result[len - i - 1] = 'T';
- break;
- case 'T':
- result[len - i - 1] = 'A';
- break;
- case 'C':
- result[len - i - 1] = 'G';
- break;
- case 'G':
- result[len - i - 1] = 'C';
- break;
- case 'R':
- result[len - i - 1] = 'Y';
- break;
- case 'Y':
- result[len - i - 1] = 'R';
- break;
- case 'S':
- result[len - i - 1] = 'S';
- break;
- case 'W':
- result[len - i - 1] = 'W';
- break;
- case 'K':
- result[len - i - 1] = 'M';
- break;
- case 'M':
- result[len - i - 1] = 'K';
- break;
- case 'H':
- result[len - i - 1] = 'D';
- break;
- case 'D':
- result[len - i - 1] = 'H';
- break;
- case 'V':
- result[len - i - 1] = 'B';
- break;
- case 'B':
- result[len - i - 1] = 'V';
- break;
- case 'N':
- result[len - i - 1] = 'N';
- break;
- default:
- GDKfree(result);
- throw(MAL, "reverse_seq",
- "Invalid character found in sequence: '%c'\n",
- (*seq)[i]);
+ map_index = (sht)((*seq)[i] - 'A');
+ if(map_index < 0 || map_index > 24 ||
+ (result[len - i - 1] =
reverse_seq_map[map_index]) == 0) {
+ result[len - i - 1] = '?';
}
}
result[len] = '\0';
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list