Changeset: a8bb9d5909ae for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a8bb9d5909ae
Modified Files:
        sql/backends/monet5/bam/bam_globals.h
        sql/backends/monet5/bam/bam_loader.c
        sql/backends/monet5/bam/bam_wrapper.c
        sql/backends/monet5/bam/bam_wrapper.h
Branch: bamloader
Log Message:

Started with support for loading SAM files. Not finished yet due to an 
annoying problem with Samtools. Will look further into it after implementing 
SAM/BAM export functionality.


diffs (truncated from 1284 to 300 lines):

diff --git a/sql/backends/monet5/bam/bam_globals.h 
b/sql/backends/monet5/bam/bam_globals.h
--- a/sql/backends/monet5/bam/bam_globals.h
+++ b/sql/backends/monet5/bam/bam_globals.h
@@ -38,7 +38,6 @@
 #endif
 
 
-
 #ifdef BAM_DEBUG
 
 /**
diff --git a/sql/backends/monet5/bam/bam_loader.c 
b/sql/backends/monet5/bam/bam_loader.c
--- a/sql/backends/monet5/bam/bam_loader.c
+++ b/sql/backends/monet5/bam/bam_loader.c
@@ -33,6 +33,22 @@
 #include "bam_loader.h"
 
 
+/* Checks whether filename (of length len) ends with .sam or .bam (case
+ * insensitive); names shorter than 4 chars are rejected without indexing */
+#define IS_SAMORBAM(filename, len) \
+    ((len) >= 4 && ((filename)[(len)-4] == '.') && \
+     ((filename)[(len)-3] == 'b' || (filename)[(len)-3] == 'B' || \
+         (filename)[(len)-3] == 's' || (filename)[(len)-3] == 'S') && \
+     ((filename)[(len)-2] == 'a' || (filename)[(len)-2] == 'A') && \
+     ((filename)[(len)-1] == 'm' || (filename)[(len)-1] == 'M'))
+
+/* Given a filename (of length len) that ends with either .sam or .bam, this
+ * macro checks if it ends with .bam; names shorter than 3 chars yield false */
+#define IS_BAM(samorbam_filename, len) \
+    ((len) >= 3 && ((samorbam_filename)[(len)-3] == 'b' || (samorbam_filename)[(len)-3] == 'B'))
+    
+
+
 typedef struct reader_thread_data {
        sht thread_id;
        /* BAM wrappers of all BAM files that have to be processed */
@@ -126,7 +142,7 @@ run_process_bam_alignments(void *d)
               bw->file_id);
 
        if ((data->msg =
-            process_bam_alignments(bw, data->failure)) != MAL_SUCCEED) {
+            process_alignments(bw, data->failure)) != MAL_SUCCEED) {
                TO_LOG("<Thread %d> Error while processing alignments of file 
'%s' (file id " LLFMT ") (%s)\n", data->thread_id, bw->file_location, 
bw->file_id, data->msg);
                REUSE_EXCEPTION(data->msg, MAL, "run_process_bam_alignments",
                                "Error while processing alignments of file '%s' 
(file id "
@@ -250,10 +266,11 @@ bam_loader(Client cntxt, MalBlkPtr mb, s
        memset(bws, 0, nr_files * sizeof(bam_wrapper));
 
        for (i = 0; i < nr_files; ++i) {
+           int fln = strlen(filenames[i]);
                TO_LOG("<bam_loader> Initializing BAM wrapper for file 
'%s'...\n", filenames[i]);
                if ((msg =
-                    init_bam_wrapper(bws + i, filenames[i], cur_file_id++,
-                                     dbschema)) != MAL_SUCCEED) {
+                    init_bam_wrapper(bws + i, (IS_BAM(filenames[i], fln) ? BAM 
: SAM), 
+                             filenames[i], cur_file_id++, dbschema)) != 
MAL_SUCCEED) {
                        goto cleanup;
                }
        }
@@ -262,7 +279,7 @@ bam_loader(Client cntxt, MalBlkPtr mb, s
        for (i = 0; i < nr_files; ++i) {
                TO_LOG("<bam_loader> Parsing BAM header for file '%s'...\n",
                       filenames[i]);
-               if ((msg = process_bam_header(bws + i)) != MAL_SUCCEED) {
+               if ((msg = process_header(bws + i)) != MAL_SUCCEED) {
                        goto cleanup;
                }
        }
@@ -408,14 +425,6 @@ bam_loader(Client cntxt, MalBlkPtr mb, s
 }
 
 
-/* Macro that checks whether or not a filename ends with .bam (case
- * insensitive) */
-#define IS_BAM(filename, len) \
-    (filename[len-4] == '.') && \
-    (filename[len-3] == 'b' || filename[len-3] == 'B') && \
-    (filename[len-2] == 'a' || filename[len-2] == 'A') && \
-    (filename[len-1] == 'm' || filename[len-1] == 'M') \
-
 /**
  * Gathers all BAM files in the given repository and calls bam_loader for 
these files
  */
@@ -452,17 +461,17 @@ bam_loader_repos(Client cntxt, MalBlkPtr
                goto cleanup;
        }
 
-       /* First, count number of BAM files */
+       /* First, count number of SAM/BAM files */
        while ((direntry = readdir(d)) != NULL) {
                int len = strlen(direntry->d_name);
 
-               if (IS_BAM(direntry->d_name, len))
+               if (IS_SAMORBAM(direntry->d_name, len))
                        ++filecount;
        }
 
        if (filecount == 0) {
                msg = createException(MAL, "bam_loader_repos",
-                                     "No BAM files found in directory '%s'",
+                                     "No SAM or BAM files found in directory 
'%s'",
                                      bam_repos);
                goto cleanup;
        }
@@ -484,18 +493,17 @@ bam_loader_repos(Client cntxt, MalBlkPtr
         * slashes in a path, but as far as I know, this is no problem
         * on all OS's */
        while ((direntry = readdir(d)) != NULL) {
-               /* Check if d_name has the .bam extension (case
+               /* Check if d_name has the .sam or .bam extension (case
                 * insensitive) */
                int len = strlen(direntry->d_name);
-
-               if (IS_BAM(direntry->d_name, len)) {
-                       /* This is a BAM file, construct its path and
+               if (IS_SAMORBAM(direntry->d_name, len)) {
+                       /* This is a SAM or BAM file, construct its path and
                         * add that to the files array */
                        if (snprintf
                            (path, 4096, "%s/%s", bam_repos,
                             direntry->d_name) < 0) {
                                msg = createException(MAL, "bam_loader_repos",
-                                                     "Could not construct 
filepath for BAM file '%s'",
+                                                     "Could not construct 
filepath for SAM/BAM file '%s'",
                                                      direntry->d_name);
                                goto cleanup;
                        }
diff --git a/sql/backends/monet5/bam/bam_wrapper.c 
b/sql/backends/monet5/bam/bam_wrapper.c
--- a/sql/backends/monet5/bam/bam_wrapper.c
+++ b/sql/backends/monet5/bam/bam_wrapper.c
@@ -24,8 +24,9 @@
 #include "monetdb_config.h"
 #include "mal_exception.h"
 #include "stream.h"
+#include "bam_globals.h"
 
-#include <samtools/bam.h>
+#include <samtools/sam.h>
 #ifdef HAVE_SAMTOOLS_KSTRING_H
 #include <samtools/kstring.h>
 #else
@@ -33,7 +34,6 @@
  * version */
 #include "mykstring.h"
 #endif
-#include "bam_globals.h"
 #include "bam_wrapper.h"
 
 str
@@ -63,7 +63,6 @@ get_ordering(str ord)
        return ORDERING_UNKNOWN;
 }
 
-
 static stream *
 bsopen(str filepath)
 {
@@ -96,8 +95,8 @@ bsopen(str filepath)
  * working directory.
  */
 str
-init_bam_wrapper(bam_wrapper * bw, str file_location, lng file_id,
-                sht dbschema)
+init_bam_wrapper(bam_wrapper * bw, filetype type, str file_location, 
+         lng file_id, sht dbschema)
 {
        unsigned int i;
        char flushdir[128];
@@ -120,22 +119,35 @@ init_bam_wrapper(bam_wrapper * bw, str f
                      file_location, flushdir, strerror(errno));
        }
 
-       /* Open BAM file */
-       if ((bw->input = bam_open(file_location, "r")) == NULL) {
-               throw(MAL, "init_bam_wrapper",
-                     ERR_INIT_BAM_WRAPPER "BAM file could not be opened",
-                     file_location);
-       }
-
-       /* Get BAM header */
-       if ((bw->header = bam_header_read(bw->input)) == NULL) {
-               throw(MAL, "init_bam_wrapper",
-                     ERR_INIT_BAM_WRAPPER "Unable to read header from file",
-                     file_location);
-       }
-
+    if (type == BAM) {
+           /* Open BAM file and read its header */
+           if ((bw->bam.input = bam_open(file_location, "r")) == NULL) {
+                   throw(MAL, "init_bam_wrapper",
+                         ERR_INIT_BAM_WRAPPER "BAM file could not be opened",
+                         file_location);
+           }
+           if ((bw->header = bam_header_read(bw->bam.input)) == NULL) {
+               throw(MAL, "init_bam_wrapper",
+                   ERR_INIT_BAM_WRAPPER "Unable to read header from file",
+                   file_location);
+        }
+    } else {
+        /* Open SAM file and read its header */
+        if ((bw->sam.input = samopen(file_location, "r", NULL)) == NULL) {
+                   throw(MAL, "init_bam_wrapper",
+                         ERR_INIT_BAM_WRAPPER "SAM file could not be opened",
+                         file_location);
+           }
+           if ((bw->header = bw->sam.input->header) == NULL) {
+               throw(MAL, "init_bam_wrapper",
+                   ERR_INIT_BAM_WRAPPER "Unable to read header from file",
+                   file_location);
+        }
+    }
+    
        /* Set ordering to unknown, since we don't know until we have
         * processed the header */
+       bw->type = type;
        bw->ord = ORDERING_UNKNOWN;
 
        bw->file_id = file_id;
@@ -247,10 +259,10 @@ init_bam_wrapper(bam_wrapper * bw, str f
 }
 
 static void
-close_streams(bam_wrapper * bw, bit unlink_files)
+close_write_streams(bam_wrapper * bw, bit unlink_files)
 {
        int i;
-
+    
        for (i = 0; i < 6; ++i) {
                if (bw->files[i]) {
                        close_stream(bw->files[i]);
@@ -324,7 +336,7 @@ close_streams(bam_wrapper * bw, bit unli
 void
 prepare_for_copy(bam_wrapper * bw)
 {
-       close_streams(bw, FALSE);
+       close_write_streams(bw, FALSE);
 }
 
 void
@@ -332,16 +344,22 @@ clear_bam_wrapper(bam_wrapper * bw)
 {
        char flushdir[128];
 
-       /* Clear fields */
-       if (bw->header) {
-               bam_header_destroy(bw->header);
-       }
-       if (bw->input) {
-               bam_close(bw->input);
-       }
+       /* Clear bam/sam specific fields */
+       if (bw->type == BAM) {
+           if (bw->header) {
+                   bam_header_destroy(bw->header);
+           }
+           if (bw->bam.input) {
+                   bam_close(bw->bam.input);
+           }
+    } else {
+        if (bw->sam.input) {
+            samclose(bw->sam.input);
+        }
+    }
 
        /* Close file streams and remove files */
-       close_streams(bw, TRUE);
+       close_write_streams(bw, TRUE);
 
        /* Finally, attempt to remove flush directory */
        snprintf(flushdir, 128, DIR_BINARIES "/" LLFMT, bw->file_id);
@@ -422,7 +440,7 @@ typedef struct bam_header_line {
 } bam_header_line;
 
 
-#define ERR_PROCESS_BAM_HEADER_LINE "Could not parse a header line in BAM file 
'%s': "
+#define ERR_PROCESS_HEADER_LINE "Could not parse a header line in BAM file 
'%s': "
 
 /**
  * Parses the next BAM header line from the given header.
@@ -430,13 +448,13 @@ typedef struct bam_header_line {
  * and attempts to parse it into the provided bam_header_line
  * structure. In case the function fails, the calling function must
  * call clear_bam_header_line to free possible resources that are
- * malloced by process_bam_header_line.  The *eof flag will be set to
+ * malloced by process_header_line.  The *eof flag will be set to
  * TRUE if the input doesn't contain a header line anymore.  The
  * function needs the file_location in order to generate decent error
  * messages
  */
 static str
-process_bam_header_line(str * header, bam_header_line * ret_hl, bit * eof,
+process_header_line(str * header, bam_header_line * ret_hl, bit * eof,
                        str file_location)
 {
        bam_header_option *opt = NULL;
@@ -457,8 +475,8 @@ process_bam_header_line(str * header, ba
 
        if (**header != '@') {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to