[med-svn] [dazzdb] 01/05: Imported Upstream version 1.0+20161112

Afif Elghraoui Wed, 18 Jan 2017 23:02:18 -0800

This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository dazzdb.


commit 49a5b8b9525073504b07e6af19d52e65217de620
Author: Afif Elghraoui <[email protected]>
Date:   Wed Jan 18 22:14:15 2017 -0800

    Imported Upstream version 1.0+20161112
---
 DB.c                     |  95 +++++++++++++-
 DB.h                     |  14 ++-
 DB2quiva.c => DB2arrow.c | 105 ++++++++--------
 DB2quiva.c               |   4 +
 DBdump.c                 |  65 ++++++++--
 DBshow.c                 | 113 +++++++++++++----
 DBsplit.c                |   3 +-
 DBstats.c                |   2 +-
 DBwipe.c                 |  89 +++++++++++++
 Makefile                 |  12 +-
 QV.c                     |  94 ++++++++++++++
 QV.h                     |   3 +
 README.md                | 169 ++++++++++++++++++-------
 quiva2DB.c => arrow2DB.c | 319 +++++++++++++++++++++++++----------------------
 fasta2DAM.c              |   1 +
 fasta2DB.c               |  52 ++++----
 quiva2DB.c               |  12 +-
 17 files changed, 824 insertions(+), 328 deletions(-)

diff --git a/DB.c b/DB.c
index 3764842..3afff14 100644
--- a/DB.c
+++ b/DB.c
@@ -314,6 +314,14 @@ void Upper_Read(char *s)
   *s = '\0';
 }
 
+//  Convert arrow pulse widths in [0-3] representation (end with 4) to the ascii
+//    digits '1' to '4' (end with '\0').  The conversion is done in place.
+
+void Letter_Arrow(char *s)
+{ static char digit[4] = { '1', '2', '3', '4' };
+  char       *p;
+
+  p = s;
+  while (*p != 4)                  //  4 is the end-of-vector sentinel
+    { *p = digit[(int) *p];
+      p += 1;
+    }
+  *p = '\0';                       //  Sentinel becomes the string terminator
+}
+
 //  Convert read in ascii representation to [0-3] representation (end with 4)
 
 void Number_Read(char *s)
@@ -341,6 +349,31 @@ void Number_Read(char *s)
   *s = 4;
 }
 
+//  Convert arrow pulse widths in ascii representation ('1' to '4', end with '\0') to
+//    [0-3] representation (end with 4).  The conversion is done in place.  Every
+//    character other than '1', '2', and '3' maps to 3, with the one exception noted below.
+
+void Number_Arrow(char *s)
+{ static const char arrow[128] =
+    { 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 0, 1, 2, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 2,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+    };
+
+  //  NOTE(review): entry 71 ('G') is 2 while every other non-digit is 3 -- possibly a
+  //    leftover from a base-letter table; left unchanged here, confirm the intent.
+
+  for ( ; *s != '\0'; s++)
+    { unsigned char c = (unsigned char) *s;   //  plain char may be signed: never index with
+                                              //    a negative value
+      if (c < 128)
+        *s = arrow[c];
+      else
+        *s = 3;                               //  bytes beyond the table get the default code
+    }
+  *s = 4;
+}
+
 
 
/*******************************************************************************************
  *
@@ -426,7 +459,7 @@ int Open_DB(char* path, HITS_DB *db)
     if (fscanf(dbvis,DB_NBLOCK,&nblocks) != 1)
       if (part == 0)
         { cutoff = 0;
-          all    = 1;
+          all    = DB_ALL;
         }
       else
         { EPRINTF(EPLACE,"%s: DB %s has not yet been partitioned, cannot 
request a block !\n",
@@ -466,7 +499,7 @@ int Open_DB(char* path, HITS_DB *db)
   db->tracks  = NULL;
   db->part    = part;
   db->cutoff  = cutoff;
-  db->all     = all;
+  db->allarr |= all;
   db->ufirst  = ufirst;
   db->tfirst  = tfirst;
 
@@ -557,10 +590,10 @@ void Trim_DB(HITS_DB *db)
 
   if (db->trimmed) return;
 
-  if (db->cutoff <= 0 && db->all) return;
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return;
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -660,10 +693,10 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK 
*track, int ispart)
 
   if (!db->trimmed) return (0);
 
-  if (db->cutoff <= 0 && db->all) return (0);
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -1435,6 +1468,56 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
   return (0);
 }
 
+// Load into 'read' the i'th arrow in 'db'.  As an ASCII string if ascii is 1, 
+//   and as a numeric string otherwise.
+//
+
+HITS_DB *Arrow_DB = NULL;         //  Last db/arw used by "Load_Arrow"
+FILE    *Arrow_File = NULL;       //    Becomes invalid after closing
+
+int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+{ FILE      *arrow;
+  int64      off;
+  int        len, clen;
+  HITS_READ *r = db->reads;
+
+  //  Loads the i'th arrow vector of 'db' into 'read' and returns 0.  With ascii == 1 the
+  //    result is a '\0'-terminated string of the digits '1'-'4', otherwise it is left in
+  //    numeric form.  The byte at read[-1] is written as well, so 'read' must be preceded
+  //    by at least one writable byte (presumably as supplied by New_Read_Buffer -- confirm).
+
+  if (i < 0 || i >= db->nreads)
+    { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
+      EXIT(1);
+    }
+
+  //  The .arw stream of the last db used is cached in Arrow_File / Arrow_DB
+
+  if (Arrow_DB != db)
+    { if (Arrow_File != NULL)      //  Nothing is open on the first call, and fclose(NULL)
+        fclose(Arrow_File);        //    is undefined
+      Arrow_File = NULL;           //  Drop the cache before opening so that a failed open
+      Arrow_DB   = NULL;           //    cannot leave a closed stream behind for a later call
+      arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
+      if (arrow == NULL)
+        EXIT(1);
+      Arrow_File = arrow;
+      Arrow_DB   = db;
+    }
+  else
+    arrow = Arrow_File;
+
+  off = r[i].boff;
+  len = r[i].rlen;
+
+  if (ftello(arrow) != off)        //  Seek only when not already positioned at the record
+    fseeko(arrow,off,SEEK_SET);
+  clen = COMPRESSED_LEN(len);
+  if (clen > 0)
+    { if (fread(read,clen,1,arrow) != 1)
+        { EPRINTF(EPLACE,"%s: Failed read of .arw file (Load_Arrow)\n",Prog_Name);
+          EXIT(1);
+        }
+    }
+  Uncompress_Read(len,read);
+  if (ascii == 1)
+    { Letter_Arrow(read);
+      read[-1] = '\0';
+    }
+  else
+    read[-1] = 4;
+  return (0);
+}
+
 char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
 { FILE      *bases  = (FILE *) db->bases;
   int64      off;
diff --git a/DB.h b/DB.h
index 983bee7..dc281de 100644
--- a/DB.h
+++ b/DB.h
@@ -164,6 +164,9 @@ void Lower_Read(char *s);     //  Convert read from numbers 
to lowercase letters
 void Upper_Read(char *s);     //  Convert read from numbers to uppercase 
letters (0-3 to ACGT)
 void Number_Read(char *s);    //  Convert read from letters to numbers
 
+void Letter_Arrow(char *s);   //  Convert arrow pw's from numbers to uppercase 
letters (0-3 to 1234)
+void Number_Arrow(char *s);   //  Convert arrow pw string from letters to 
numbers
+
 
 
/*******************************************************************************************
  *
@@ -175,6 +178,9 @@ void Number_Read(char *s);    //  Convert read from letters 
to numbers
 #define DB_CSS  0x0400   //  This is the second or later of a group of reads 
from a given insert
 #define DB_BEST 0x0800   //  This is the longest read of a given insert (may 
be the only 1)
 
+#define DB_ARROW 0x2     //  DB is an arrow DB
+#define DB_ALL   0x1     //  all wells are in the trimmed DB
+
 //  Fields have different interpretations if a .db versus a .dam
 
 typedef struct
@@ -185,6 +191,7 @@ typedef struct
                     //    uncompressed bases in memory block
     int64   coff;   //  Offset (in bytes) of compressed quiva streams in 
'.qvs' file (DB),
                     //  Offset (in bytes) of scaffold header string in '.hdr' 
file (DAM)
+                    //  4 compressed shorts containing snr info if an arrow DB.
     int     flags;  //  QV of read + flags above (DB only)
   } HITS_READ;
 
@@ -226,7 +233,7 @@ typedef struct
   { int         ureads;     //  Total number of reads in untrimmed DB
     int         treads;     //  Total number of reads in trimmed DB
     int         cutoff;     //  Minimum read length in block (-1 if not yet 
set)
-    int         all;        //  Consider multiple reads from a given well
+    int         allarr;     //  DB_ALL | DB_ARROW
     float       freq[4];    //  frequency of A, C, G, T, respectively
 
     //  Set with respect to "active" part of DB (all vs block, untrimmed vs 
trimmed)
@@ -368,6 +375,11 @@ char *New_Read_Buffer(HITS_DB *db);
 
 int  Load_Read(HITS_DB *db, int i, char *read, int ascii);
 
+  // Exactly the same as Load_Read, save the arrow information is loaded, not 
the DNA sequence,
+  //   and there is only a choice between numeric (0) or ascii (1);
+
+int  Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+
   // Load into 'read' the subread [beg,end] of the i'th read in 'db' and 
return a pointer to the
   //   the start of the subinterval (not necessarily = to read !!! ).  As a 
lower case ascii
   //   string if ascii is 1, an upper case ascii string if ascii is 2, and a 
numeric string
diff --git a/DB2quiva.c b/DB2arrow.c
similarity index 59%
copy from DB2quiva.c
copy to DB2arrow.c
index a7b93da..9247ab4 100644
--- a/DB2quiva.c
+++ b/DB2arrow.c
@@ -1,9 +1,9 @@
 
/********************************************************************************************
  *
- *  Recreate all the .quiva files that have been loaded into a specified 
database.
+ *  Recreate all the .arrow files that have been loaded into a specified 
database.
  *
  *  Author:  Gene Myers
- *  Date  :  May 2014
+ *  Date  :  October 2016
  *
  
********************************************************************************************/
 
@@ -12,38 +12,40 @@
 #include <string.h>
 
 #include "DB.h"
-#include "QV.h"
 
-#ifdef HIDE_FILES
-#define PATHSEP "/."
-#else
-#define PATHSEP "/"
-#endif
-
-static char *Usage = "[-vU] <path:db>";
+static char *Usage = "[-v] [-w<int(80)>] <path:db>";
 
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
-  FILE       *dbfile, *quiva;
-  int         VERBOSE, UPPER;
+  FILE       *dbfile;
+  int         VERBOSE, WIDTH;
 
   //  Process arguments
 
   { int   i, j, k;
     int   flags[128];
+    char *eptr;
+
+    ARG_INIT("DB2arrow")
 
-    ARG_INIT("DB2quiva")
+    WIDTH = 80;
 
     j = 1;
     for (i = 1; i < argc; i++)
       if (argv[i][0] == '-')
-        { ARG_FLAGS("vU") }
+        switch (argv[i][1])
+        { default:
+            ARG_FLAGS("vU")
+            break;
+          case 'w':
+            ARG_NON_NEGATIVE(WIDTH,"Line width")
+            break;
+        }
       else
         argv[j++] = argv[i];
     argc = j;
 
     VERBOSE = flags['v'];
-    UPPER   = flags['U'];
 
     if (argc != 2)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
@@ -51,10 +53,10 @@ int main(int argc, char *argv[])
       }
   }
 
-  //  Open db, db stub file, and .qvs file
+  //  Open db, and db stub file
 
-  { char *pwd, *root;
-    int   status;
+  { int   status;
+    char *pwd, *root;
 
     status = Open_DB(argv[1],db);
     if (status < 0)
@@ -67,14 +69,17 @@ int main(int argc, char *argv[])
       { fprintf(stderr,"%s: Cannot be called on a block: 
%s\n",Prog_Name,argv[1]);
         exit (1);
       }
+    if ((db->allarr & DB_ARROW) == 0)
+      { fprintf(stderr,"%s: There is no Arrow information in the DB: 
%s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
 
     pwd    = PathTo(argv[1]);
     root   = Root(argv[1],".db");
     dbfile = Fopen(Catenate(pwd,"/",root,".db"),"r");
-    quiva  = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r");
     free(pwd);
     free(root);
-    if (dbfile == NULL || quiva == NULL)
+    if (dbfile == NULL)
       exit (1);
   }
 
@@ -84,22 +89,19 @@ int main(int argc, char *argv[])
     char        lname[MAX_NAME];
     FILE       *ofile = NULL;
     int         f, first, last, ofirst, nfiles;
-    QVcoding   *coding;
-    char      **entry;
+    char       *read;
 
     if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
       SYSTEM_ERROR
 
     reads = db->reads;
-    entry = New_QV_Buffer(db);
+    read  = New_Read_Buffer(db);
     first = ofirst = 0;
     for (f = 0; f < nfiles; f++)
       { int   i;
         char  prolog[MAX_NAME], fname[MAX_NAME];
 
-        //  Scan db image file line, create .quiva file for writing
-
-        if (reads[first].coff < 0) break;
+        //  Scan db image file line, create .arrow file for writing
 
         if (fscanf(dbfile,DB_FDATA,&last,fname,prolog) != 3)
           SYSTEM_ERROR
@@ -107,7 +109,7 @@ int main(int argc, char *argv[])
         if (f == 0 || strcmp(fname,lname) != 0)
           { if (f > 0)
               { if (ofile == stdout)
-                  { fprintf(stderr," %d quivas\n",first-ofirst);
+                  { fprintf(stderr," %d reads\n",first-ofirst);
                     fflush(stderr);
                   }
                 else
@@ -124,48 +126,44 @@ int main(int argc, char *argv[])
                   }
               }
             else
-              { if ((ofile = Fopen(Catenate(".","/",fname,".quiva"),"w")) == 
NULL)
+              { if ((ofile = Fopen(Catenate(".","/",fname,".arrow"),"w")) == 
NULL)
                   exit (1);
 
                 if (VERBOSE)
-                  { fprintf(stderr,"Creating %s.quiva ...\n",fname);
-                    fflush(stderr);
+                  { fprintf(stderr,"Creating %s.arrow ...\n",fname);
+                    fflush(stdout);
                   }
               }
 
             strcpy(lname,fname);
           }
 
-        //   For the relevant range of reads, write the header for each to the 
file
-        //     and then uncompress and write the quiva entry for each
-
-        coding = Read_QVcoding(quiva);
+        //   For the relevant range of reads, write each to the file
+        //     recreating the original headers with the index meta-data about 
each read
 
         for (i = first; i < last; i++)
-          { int        e, flags, qv, rlen;
+          { int        j, len;
+            uint64     big;
+            float      snr[4];
             HITS_READ *r;
 
             r     = reads + i;
-            flags = r->flags;
-            rlen  = r->rlen;
-            qv    = (flags & DB_QV);
-            
fprintf(ofile,"@%s/%d/%d_%d",prolog,r->origin,r->fpulse,r->fpulse+rlen);
-            if (qv > 0)
-              fprintf(ofile," RQ=0.%3d",qv);
+            len   = r->rlen;
+            big   = *((uint64 *) &(r->coff));
+            for (j = 0; j < 4; j++)
+              { snr[3-j] = (big & 0xffff) / 100.;
+                big    >>= 16;
+              }
+            fprintf(ofile,">%s",prolog);
+            fprintf(ofile," 
SN=%.2f,%.2f,%.2f,%.2f",snr[0],snr[1],snr[2],snr[3]);
             fprintf(ofile,"\n");
 
-            Uncompress_Next_QVentry(quiva,entry,coding,rlen);
-
-            if (UPPER)
-              { char *deltag = entry[1];
-                int   j;
-
-                for (j = 0; j < rlen; j++)
-                  deltag[j] -= 32;
-              }
+            Load_Arrow(db,i,read,1);
 
-            for (e = 0; e < 5; e++)
-              fprintf(ofile,"%.*s\n",rlen,entry[e]);
+            for (j = 0; j+WIDTH < len; j += WIDTH)
+              fprintf(ofile,"%.*s\n",WIDTH,read+j);
+            if (j < len)
+              fprintf(ofile,"%s\n",read+j);
           }
 
         first = last;
@@ -173,7 +171,7 @@ int main(int argc, char *argv[])
 
     if (f > 0)
       { if (ofile == stdout)
-          { fprintf(stderr," %d quivas\n",first-ofirst);
+          { fprintf(stderr," %d reads\n",first-ofirst);
             fflush(stderr);
           }
         else
@@ -181,7 +179,6 @@ int main(int argc, char *argv[])
       }
   }
 
-  fclose(quiva);
   fclose(dbfile);
   Close_DB(db);
 
diff --git a/DB2quiva.c b/DB2quiva.c
index a7b93da..45c5cff 100644
--- a/DB2quiva.c
+++ b/DB2quiva.c
@@ -67,6 +67,10 @@ int main(int argc, char *argv[])
       { fprintf(stderr,"%s: Cannot be called on a block: 
%s\n",Prog_Name,argv[1]);
         exit (1);
       }
+    if (db->reads[0].coff < 0 || (db->allarr & DB_ARROW) != 0)
+      { fprintf(stderr,"%s: There is no Quiver information in the DB: 
%s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
 
     pwd    = PathTo(argv[1]);
     root   = Root(argv[1],".db");
diff --git a/DBdump.c b/DBdump.c
index 735e726..6227251 100644
--- a/DBdump.c
+++ b/DBdump.c
@@ -21,8 +21,8 @@
 #endif
 
 static char *Usage[] =
-    { "[-rhsiqp] [-uU] [-m<track>]+",
-      "          <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
+    { "[-rhsaiqp] [-uU] [-m<track>]+",
+      "           <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
     };
 
 #define LAST_READ_SYMBOL   '$'
@@ -93,6 +93,7 @@ static int prof_map[41] =
 
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
+  int         Quiva_DB, Arrow_DB;
   FILE       *hdrs = NULL;
   int64      *qv_idx = NULL;
   uint8      *qv_val = NULL;
@@ -110,7 +111,7 @@ int main(int argc, char *argv[])
   FILE          *input = NULL;
 
   int         TRIM, UPPER;
-  int         DORED, DOSEQ, DOQVS, DOHDR, DOIQV, DOPRF, DAM;
+  int         DORED, DOSEQ, DOARW, DOQVS, DOHDR, DOIQV, DOPRF, DAM;
 
   int          MMAX, MTOP;
   char       **MASK;
@@ -134,7 +135,7 @@ int main(int argc, char *argv[])
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("hpqrsiuU")
+            ARG_FLAGS("hpqrsaiuU")
             break;
           case 'm':
             if (MTOP >= MMAX)
@@ -156,6 +157,7 @@ int main(int argc, char *argv[])
     DOQVS = flags['q'];
     DORED = flags['r'];
     DOSEQ = flags['s'];
+    DOARW = flags['a'];
     DOHDR = flags['h'];
     DOIQV = flags['i'];
     DOPRF = flags['p'];
@@ -195,13 +197,32 @@ int main(int argc, char *argv[])
           exit (1);
         DAM = 1;
         if (DOQVS)
-          { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam 
DB\n",Prog_Name);
+          { fprintf(stderr,"%s: -q option is not compatible with a .dam 
DB\n",Prog_Name);
+            exit (1);
+          }
+        if (DOARW)
+          { fprintf(stderr,"%s: -a option is not compatible with a .dam 
DB\n",Prog_Name);
             exit (1);
           }
 
         free(root);
         free(pwd);
       }
+
+    Arrow_DB = ((db->allarr & DB_ARROW) != 0);
+    Quiva_DB = (db->reads[0].coff >= 0 && (db->allarr & DB_ARROW) == 0);
+    if (DOARW)
+      { if (!Arrow_DB)
+          { fprintf(stderr,"%s: -a option set but no Arrow data in 
DB\n",Prog_Name);
+            exit (1);
+          }
+      }
+    if (DOQVS)
+      { if (!Quiva_DB)
+          { fprintf(stderr,"%s: -q option set but no Quiver data in 
DB\n",Prog_Name);
+            exit (1);
+          }
+      }
   }
 
   //  Load QVs if requested
@@ -294,7 +315,7 @@ int main(int argc, char *argv[])
 
           reads  = db->reads - db->ufirst;
           cutoff = db->cutoff;
-          if (db->all)
+          if ((db->allarr & DB_ALL) != 0)
             allflag = 0;
           else
             allflag = DB_BEST;
@@ -566,7 +587,7 @@ int main(int argc, char *argv[])
                 lst = len;
               }
 
-            if (DOSEQ | DOQVS)
+            if (DOSEQ | DOQVS | DOARW)
               { int ten = lst-fst;
                 if (ten > seqmax)
                   seqmax = ten;
@@ -597,7 +618,7 @@ int main(int argc, char *argv[])
       { printf("+ T%d %lld\n",m,trktot[m]);
         printf("@ T%d %lld\n",m,trkmax[m]);
       }
-    if (DOSEQ | DOQVS)
+    if (DOSEQ | DOQVS | DOARW)
       { printf("+ S %lld\n",seqtot);
         printf("@ S %lld\n",seqmax);
       }
@@ -615,7 +636,7 @@ int main(int argc, char *argv[])
   //    range pairs in pts[0..reps) and according to the display options.
 
   { HITS_READ  *reads;
-    char       *read, **entry;
+    char       *read, *arrow, **entry;
     int         c, b, e, i, m;
     int         substr;
     int         map;
@@ -626,6 +647,10 @@ int main(int argc, char *argv[])
       entry = New_QV_Buffer(db);
     else
       entry = NULL;
+    if (DOARW)
+      arrow = New_Read_Buffer(db);
+    else
+      arrow = NULL;
 
     map    = 0;
     reads  = db->reads;
@@ -685,8 +710,19 @@ int main(int argc, char *argv[])
                       map += 1;
                     printf("H %ld %s\n",strlen(flist[map]),flist[map]);
                     printf("L %d %d %d\n",r->origin,r->fpulse,r->fpulse+len);
-                    if (qv > 0)
-                      printf("Q: %d\n",qv);
+                    if (Quiva_DB && qv > 0)
+                      printf("Q %d\n",qv);
+                    else if (Arrow_DB)
+                      { int   j, snr[4];
+                        int64 big;
+
+                        big   = *((uint64 *) &(r->coff));
+                        for (j = 0; j < 4; j++)
+                          { snr[3-j] = (big & 0xffff);
+                            big    >>= 16;
+                          }
+                        printf("N %d %d %d %d\n",snr[0],snr[1],snr[2],snr[3]);
+                      }
                   }
               }
 
@@ -694,6 +730,8 @@ int main(int argc, char *argv[])
               Load_QVentry(db,i,entry,UPPER);
             if (DOSEQ)
               Load_Read(db,i,read,UPPER);
+            if (DOARW)
+              Load_Arrow(db,i,arrow,1);
 
             for (m = 0; m < MTOP; m++)
               { int64 *anno;
@@ -727,6 +765,11 @@ int main(int argc, char *argv[])
                 printf("%.*s\n",lst-fst,read+fst);
               }
 
+            if (DOARW)
+              { printf("A %d ",lst-fst);
+                printf("%.*s\n",lst-fst,arrow+fst);
+              }
+
             if (DOIQV)
               { int64 k, e;
 
diff --git a/DBshow.c b/DBshow.c
index d4346a5..c376ed4 100644
--- a/DBshow.c
+++ b/DBshow.c
@@ -26,8 +26,8 @@
 #endif
 
 static char *Usage[] =
-    { "[-unqUQ] [-w<int(80)>] [-m<track>]+",
-      "         <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
+    { "[-unqaUQA] [-w<int(80)>] [-m<track>]+",
+      "           <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
     };
 
 #define LAST_READ_SYMBOL   '$'
@@ -93,7 +93,7 @@ int main(int argc, char *argv[])
   FILE          *input;
 
   int         TRIM, UPPER;
-  int         DOSEQ, DOQVS, QUIVA, DAM;
+  int         DOSEQ, DOQVS, DOARR, QUIVA, ARROW, DAM;
   int         WIDTH;
 
   int         MMAX, MTOP;
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("unqUQ")
+            ARG_FLAGS("unqaUQA")
             break;
           case 'w':
             ARG_NON_NEGATIVE(WIDTH,"Line width")
@@ -142,15 +142,26 @@ int main(int argc, char *argv[])
     TRIM  = 1-flags['u'];
     UPPER = 1+flags['U'];
     DOQVS = flags['q'];
+    DOARR = flags['a'];
     DOSEQ = 1-flags['n'];
     QUIVA = flags['Q'];
-    if (QUIVA && (!DOSEQ || MTOP > 0))
-      { fprintf(stderr,"%s: -Q (quiva) format request inconsistent with -n and 
-m options\n",
+    ARROW = flags['A'];
+    if ((QUIVA || DOQVS) && (ARROW || DOARR))
+      { fprintf(stderr,"%s: Cannot request both Quiver (-Q,-q) and Arrow 
(-A,a) information\n",
                        Prog_Name);
         exit (1);
       }
+
     if (QUIVA)
-      DOQVS = 1;
+      { DOQVS = 1;
+        DOSEQ = 0;
+        MTOP  = 0;
+      }
+    if (ARROW)
+      { DOARR = 1;
+        DOSEQ = 0;
+        MTOP  = 0;
+      }
 
     if (argc <= 1)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
@@ -177,14 +188,27 @@ int main(int argc, char *argv[])
         if (hdrs == NULL)
           exit (1);
         DAM = 1;
-        if (QUIVA || DOQVS)
-          { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam 
DB\n",Prog_Name);
+        if (DOQVS || DOARR)
+          { fprintf(stderr,"%s: -q, a, Q and A options not compatible with a 
.dam DB\n",Prog_Name);
             exit (1);
           }
 
         free(root);
         free(pwd);
       }
+
+    if (DOQVS)
+      { if (db->reads[0].coff < 0 || (db->allarr & DB_ARROW) != 0)
+          { fprintf(stderr,"%s: -q or Q option but no Quiver data in 
DB!\n",Prog_Name);
+            exit (1);
+          }
+      }
+    if (DOARR)
+      { if ((db->allarr & DB_ARROW) == 0)
+          { fprintf(stderr,"%s: -a or A option but no Arrow data in 
DB!\n",Prog_Name);
+            exit (1);
+          }
+      }
   }
 
   //  Load QVs if requested
@@ -263,11 +287,11 @@ int main(int argc, char *argv[])
 
           reads  = db->reads - db->ufirst;
           cutoff = db->cutoff;
-          if (db->all)
+          if ((db->allarr & DB_ALL) != 0)
             allflag = 0;
           else
             allflag = DB_BEST;
-          
+ 
           nid = 0;
           oid = db->ufirst;
           lid = oid + db->nreads;
@@ -394,7 +418,7 @@ int main(int argc, char *argv[])
 
   { HITS_READ  *reads;
     HITS_TRACK *first;
-    char       *read, **entry;
+    char       *read, *arrow, **entry;
     int         c, b, e, i;
     int         hilight, substr;
     int         map;
@@ -409,6 +433,8 @@ int main(int argc, char *argv[])
       { entry = NULL;
         first = db->tracks;
       }
+    if (DOARR)
+      arrow = New_Read_Buffer(db);
 
     if (UPPER == 1)
       { hilight = 'A'-'a';
@@ -446,21 +472,40 @@ int main(int argc, char *argv[])
           { int         len;
             int         fst, lst;
             int         flags, qv;
+            float       snr[4];
             HITS_READ  *r;
             HITS_TRACK *track;
 
             r   = reads + i;
             len = r->rlen;
+            if (substr)
+              { fst = iter->beg;
+                lst = iter->end;
+              }
+            else
+              { fst = 0;
+                lst = len;
+              }
 
             flags = r->flags;
             qv    = (flags & DB_QV);
+            if (DOARR)
+              { uint64 big;
+                int    j;
+
+                big   = *((uint64 *) &(r->coff));
+                for (j = 0; j < 4; j++)
+                  { snr[3-j] = (big & 0xffff) / 100.;
+                    big    >>= 16;
+                  }
+              }
             if (DAM)
               { char header[MAX_NAME];
 
                 fseeko(hdrs,r->coff,SEEK_SET);
                 fgets(header,MAX_NAME,hdrs);
                 header[strlen(header)-1] = '\0';
-                printf("%s :: Contig 
%d[%d,%d]",header,r->origin,r->fpulse,r->fpulse+len);
+                printf("%s :: Contig 
%d[%d,%d]",header,r->origin,r->fpulse+fst,r->fpulse+lst);
               }
             else
               { while (i < findx[map-1])
@@ -468,11 +513,15 @@ int main(int argc, char *argv[])
                 while (i >= findx[map])
                   map += 1;
                 if (QUIVA)
-                  
printf("@%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
+                  
printf("@%s/%d/%d_%d",flist[map],r->origin,r->fpulse+fst,r->fpulse+lst);
+                else if (ARROW)
+                  printf(">%s",flist[map]);
                 else
-                  
printf(">%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
+                  
printf(">%s/%d/%d_%d",flist[map],r->origin,r->fpulse+fst,r->fpulse+lst);
                 if (qv > 0)
                   printf(" RQ=0.%3d",qv);
+                if (DOARR)
+                  printf(" 
SN=%.2f,%.2f,%.2f,%.2f",snr[0],snr[1],snr[2],snr[3]);
               }
             printf("\n");
 
@@ -480,6 +529,8 @@ int main(int argc, char *argv[])
               Load_QVentry(db,i,entry,UPPER);
             if (DOSEQ)
               Load_Read(db,i,read,UPPER);
+            if (DOARR)
+              Load_Arrow(db,i,arrow,1);
 
             for (track = first; track != NULL; track = track->next)
               { int64 *anno;
@@ -508,21 +559,20 @@ int main(int argc, char *argv[])
                   }
               }
 
-            if (substr)
-              { fst = iter->beg;
-                lst = iter->end;
-              }
-            else
-              { fst = 0;
-                lst = len;
-              }
-
             if (QUIVA)
               { int k;
 
                 for (k = 0; k < 5; k++)
                   printf("%.*s\n",lst-fst,entry[k]+fst);
               }
+            else if (ARROW)
+              { int k;
+    
+                for (k = fst; k+WIDTH < lst; k += WIDTH)
+                  printf("%.*s\n",WIDTH,arrow+k);
+                if (k < lst)
+                  printf("%.*s\n",lst-k,arrow+k);
+              }
             else
               { if (DOQVS)
                   { int j, k;
@@ -543,6 +593,21 @@ int main(int argc, char *argv[])
                         printf("\n");
                       }
                   }
+                else if (DOARR)
+                  { int j;
+
+                    printf("\n");
+                    for (j = fst; j+WIDTH < lst; j += WIDTH)
+                      { if (DOSEQ)
+                          printf("%.*s\n",WIDTH,read+j);
+                        printf("%.*s\n\n",WIDTH,arrow+j);
+                      }
+                    if (j < lst)
+                      { if (DOSEQ)
+                          printf("%.*s\n",lst-j,read+j);
+                        printf("%.*s\n\n",lst-j,arrow+j);
+                      }
+                  }
                 else if (DOSEQ)
                   { int j;
     
diff --git a/DBsplit.c b/DBsplit.c
index 6a218ec..9ef8b77 100644
--- a/DBsplit.c
+++ b/DBsplit.c
@@ -202,7 +202,8 @@ int main(int argc, char *argv[])
     fprintf(dbfile,DB_NBLOCK,nblock);
 
     dbs.cutoff = CUTOFF;
-    dbs.all    = ALL;
+    if (ALL)
+      dbs.allarr |= DB_ALL;
     dbs.treads = treads;
     rewind(ixfile);
     fwrite(&dbs,sizeof(HITS_DB),1,ixfile);
diff --git a/DBstats.c b/DBstats.c
index b0e46c2..4a3badf 100644
--- a/DBstats.c
+++ b/DBstats.c
@@ -152,7 +152,7 @@ int main(int argc, char *argv[])
 
     if (dam)
       printf("\nStatistics for all contigs");
-    else if (db->all || !TRIM)
+    else if ((db->allarr & DB_ALL) != 0 || !TRIM)
       printf("\nStatistics for all wells");
     else
       printf("\nStatistics for all reads");
diff --git a/DBwipe.c b/DBwipe.c
new file mode 100644
index 0000000..9730062
--- /dev/null
+++ b/DBwipe.c
@@ -0,0 +1,89 @@
+/*******************************************************************************************
+ *
+ *  Wipe all Quiver and Arrow data from a .db:
+ *     Delete the .arw and .qvs files of the database <path>.db, set the 'coff' field of
+ *     every read in the index to -1, clear the DB_ARROW flag of the DB, and rewrite the
+ *     .idx file.  The command cannot be applied to a block of a DB or to a .dam.
+ *
+ *  Author:  Gene Myers
+ *  Date  :  September 2013
+ *  Mod   :  New splitting definition to support incrementality, and new stub 
file format
+ *  Date  :  April 2014
+ *
+ 
********************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "DB.h"
+
+#ifdef HIDE_FILES
+#define PATHSEP "/."
+#else
+#define PATHSEP "/"
+#endif
+
+static char *Usage = "<path:db>";
+
+//  DBwipe <path:db>: remove the Quiver/Arrow data of a database.  Exits with status 0 on
+//    success and 1 on a usage error, an unsuitable DB, or a failure to rewrite the index.
+
+int main(int argc, char *argv[])
+{ HITS_DB    db;
+  int        status;
+
+  Prog_Name = Strdup("DBwipe","Allocating Program Name");
+
+  if (argc != 2)
+    { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+      exit (1);
+    }
+
+  //  Open db, refusing blocks and .dam's
+
+  status = Open_DB(argv[1],&db);
+  if (status < 0)
+    exit (1);
+  if (db.part > 0)
+    { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
+      exit (1);
+    }
+  if (status)
+    { fprintf(stderr,"%s: Cannot be called on a .dam: %s\n",Prog_Name,argv[1]);
+      exit (1);
+    }
+
+  //  Delete the .arw and .qvs files, reset every read's coff to -1, clear the DB_ARROW
+  //    flag, and write the modified header and read records back to the .idx file
+
+  { char    *pwd, *root;
+    FILE    *index;
+    int      i;
+
+    pwd    = PathTo(argv[1]);
+    root   = Root(argv[1],".db");
+    unlink(Catenate(pwd,PATHSEP,root,".arw"));   //  Results deliberately not checked
+    unlink(Catenate(pwd,PATHSEP,root,".qvs"));   //    (presumably either file may be absent)
+
+    for (i = 0; i < db.nreads; i++)
+      db.reads[i].coff = -1;
+    db.allarr &= ~DB_ARROW;
+
+    if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"w")) == NULL)
+      { fprintf(stderr,"%s: Cannot open %s%s%s.idx\n",Prog_Name,pwd,PATHSEP,root);
+        exit (1);
+      }
+
+    //  The index was truncated by the open above, so a short write must not pass silently
+
+    if (fwrite(&db,sizeof(HITS_DB),1,index) != 1 ||
+        fwrite(db.reads,sizeof(HITS_READ),db.nreads,index) != (size_t) db.nreads)
+      { fprintf(stderr,"%s: Cannot write %s%s%s.idx\n",Prog_Name,pwd,PATHSEP,root);
+        exit (1);
+      }
+    if (fclose(index) != 0)                       //  Buffered data is flushed here
+      { fprintf(stderr,"%s: Cannot write %s%s%s.idx\n",Prog_Name,pwd,PATHSEP,root);
+        exit (1);
+      }
+
+    free(pwd);
+    free(root);
+  }
+
+  Close_DB(&db);
+
+  exit (0);
+}
diff --git a/Makefile b/Makefile
index cf0afb8..873296f 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ DEST_DIR = ~/bin
 CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
 
 ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow 
DBstats DBrm simulator \
-      fasta2DAM DAM2fasta DBdump rangen
+      fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe
 
 all: $(ALL)
 
@@ -19,6 +19,12 @@ quiva2DB: quiva2DB.c DB.c DB.h QV.c QV.h
 DB2quiva: DB2quiva.c DB.c DB.h QV.c QV.h
        gcc $(CFLAGS) -o DB2quiva DB2quiva.c DB.c QV.c -lm
 
+DB2arrow: DB2arrow.c DB.c QV.c DB.h QV.h
+       gcc $(CFLAGS) -o DB2arrow DB2arrow.c DB.c QV.c -lz
+
+arrow2DB: arrow2DB.c DB.c QV.c DB.h QV.h
+       gcc $(CFLAGS) -o arrow2DB arrow2DB.c DB.c QV.c -lz
+
 DBsplit: DBsplit.c DB.c DB.h QV.c QV.h
        gcc $(CFLAGS) -o DBsplit DBsplit.c DB.c QV.c -lm
 
@@ -52,6 +58,10 @@ fasta2DAM: fasta2DAM.c DB.c DB.h QV.c QV.h
 DAM2fasta: DAM2fasta.c DB.c DB.h QV.c QV.h
        gcc $(CFLAGS) -o DAM2fasta DAM2fasta.c DB.c QV.c -lm
 
+DBwipe: DBwipe.c DB.c DB.h QV.c QV.h
+       gcc $(CFLAGS) -o DBwipe DBwipe.c DB.c QV.c -lm
+
+
 clean:
        rm -f $(ALL)
        rm -fr *.dSYM
diff --git a/QV.c b/QV.c
index cdb6a63..d7d7263 100644
--- a/QV.c
+++ b/QV.c
@@ -863,6 +863,62 @@ static int      delChar, subChar;
 
   // Referred by:  QVcoding_Scan, Create_QVcoding
 
+//  Accumulate coding statistics from one entry whose streams are passed directly.
+//    Each of delQV, delTag, insQV, mergeQV, and subQV is read for rlen positions.
+//    A call with rlen == 0 is an initialization call: it clears the four histograms,
+//    sets every run count to 1, zeroes the character total, and marks both run
+//    characters as undetermined (-1).
+
+void QVcoding_Scan1(int rlen, char *delQV, char *delTag, char *insQV, char *mergeQV,
+                    char *subQV)
+{ int j;
+
+  if (rlen == 0)
+    { bzero(delHist,sizeof(uint64)*256);
+      bzero(mrgHist,sizeof(uint64)*256);
+      bzero(insHist,sizeof(uint64)*256);
+      bzero(subHist,sizeof(uint64)*256);
+
+      j = 0;
+      while (j < 256)
+        { delRun[j] = subRun[j] = 1;
+          j += 1;
+        }
+
+      totChar = 0;
+      delChar = -1;
+      subChar = -1;
+      return;
+    }
+
+  //  Fold the four QV streams into their histograms
+
+  Histogram_Seqs(delHist,(uint8 *) delQV,rlen);
+  Histogram_Seqs(insHist,(uint8 *) insQV,rlen);
+  Histogram_Seqs(mrgHist,(uint8 *) mergeQV,rlen);
+  Histogram_Seqs(subHist,(uint8 *) subQV,rlen);
+
+  //  The deletion run character is the deletion QV found at the first position
+  //    whose tag is 'n' or 'N'; keep looking on later calls until one is seen
+
+  if (delChar < 0)
+    for (j = 0; j < rlen; j++)
+      { if (delTag[j] != 'n' && delTag[j] != 'N')
+          continue;
+        delChar = delQV[j];
+        break;
+      }
+  if (delChar >= 0)
+    Histogram_Runs( delRun,(uint8 *) delQV,rlen,delChar);
+
+  totChar += rlen;
+
+  //  The substitution run character is fixed once at least 100000 characters have
+  //    been scanned: the most frequent substitution QV, lowest value on ties
+
+  if (subChar < 0 && totChar >= 100000)
+    { int best;
+
+      best = 0;
+      for (j = 1; j < 256; j++)
+        if (subHist[j] > subHist[best])
+          best = j;
+      subChar = best;
+    }
+  if (subChar >= 0)
+    Histogram_Runs( subRun,(uint8 *) subQV,rlen,subChar);
+}
+
 int QVcoding_Scan(FILE *input, int num, FILE *temp)
 { char *slash;
   int   rlen;
@@ -1284,6 +1340,44 @@ void Free_QVcoding(QVcoding *coding)
  *
  
********************************************************************************************/
 
+//  Write the compressed encoding of one entry, whose streams are passed directly,
+//    to output using the schemes in coding.  Order of output: deletion QVs, the
+//    compressed tag string, insertion QVs, merge QVs, substitution QVs.
+//    The tag buffer is overwritten in place (by Pack_Tag when a deletion run
+//    character is set, then by Number_Read and Compress_Read).  If lossy is
+//    non-zero the ins and mrg buffers are also overwritten: the low bit of each
+//    insertion value and the low two bits of each merge value are cleared before
+//    they are encoded.
+
+void Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg,
+                            char *sub, FILE *output, QVcoding *coding, int lossy)
+{ int clen;
+
+  //  Deletion stream: run-length scheme only when a deletion run character exists,
+  //    in which case Pack_Tag supplies the length of the tag string to store
+
+  if (coding->delChar >= 0)
+    { Encode_Run(coding->delScheme, coding->dRunScheme, output,
+                 (uint8 *) del, rlen, coding->delChar);
+      clen = Pack_Tag(tag,del,rlen,coding->delChar);
+    }
+  else
+    { Encode(coding->delScheme, output, (uint8 *) del, rlen);
+      clen = rlen;
+    }
+
+  Number_Read(tag);
+  Compress_Read(clen,tag);
+  fwrite(tag,1,COMPRESSED_LEN(clen),output);
+
+  if (lossy)
+    { uint8 *ivec = (uint8 *) ins;
+      uint8 *mvec = (uint8 *) mrg;
+      int    j;
+
+      for (j = 0; j < rlen; j++)
+        { ivec[j] &= 0xfe;
+          mvec[j] &= 0xfc;
+        }
+    }
+
+  Encode(coding->insScheme, output, (uint8 *) ins, rlen);
+  Encode(coding->mrgScheme, output, (uint8 *) mrg, rlen);
+
+  //  Substitution stream: same choice of plain versus run-length scheme
+
+  if (coding->subChar >= 0)
+    Encode_Run(coding->subScheme, coding->sRunScheme, output,
+               (uint8 *) sub, rlen, coding->subChar);
+  else
+    Encode(coding->subScheme, output, (uint8 *) sub, rlen);
+}
+
 int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int 
lossy)
 { int rlen, clen;
 
diff --git a/QV.h b/QV.h
index 532b2f4..e5c9485 100644
--- a/QV.h
+++ b/QV.h
@@ -58,6 +58,7 @@ int       Get_QV_Line();
   //   If there is an error then -1 is returned, otherwise the number of 
entries read.
 
 int       QVcoding_Scan(FILE *input, int num, FILE *temp);
+void      QVcoding_Scan1(int rlen, char *del, char *tag, char *ins, char *mrg, 
char *sub);
 
   // Given QVcoding_Scan has been called at least once, create an encoding 
scheme based on
   //   the accumulated statistics and return a pointer to it.  The returned 
encoding object
@@ -83,6 +84,8 @@ void      Free_QVcoding(QVcoding *coding);
   //    occurred, and the sequence length otherwise.
 
 int      Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, 
int lossy);
+void     Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, 
char *mrg, char *sub,
+                                FILE *output, QVcoding *coding, int lossy);
 
   //  Assuming the input is position just beyond the compressed encoding of an 
entry header,
   //    read the set of compressed encodings for the ensuing 5 QV vectors, 
decompress them,
diff --git a/README.md b/README.md
index 515de32..ba37408 100644
--- a/README.md
+++ b/README.md
@@ -22,12 +22,15 @@ to the data base over time.
 is accomplished with the concept of *tracks* that implementors can add as they
 need them.
 
-4. The data is held in a compressed form equivalent to the .dexta and .dexqv 
files of
-the data extraction module.  Both the .fasta and .quiva information for each
-read is held in the data base and can be recreated from it.  The .quiva
-information can be added separately and later on if desired.
+4. The data is held in a compressed form equivalent to the .dexta and 
.dexqv/.dexar
+files of the data extraction module.
 
-5. To facilitate job parallel, cluster operation of the phases of our 
assembler, the
+5. Quiver or Arrow information can be added separately from the sequence 
information
+and later on if desired, but a database can only hold either Quiver or Arrow 
information,
+but not both.  The Arrow or Quiver information can be removed from the 
database at any
+time leaving a database just containing sequence information.
+
+6. To facilitate job parallel, cluster operation of the phases of our 
assembler, the
 data base has a concept of a *current partitioning* in which all the reads that
 are over a given length and optionally unique to a well, are divided up into
 *blocks* containing roughly a given number of bases, except possibly the last
@@ -37,12 +40,18 @@ all the same size.  One must be careful about changing the 
partition during an
 assembly as doing so can void the structural validity of any interim
 block-based results.
 
+A DB can contain the information needed by Quiver, or by Arrow, or neither, but
+not both.  A DB containing neither Quiver nor Arrow information is termed a
+Sequence-DB (S-DB).  A DB with Quiver information is a Quiver-DB (Q-DB) and
+a DB with Arrow information is an Arrow-DB (A-DB). All commands are aware of
+the state of a DB and respond to options according to their type. 
+
 A Dazzler DB consists of one named, *visible* file, e.g. FOO.db, and several
 *invisible* secondary files encoding various elements of the DB.  The 
secondary files
 are "invisible" to the UNIX OS in the sense that they begin with a "." and 
hence are
 not listed by "ls" unless one specifies the -a flag.  We chose to do this so 
that when
 a user lists the contents of a directory they just see a single name, e.g. 
FOO.db, that
-is the one used to refer to the DB in commands.  The files associated with a 
database
+is used to refer to the DB in commands.  The files associated with a database
 named, say FOO,  are as follows:
 
 * "FOO.db": a text file containing
@@ -60,7 +69,11 @@ named, say FOO,  are as follows:
 
 * ".FOO.qvs": a binary compressed "store" of the 5 Pacbio quality value 
streams for
   the reads.  Its size is roughly 5/3M bytes depending on the
-  compression acheived.  This file only exists if .quiva files have
+  compression achieved.  This file only exists if Quiver information has
+  been added to the database.
+
+* ".FOO.arw": a binary compressed "store" of the clipped pulse width stream for
+  the reads.  Its size is roughly M/4 bytes.  This file only exists if Arrow 
information has
   been added to the database.
 
 * ".FOO.\<track\>.[anno,data]": a *track* containing customized meta-data for 
each read.  For
@@ -83,7 +96,7 @@ been introduced in order to facilitate the mapping of reads 
to an assembly and h
 been given the suffix .dam to distinguish it from an ordinary DB.  It is 
structurally
 identical to a .db except:
 
-* there is no concept of quality values, and hence no .FOO.qvs file.
+* there is no concept of quality values, and hence no .FOO.qvs or .FOO.arw 
file.
 
 * every .fasta scaffold (a sequence with runs of N's between contigs 
estimating the
   length of the gap) is broken into a separate contig sequence in the DB and 
the
@@ -134,7 +147,9 @@ are currently as follows:
 Builds an initial data base, or adds to an existing database, either (a) the 
list of
 .fasta files following the database name argument, or (b) the list of .fasta 
files in
 \<file\> if the -f option is used, or (c) entries piped from the standard 
input if the
--i option is used.  If a faux file name, \<name\>, follows the -i option then 
all the
+-i option is used.  If the DB is being created it is established as a 
Sequence-DB (S-DB)
+otherwise its type is unchanged.  If a faux file name, \<name\>, follows the 
-i option
+then all the
 input received is considered to have come from a file by the name of 
\<name\>.fasta by
 DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  The 
SMRT cells
 in a given named input (i.e. all sources other than -i without a name) can 
only be
@@ -165,12 +180,13 @@ set to any positive value with the -w option.
 3. quiva2DB [-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )
 ```
 
-Adds .quiva streams to an existing DB "path".  The data comes from (a) the 
given .quiva
+Adds .quiva streams to an existing DB "path".  The DB must either be an S-DB 
or a
+Q-DB and upon completion the DB is a Q-DB.  The data comes from (a) the given 
.quiva
 files on the command line, or (b) those in the file specified by the -f 
option, or
-(c) the standard input if the -i option is given. The input files must be 
added in the
+(c) the standard input if the -i option is given. The input files can be added 
incrementally
+but must be added in the
 same order as the .fasta files were and have the same root names, e.g. 
FOO.fasta and
-FOO.quiva. The files can be added incrementally but must be added in the same 
order as
-their corresponding .fasta files. This is enforced by the program. With the -l 
option
+FOO.quiva.  This is enforced by the program. With the -l option
 set the compression scheme is a bit lossy to get more compression (see the 
description
 of dexqv in the DEXTRACTOR module here).
 
@@ -178,18 +194,44 @@ of dexqv in the DEXTRACTOR module here).
 4. DB2quiva [-vU] <path:db>
 ```
 
-The set of .quiva files within the given DB are recreated from the DB exactly 
as they
+The set of .quiva files within the given Q-DB are recreated from the DB 
exactly as they
 were input.  That is, this is a perfect inversion, including the 
reconstitution of the
 proper .quiva headers.  Because of this property, one can, if desired, delete 
the
 .quiva source files once they are in the DB as they can always be recreated 
from it.
-Entries imported from the standard input will be place in the faux file name 
given on
+Entries imported from the standard input will be placed in the faux file name 
given on
 import, or to the standard output if no name was given. 
 By .fastq convention each QV vector is output as a line without new-lines, and 
by
 default the Deletion Tag entry is in lower case letters.  The -U option 
specifies
 upper case letters should be used instead.
 
 ```
-5. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
+5. arrow2DB [-v] <path:db> ( -f<file> | -i | <input:arrow> ... )
+```
+
+Adds .arrow streams to an existing DB "path".  The DB must either be an S-DB 
or an
+A-DB and upon completion the DB is an A-DB.  The data comes from (a) the given 
.arrow
+files on the command line, or (b) those in the file specified by the -f 
option, or
+(c) the standard input if the -i option is given. The input files can be added
+incrementally but must be added in the
+same order as the .fasta files were and have the same root names, e.g. 
FOO.fasta and
+FOO.arrow.  This is enforced by the program.
+
+```
+6. DB2arrow [-v] [-w<int(80)>] <path:db>
+```
+
+The set of .arrow files within the given A-DB are recreated from the DB 
exactly as they
+were input.  That is, this is a perfect inversion, including the 
reconstitution of the
+proper .arrow headers.  Because of this property, one can, if desired, delete 
the
+.arrow source files once they are in the DB as they can always be recreated 
from it.
+Entries imported from the standard input will be placed in the faux file name 
given on
+import, or to the standard output if no name was given. 
+By default the output sequences are formatted 80 chars per line,
+but the characters per line, or line width, can be
+set to any positive value with the -w option.
+
+```
+7. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
 ```
 
 Builds an initial map DB or DAM, or adds to an existing DAM, either (a) the 
list of
@@ -197,13 +239,13 @@ Builds an initial map DB or DAM, or adds to an existing 
DAM, either (a) the list
 \<file\> if the -f option is used, or (c) entries piped from the standard 
input if the -i
 option is used.  If a faux file name, \<name\>, follows the -i option then all 
the input
 received is considered to have come from a file by the name of \<name\>.fasta 
by
-DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  Any 
.fasta
+DAM2fasta, otherwise it will be sent to the standard output by DAM2fasta.  Any 
.fasta
 entry that has a run of N's in it will be split into separate "contig" entries 
and the
 interval of the contig in the original entry recorded. The header for each 
.fasta entry
 is saved with the contigs created from it.
 
 ```
-6. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
+8. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
 ```
 
 The set of .fasta files for the given map DB or DAM are recreated from the DAM
@@ -217,7 +259,7 @@ should be used, and the characters per line, or line width, 
can be set to any po
 value with the -w option.
 
 ```
-7. DBsplit [-a] [-x<int>] [-s<int(200)>] <path:db|dam>
+9. DBsplit [-af] [-x<int>] [-s<int(200)>] <path:db|dam>
 ```
 
 Divide the database \<path\>.db or \<path\>.dam conceptually into a series of 
blocks
@@ -237,7 +279,7 @@ question has previously bin split, i.e. one is not 
interactively asked if they w
 to proceed.
 
 ```
-8. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
+10. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
 ```
 
 Runs the symmetric DUST algorithm over the reads in the untrimmed DB 
\<path\>.db or
@@ -258,7 +300,7 @@ This permits job parallelism in block-sized chunks, and the 
resulting sequence o
 block tracks can then be merged into a track for the entire untrimmed DB with 
Catrack.
 
 ```
-9. Catrack [-v] <path:db|dam> <track:name>
+11. Catrack [-v] <path:db|dam> <track:name>
 ```
 
 Find all block tracks of the form .\<path\>.#.\<track\>... and merge them into 
a single
@@ -267,8 +309,8 @@ encode the same kind of track data (this is checked), and 
the files must exist f
 block 1, 2, 3, ... up to the last block number.
 
 ```
-10. DBshow [-unqUQ] [-w<int(80)>] [-m<track>]+
-                    <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+12. DBshow [-unqaUQA] [-w<int(80)>] [-m<track>]+
+                      <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
 ```
 
 Displays the requested reads in the database \<path\>.db or \<path\>.dam.  By 
default the
@@ -285,10 +327,14 @@ represents the index of the last read in the actively 
loaded db.  For example,
 example, 1-$ displays every read in the active db (the default).
 
 By default a .fasta file of the read sequences is displayed.  If the -q option 
is
-set, then the QV streams are also displayed in a non-standard modification of 
the
-fasta format.  If the -n option is set then the DNA sequence is *not* 
displayed.
-If the -Q option is set then a .quiva file is displayed  and in this case the 
-n
-and -m options mayt not be set (and the -q and -w options have no effect).
+set and the DB is a Q-DB, then the QV streams are also displayed in a 
non-standard
+modification of the fasta format.
+Similarly, if the -a option is set and the DB is an A-DB, then the pulse width 
stream is
+also displayed in a non-standard format.
+If the -n option is set then the DNA sequence is *not* displayed.
+If the -Q option is set then a .quiva file of the selected reads is displayed 
and
+all other options except -u and -U are ignored.  If the -A option is set then 
a .arrow
+file of the selected reads is displayed and all other options except -u and -w 
are ignored.
 
 If one or more masks are set with the -m option then the track intervals are 
also
 displayed in an additional header line and the bases within an interval are 
displayed
@@ -297,44 +343,63 @@ sequences are in lower case and 80 chars per line.  The 
-U option specifies uppe
 case should be used, and the characters per line, or line width, can be set to 
any
 positive value with the -w option.
 
-The .fasta or .quiva files that are output can be converted into a DB by 
fasta2DB
-and quiva2DB (if the -q and -n options are not set and no -m options are set),
-giving one a simple way to make a DB of a subset of the reads for testing 
purposes.
+The .fasta, .quiva, and .arrow files that are output can be used to build a 
new DB with
+fasta2DB, quiva2DB, and arrow2DB, giving one a simple way to make a DB of a 
subset of
+the reads for testing purposes.
 
 ```
-12. DBdump [-rhsiqp] [-uU] [-m<track>]+
-                     <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+13. DBdump [-rhsaqip] [-uU] [-m<track>]+
+                      <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
 ```
 
 Like DBshow, DBdump allows one to display a subset of the reads in the DB and 
select
 which information to show about them including any mask tracks.  The 
difference is
 that the information is written in a very simple "1-code" ASCII format that 
makes it
-easy for one to read and parse the information for further use.  -r requests 
that each
-read number be displayed (useful if only a subset of reads is requested).  -h 
prints
-the header information which is the source file name, well #, and pulse range.
--s requests the sequence be output, -i requests that the intrinsic quality 
values be
-output, -q requests that the 5 quiva sequences be output, -p requests the 
repeat
-profile be output (if available), and -m\<track\> requests that mask \<track\> 
be output.
+easy for one to read and parse the information for further use.  The option 
flags determine
+which items of information are output as follows:
+
+* -r requests that each read number be displayed in an R-line (see below, 
useful if only a
+subset of reads is requested).
+
+* -h requests the header information be output as the source file name on an 
H-line, the
+well # and pulse range on an L-line, and optionally the quality of the read if 
given on a Q-line.
+
+* -s requests the sequence be output on an S-line.
+
+* -a requests the Arrow information be output as a pulse-width string on an 
A-line and
+the 4 SNR channel values on an N-line.
+
+* -q requests that the 5 Quiver quality streams be output on d-, c-, i-, m-, 
and s-lines.
+
+* -i requests that the intrinsic quality values be output on an I-line.
+
+* -p requests the repeat profile be output (if available) on a P-line.
+
+* -m\<track\> requests that mask \<track\> be output on a T-line.
+
 Set -u if you want data from the untrimmed database (the default is trimmed) 
and
 set -U if you'd like upper-case letter used in the DNA sequence strings.
 
-The format is very simple.  Each requested piece of information occurs on a 
line.  The
+The format is very simple.  A requested unit of information occurs on a line.  
The
 first character of every line is a "1-code" character that tells you what 
information
-to expect on the line.  The rest of the line contains information where each 
item is
+to expect on the line.  The rest of the line contains the information where 
each item is
 separated by a single blank space.  Strings are output as first an integer 
giving the
 length of the string, a blank space, and then the string terminated by a 
new-line.
 Intrinsic quality values are between 0 and 50, inclusive, and a vector of said 
are
 displayed as an alphabetic string where 'a' is 0, 'b' is '1', ... 'z' is 25, 
'A' is
 26, 'B' is 27, ... and 'Y' is 50.  Repeat profiles are also displayed as 
string where
 '_' denotes 0 repetitions, and then 'a' through 'N' denote the values 1 
through 40,
-respectively.  
+respectively.   The set of all possible lines is as follows:
 
 ```
     R #              - read number
     H # string       - original file name string (header)
     L # # #          - location: well, pulse start, pulse end
+    Q #              - quality of read (#/1000)
+    N # # # #        - SNR of ACGT channels (#/100)
     Tx #n (#b #e)^#n - x'th track on command line, #n intervals all on same 
line
     S # string       - sequence string
+    A # string       - arrow pulse-width string
     I # string       - intrinsic quality vector (as an ASCII string)
     P # string       - repeat profile vector (as an ASCII string)
     d # string       - Quiva deletion values (as an ASCII string)
@@ -352,12 +417,12 @@ the number of reads (X=R), the number of masks (X=M), or 
the total number of
 characters in all headers (X=H), sequences (X=S), intrinsic quality vectors 
(X=I),
 read profile vector (X=P), or track (X=T#).  And '@ X #' gives the maximum 
number of
 characters in any header (X=H), sequence (X=S), intrincic quality vector 
(X=I), read
-profile vector (X=P), or track (X=T#).  The size numbers for the Quiva strings 
are
-identical to that for the sequence as they are all of the same length for any
-given entry.
+profile vector (X=P), or track (X=T#).  The size numbers for the Quiva strings 
and
+Arrow pulse width strings are identical to that for the sequence as they are 
all of
+the same length for any given entry.
 
 ```
-12. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
+14. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
 ```
 
 Show overview statistics for all the reads in the trimmed data base 
\<path\>.db or
@@ -369,7 +434,7 @@ intervals along the read can be specified with the -m 
option in which case a sum
 and a histogram of the interval lengths is displayed.
 
 ```
-13. DBrm <path:db|dam> ...
+15. DBrm <path:db|dam> ...
 ```
 
 Delete all the files for the given data bases.  Do not use rm to remove a 
database, as
@@ -377,7 +442,15 @@ there are at least two and often several secondary files 
for each DB including t
 files, and all of these are removed by DBrm.
 
 ```
-14.  simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] 
[-e<double(.15)]
+16. DBwipe <path:db>
+```
+
+Delete any Arrow or Quiver data from the given database.  This removes the .arw
+and .qvs files and resets the information in the .idx file that pertains to Arrow
+or Quiver.  Basically, it converts an A-DB or Q-DB back to a simple S-DB.  The
+command takes exactly one database and cannot be applied to a .dam or to a block.
+
+```
+17.  simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] 
[-e<double(.15)]
                                   [-c<double(50.)>] [-f<double(.5)>] 
[-x<int(4000)>]
                                   [-w<int(80)>] [-r<int>] [-M<file>]
 ```
@@ -412,7 +485,7 @@ a read is say 's b e' then if b \< e the read is a 
perturbed copy of s[b,e] in t
 forward direction, and a perturbed copy s[e,b] in the reverse direction 
otherwise.
 
 ```
-15. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
+18. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
 ```
 
 Generate a random DNA sequence of length genlen*1Mbp that has an AT-bias of -b.
diff --git a/quiva2DB.c b/arrow2DB.c
similarity index 58%
copy from quiva2DB.c
copy to arrow2DB.c
index 6d099f5..19975aa 100644
--- a/quiva2DB.c
+++ b/arrow2DB.c
@@ -1,8 +1,8 @@
 
/*******************************************************************************************
  *
- *  Adds the given .quiva files to an existing DB "path".  The input files 
must be added in
+ *  Adds the given .arrow files to an existing DB "path".  The input files 
must be added in
  *  the same order as the .fasta files were and have the same root names, e.g. 
FOO.fasta
- *  and FOO.quiva.  The files can be added incrementally but must be added in 
the same order  
+ *  and FOO.arrow.  The files can be added incrementally but must be added in 
the same order  
  *  as the .fasta files.  This is enforced by the program.  With the -l option 
set the
  *  compression scheme is a bit lossy to get more compression (see the 
description of dexqv
  *  in the DEXTRACTOR module).
@@ -31,7 +31,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-vl] <path:string> ( -f<file> | -i | <input:quiva> ... 
)";
+static char *Usage = "[-v] <path:db> ( -f<file> | -i | <input:arrow> ... )";
 
 typedef struct
   { int    argc;
@@ -95,18 +95,14 @@ int main(int argc, char *argv[])
 { FILE      *istub;
   char      *root, *pwd;
 
-  FILE      *quiva, *indx;
-  int64      coff;
+  FILE      *arrow, *indx;
+  int64      boff;
 
   HITS_DB    db;
   HITS_READ *reads;
   int        nfiles;
 
-  FILE      *temp;
-  char      *tname;
-
   int        VERBOSE;
-  int        LOSSY;
   int        PIPE;
   FILE      *INFILE;
 
@@ -115,7 +111,7 @@ int main(int argc, char *argv[])
   { int   i, j, k;
     int   flags[128];
 
-    ARG_INIT("quiva2DB")
+    ARG_INIT("arrow2DB")
 
     INFILE = NULL;
 
@@ -139,7 +135,6 @@ int main(int argc, char *argv[])
     argc = j;
 
     VERBOSE = flags['v'];
-    LOSSY   = flags['l'];
     PIPE    = flags['i'];
 
     if (INFILE != NULL && PIPE)
@@ -154,78 +149,77 @@ int main(int argc, char *argv[])
       }
   }
 
-  //  Open DB stub file, index, and .qvs file for appending.  Load db and read 
records,
-  //    get number of cells from stub file, and note current offset to end of 
.qvs
+  //  Open DB stub file, index, and .arw file for appending.  Load db and read 
records,
+  //    get number of cells from stub file, and note current offset to end of 
.arw
 
   root   = Root(argv[1],".db");
   pwd    = PathTo(argv[1]);
   istub  = Fopen(Catenate(pwd,"/",root,".db"),"r");
   if (istub == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      exit (1);
-    }
+    exit (1);
   if (fscanf(istub,DB_NFILE,&nfiles) != 1)
-    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
   indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
   if (indx == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      exit (1);
-    }
+    exit (1);
   if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
   reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*db.ureads,"Allocating DB 
index");
   if (reads == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
+    exit (1);
+  if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
-  if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+
+  if (reads[0].coff >= 0 && (db.allarr & DB_ARROW) == 0)
+    { fprintf(stderr,"%s: Database %s has Quiver data!\n",Prog_Name,root);
       exit (1);
     }
 
-  quiva = NULL;
-  temp  = NULL;
-  coff  = 0;
+  arrow = NULL;
+  boff  = 0;
 
   if (reads[0].coff < 0)
-    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"w");
+    arrow = Fopen(Catenate(pwd,PATHSEP,root,".arw"),"w");
   else
-    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r+");
+    arrow = Fopen(Catenate(pwd,PATHSEP,root,".arw"),"r+");
 
-  tname = 
Strdup(Catenate(".",PATHSEP,root,Numbered_Suffix("",getpid(),".tmp")),
-                 "Allocating temporary name");
-  temp = Fopen(tname,"w+");
+  if (arrow == NULL)
+    goto error;
+  fseeko(arrow,0,SEEK_END);
+  boff = ftello(arrow);
 
-  if (quiva == NULL || temp == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      goto error;
-    }
-  fseeko(quiva,0,SEEK_END);
-  coff = ftello(quiva);
-
-  //  Do a merged traversal of cell lines in .db stub file and .quiva files to 
be
+  //  Do a merged traversal of cell lines in .db stub file and .arrow files to 
be
   //    imported, driving the loop with the cell line #
 
   { FILE          *input = NULL;
     char          *path = NULL;
     char          *core = NULL;
+    char          *read;
+    int            rmax, rlen, eof;
     File_Iterator *ng = NULL;
     char           lname[MAX_NAME];
     int            first, last, cline;
     int            cell;
 
+    //  Buffer for accumulating .arrow sequence over multiple lines
+
+    rmax  = MAX_NAME + 60000;
+    read  = (char *) Malloc(rmax+1,"Allocating line buffer");
+    if (read == NULL)
+      goto error;
+
     if (!PIPE)
       { ng = init_file_iterator(argc,argv,INFILE,2);
         if (ng == NULL)
-          { fprintf(stderr,"%s",Ebuffer);
-            goto error;
-          }
+          goto error;
       }
 
     for (cell = 0; cell < nfiles; cell++)
@@ -233,7 +227,7 @@ int main(int argc, char *argv[])
 
         if (cell == 0)
 
-          //  First addition, a pipe: find the first cell that does not have 
.quiva's yet
+          //  First addition, a pipe: find the first cell that does not have 
.arrow's yet
           //     (error if none) and set input source to stdin.
 
           if (PIPE)
@@ -249,23 +243,23 @@ int main(int argc, char *argv[])
                   cell += 1;
                 }
               if (cell >= nfiles)
-                { fprintf(stderr,"%s: All .quiva's have already been added 
!?\n",Prog_Name);
+                { fprintf(stderr,"%s: All .arrows's have already been added 
!?\n",Prog_Name);
                   goto error;
                 }
 
               input = stdin;
 
               if (VERBOSE)
-                { fprintf(stderr,"Adding quiva's from stdin ...\n");
+                { fprintf(stderr,"Adding arrows's from stdin ...\n");
                   fflush(stderr);
                 }
               cline = 0;
             }
 
-          //  First addition, not a pipe: then get first .quiva file name 
(error if not one) to
+          //  First addition, not a pipe: then get first .arrow file name 
(error if not one) to
           //    add, find the first cell name whose file name matches (error 
if none), check that
-          //    the previous .quiva's have been added and this is the next 
slot.  Then open
-          //    the .quiva file for compression
+          //    the previous .arrow's have been added and this is the next 
slot.  Then open
+          //    the .arrow file for compression
 
           else
             { if (! next_file(ng))
@@ -273,16 +267,12 @@ int main(int argc, char *argv[])
                   goto error;
                 }
               if (ng->name == NULL)
-                { fprintf(stderr,"%s",Ebuffer);
-                  goto error;
-                }
+                goto error;
   
-              core = Root(ng->name,".quiva");
+              core = Root(ng->name,".arrow");
               path = PathTo(ng->name);
-              if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
-                { fprintf(stderr,"%s",Ebuffer);
-                  goto error;
-                }
+              if ((input = Fopen(Catenate(path,"/",core,".arrow"),"r")) == NULL)
+                goto error;
   
               first = 0;
               while (cell < nfiles)
@@ -301,25 +291,25 @@ int main(int argc, char *argv[])
                 }
         
               if (first > 0 && reads[first-1].coff < 0)
-                { fprintf(stderr,"%s: Predecessor of %s.quiva has not been added yet\n",
+                { fprintf(stderr,"%s: Predecessor of %s.arrow has not been added yet\n",
                                  Prog_Name,core);
                   goto error;
                 }
               if (reads[first].coff >= 0)
-                { fprintf(stderr,"%s: %s.quiva has already been added\n",Prog_Name,core);
+                { fprintf(stderr,"%s: %s.arrow has already been added\n",Prog_Name,core);
                   goto error;
                 }
   
               if (VERBOSE)
-                { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                { fprintf(stderr,"Adding '%s.arrow' ...\n",core);
                   fflush(stderr);
                 }
               cline = 0;
             }
 
         //  Not the first addition: get next cell line.  If not a pipe and the file name is new,
-        //    then close the current .quiva, open the next one and after ensuring the names
-        //    match, open it for compression
+        //    then close the current .arrow, open the next one and after ensuring the names
+        //    match, open it for incorporation
 
         else
           { first = last;  
@@ -335,8 +325,8 @@ int main(int argc, char *argv[])
                 ungetc(c,input);
               }
             else if (strcmp(lname,fname) != 0)
-              { if (fgetc(input) != EOF)
-                  { fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+              { if ( ! eof)
+                  { fprintf(stderr,"%s: Too many reads in %s.arrow while handling %s.fasta\n",
                                    Prog_Name,core,fname);
                     goto error;
                   }
@@ -348,16 +338,12 @@ int main(int argc, char *argv[])
                 if ( ! next_file(ng))
                   break;
                 if (ng->name == NULL)
-                  { fprintf(stderr,"%s",Ebuffer);
-                    goto error;
-                  }
+                  goto error;
 
                 path = PathTo(ng->name);
-                core = Root(ng->name,".quiva");
-                if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
-                  { fprintf(stderr,"%s",Ebuffer);
-                    goto error;
-                  }
+                core = Root(ng->name,".arrow");
+                if ((input = Fopen(Catenate(path,"/",core,".arrow"),"r")) == NULL)
+                  goto error;
 
                 if (strcmp(core,fname) != 0)
                   { fprintf(stderr,"%s: Files not being added in order (expect %s, given %s)\n",
@@ -366,92 +352,128 @@ int main(int argc, char *argv[])
                   }
 
                 if (VERBOSE)
-                  { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                  { fprintf(stderr,"Adding '%s.arrow' ...\n",core);
                     fflush(stderr);
                   }
                 cline = 0;
               }
           }
 
-        //  Compress reads [first..last) from open .quiva appending to .qvs and record
-        //    offset in .coff field of reads (offset of first in a cell is to the compression
-        //    table).
+        //  If first cell or source is a new file, then start IO
 
-        { int64     qpos;
-          QVcoding *coding;
-          int       i, s;
+        if (cline == 0)
+          {
+            // Read in first line and make sure it is a header in PACBIO format.
 
-          rewind(temp);
-          if (ftruncate(fileno(temp),0) < 0)
-            { fprintf(stderr,"%s: System error: could not truncate temporary file\n",Prog_Name);
-              goto error;
-            }
-          Set_QV_Line(cline);
-          s = QVcoding_Scan(input,last-first,temp);
-          if (s < 0)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
-          if (s != last-first)
-            { if (PIPE)
-                fprintf(stderr,"%s: Insufficient # of reads on input while handling %s.fasta\n",
-                               Prog_Name,fname);
-              else
-                fprintf(stderr,"%s: Insufficient # of reads in %s.quiva while handling %s.fasta\n",
-                               Prog_Name,core,fname);
-              goto error;
-            }
-
-          coding = Create_QVcoding(LOSSY);
-          if (coding == NULL)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
+            rlen = 0;
+            eof  = (fgets(read,MAX_NAME,input) == NULL);
+            if (read[strlen(read)-1] != '\n')
+              { fprintf(stderr,"File %s.arrow, Line 1: Fasta line is too long (> %d chars)\n",
+                               core,MAX_NAME-2);
+                goto error;
+              }
+            if (!eof && read[0] != '>')
+              { fprintf(stderr,"File %s.arrow, Line 1: First header in arrow file is missing\n",
+                                core);
+                goto error;
+              }
+          }
 
-          coding->prefix = Strdup(".qvs","Allocating header prefix");
-          if (coding->prefix == NULL)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
+        //  Compress reads [first..last) from open .arrow appending to .arw and record
+        //    snr in .coff field of reads (offset is the same as for the DNA sequence, .boff)
 
-          qpos = ftello(quiva);
-          Write_QVcoding(quiva,coding);
+        { int i, x;
 
-          //  Then compress and append to the .qvs each compressed QV entry
- 
-          rewind(temp);
-          Set_QV_Line(cline);
           for (i = first; i < last; i++)
-            { s = Read_Lines(temp,1);
-              if (s < -1)
-                { fprintf(stderr,"%s",Ebuffer);
+            { char  *find;
+              int    clen;
+              float  snr[4];
+              uint16 cnr[4];
+
+              if (eof)
+                { if (PIPE)
+                    fprintf(stderr,"%s: Insufficient # of reads on input while handling %s.arrow\n",
+                                   Prog_Name,fname);
+                  else
+                    { fprintf(stderr,"%s: Insufficient # of reads in %s.arrow while handling",
+                                     Prog_Name,core);
+                      fprintf(stderr," %s.arrow\n",fname);
+                    }
+                  goto error;
+                }
+
+              find = index(read+(rlen+1),' ');
+              if (find == NULL)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio header line format error\n",
+                                 core,cline);
                   goto error;
                 }
-              reads[i].coff = qpos;
-              s = Compress_Next_QVentry(temp,quiva,coding,LOSSY);
-              if (s < 0)
-                { fprintf(stderr,"%s",Ebuffer);
+              *find = '\0';
+              if (strcmp(read+(rlen+1),prolog) != 0)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio prolog doesn't match DB entry\n",
+                                 core,cline);
                   goto error;
                 }
-              if (s != reads[i].rlen)
-                { fprintf(stderr,"%s: Length of quiva %d is different than fasta in DB\n",
-                                 Prog_Name,i+1);
+              *find = ' ';
+              x = sscanf(find+1," SN=%f,%f,%f,%f\n",snr,snr+1,snr+2,snr+3);
+              if (x != 4)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio header line format error\n",
+                                 core,cline);
                   goto error;
                 }
-              qpos = ftello(quiva);
-            }
-          cline = Get_QV_Line();
 
-          Free_QVcoding(coding);
+              rlen  = 0;
+              while (1)
+                { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
+                  cline += 1;
+                  x = strlen(read+rlen)-1;
+                  if (read[rlen+x] != '\n')
+                    { if (read[rlen] == '>')
+                        { fprintf(stderr,"File %s.arrow, Line %d:",core,cline);
+                          fprintf(stderr," Fasta header line is too long (> %d chars)\n",
+                                         MAX_NAME-2);
+                          goto error;
+                        }
+                      else
+                        x += 1;
+                    }
+                  if (eof || read[rlen] == '>')
+                    break;
+                  rlen += x;
+                  if (rlen + MAX_NAME > rmax)
+                    { rmax = ((int) (1.2 * rmax)) + 1000 + MAX_NAME;
+                      read = (char *) realloc(read,rmax+1);
+                      if (read == NULL)
+                        { fprintf(stderr,"File %s.arrow, Line %d:",core,cline);
+                          fprintf(stderr," Out of memory (Allocating line buffer)\n");
+                          goto error;
+                        }
+                    }
+                }
+              read[rlen] = '\0';
+
+              for (x = 0; x < 4; x++)
+                cnr[x] = (uint32) (snr[x] * 100.);
+
+              *((uint64  *) &(reads[i].coff)) = ((uint64) cnr[0]) << 48 |
+                                                ((uint64) cnr[1]) << 32 |
+                                                ((uint64) cnr[2]) << 16 |
+                                                ((uint64) cnr[3]);
+
+              Number_Arrow(read);
+              Compress_Read(rlen,read);
+              clen = COMPRESSED_LEN(rlen);
+              fwrite(read,1,clen,arrow);
+            }
         }
       }
 
-    if (fgetc(input) != EOF)
+    if (!eof)
       { if (PIPE)
           fprintf(stderr,"%s: Too many reads on input while handling %s.fasta\n",
                          Prog_Name,lname);
         else
-          fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+          fprintf(stderr,"%s: Too many reads in %s.arrow while handling %s.fasta\n",
                          Prog_Name,core,lname);
         goto error;
       }
@@ -461,10 +483,8 @@ int main(int argc, char *argv[])
         free(path);
         if (next_file(ng))
           { if (ng->name == NULL)
-              { fprintf(stderr,"%s",Ebuffer);
-                goto error;
-              }
-            core = Root(ng->name,".quiva");
+              goto error;
+            core = Root(ng->name,".arrow");
             fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
             goto error;
           }
@@ -473,35 +493,30 @@ int main(int argc, char *argv[])
 
   //  Write the db record and read index into .idx and clean up
 
+  db.allarr |= DB_ARROW;
   rewind(indx);
   fwrite(&db,sizeof(HITS_DB),1,indx);
   fwrite(reads,sizeof(HITS_READ),db.ureads,indx);
 
   fclose(istub);
   fclose(indx);
-  fclose(quiva);
-  fclose(temp);
-  unlink(tname);
+  fclose(arrow);
 
   exit (0);
 
-  //  Error exit:  Either truncate or remove the .qvs file as appropriate.
+  //  Error exit:  Either truncate or remove the .arw file as appropriate.
 
 error:
-  if (coff != 0)
-    { fseeko(quiva,0,SEEK_SET);
-      if (ftruncate(fileno(quiva),coff) < 0)
-        fprintf(stderr,"%s: Fatal: could not restore %s.qvs after error, truncate failed\n",
+  if (boff != 0)
+    { fseeko(arrow,0,SEEK_SET);
+      if (ftruncate(fileno(arrow),boff) < 0)
+        fprintf(stderr,"%s: Fatal: could not restore %s.arw after error, truncate failed\n",
                        Prog_Name,root);
     }
-  if (quiva != NULL)
-    { fclose(quiva);
-      if (coff == 0)
-        unlink(Catenate(pwd,PATHSEP,root,".qvs"));
-    }
-  if (temp != NULL)
-    { fclose(temp);
-      unlink(tname);
+  if (arrow != NULL)
+    { fclose(arrow);
+      if (boff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".arw"));
     }
   fclose(istub);
   fclose(indx);
diff --git a/fasta2DAM.c b/fasta2DAM.c
index 15e074a..af6722a 100644
--- a/fasta2DAM.c
+++ b/fasta2DAM.c
@@ -527,6 +527,7 @@ int main(int argc, char *argv[])
         db.totlen = totlen;
         db.maxlen = maxlen;
         db.cutoff = -1;
+        db.allarr = 0;
       }
     else
       { for (c = 0; c < 4; c++)
diff --git a/fasta2DB.c b/fasta2DB.c
index 071bbe0..98298e3 100644
--- a/fasta2DB.c
+++ b/fasta2DB.c
@@ -35,7 +35,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-v] <path:string> ( -f<file> | -i[<name>] | <input:fasta> ... )";
+static char *Usage = "[-v] <path:db> ( -f<file> | -i[<name>] | <input:fasta> ... )";
 
 static char number[128] =
     { 0, 0, 0, 0, 0, 0, 0, 0,
@@ -252,23 +252,21 @@ int main(int argc, char *argv[])
 
         ureads  = 0;
         offset  = 0;
-        boff    = 0;
-        ioff    = 0;
       }
     else
       { if (fscanf(istub,DB_NFILE,&ocells) != 1)
           { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
-            goto error;
+            exit (1);
           }
 
         bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"r+");
         indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
         if (bases == NULL || indx == NULL)
-          goto error;
+          exit (1);
 
         if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
           { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
-            goto error;
+            exit (1);
           }
         fseeko(bases,0,SEEK_END);
         fseeko(indx, 0,SEEK_END);
@@ -340,8 +338,7 @@ int main(int argc, char *argv[])
       { FILE *input;
         char  prolog[MAX_NAME];
         char *path, *core;
-        int   nline, eof, rlen, pcnt;
-        int   pwell;
+        int   eof;
 
         //  Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
 
@@ -367,6 +364,22 @@ int main(int argc, char *argv[])
             input = stdin;
           }
 
+        //  Get the header of the first line.  If the file is empty skip.
+
+        eof   = (fgets(read,MAX_NAME,input) == NULL);
+        if (eof || strlen(read) < 1)
+          { free(core);
+            fclose(input);
+            if (PIPE != NULL)
+              { fprintf(stderr,"Standard input is empty, terminating!\n");
+                break;
+              }
+            else
+              { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+                continue;
+              }
+          }
+
         //  Check that core is not too long and name is unique or last source if PIPE'd
 
         if (strlen(core) >= MAX_NAME)
@@ -388,21 +401,6 @@ int main(int argc, char *argv[])
                 }
         }
 
-        //  Get the header of the first line.  If the file is empty skip.
-
-        pcnt  = 0;
-        rlen  = 0;
-        nline = 1;
-        eof   = (fgets(read,MAX_NAME,input) == NULL);
-        if (eof || strlen(read) < 1)
-          { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
-            if (input == stdin)
-              break;
-            fclose(input);
-            free(core);
-            continue;
-          }
-
         //   Add the file name to flist
 
         if (VERBOSE)
@@ -436,8 +434,7 @@ int main(int argc, char *argv[])
               *find = '/';
             }
           else
-            { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line format error\n",
-                             core,nline);
+            { fprintf(stderr,"File %s.fasta, Line 1: Pacbio header line format error\n",core);
               goto error;
             }
         }
@@ -445,7 +442,11 @@ int main(int argc, char *argv[])
         //  Read in all the sequences until end-of-file
 
         { int i, x;
+          int nline, pwell, rlen, pcnt;
 
+          pcnt  = 0;
+          rlen  = 0;
+          nline = 1;
           pwell = -1;
           while (!eof)
             { int   beg, end, clen;
@@ -583,6 +584,7 @@ int main(int argc, char *argv[])
         db.totlen = totlen;
         db.maxlen = maxlen;
         db.cutoff = -1;
+        db.allarr = 0;
       }
     else
       { for (c = 0; c < 4; c++)
diff --git a/quiva2DB.c b/quiva2DB.c
index 6d099f5..55e54d6 100644
--- a/quiva2DB.c
+++ b/quiva2DB.c
@@ -31,7 +31,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-vl] <path:string> ( -f<file> | -i | <input:quiva> ... )";
+static char *Usage = "[-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )";
 
 typedef struct
   { int    argc;
@@ -165,7 +165,7 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fscanf(istub,DB_NFILE,&nfiles) != 1)
-    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
@@ -175,7 +175,11 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+      exit (1);
+    }
+  if ((db.allarr & DB_ARROW) != 0)
+    { fprintf(stderr,"%s: Database %s has Arrow data!\n",Prog_Name,root);
       exit (1);
     }
 
@@ -185,7 +189,7 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/dazzdb.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

[med-svn] [dazzdb] 01/05: Imported Upstream version 1.0+20161112

Reply via email to