Changeset: 96a23f462af3 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=96a23f462af3
Added Files:
        monetdb5/modules/mal/mosaic_hdr.c
Modified Files:
        monetdb5/modules/mal/Tests/All
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic_none.c
        monetdb5/modules/mal/mosaic_rle.c
Branch: mosaic
Log Message:

Update the flow and administration
Simplify the iterations.


diffs (truncated from 948 to 300 lines):

diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -71,6 +71,7 @@ mosaic_none
 mosaic_rle
 mosaic_mix
 
+mosaic_none_qry
 #HAVE_RAPTOR?rdf
 
 # might show different output if openssl is compiled without full sha2
diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -31,6 +31,8 @@
 
 //#define _DEBUG_MOSAIC_
 
+#define MOSAIC_VERSION 20140808
+
 /* do not invest in compressing BATs smaller than this */
 #define MIN_INPUT_COUNT 1
 
@@ -39,10 +41,11 @@
 #define MOSAIC_NONE     0              // no compression at all
 #define MOSAIC_RLE      1              // use run-length encoding
 #define MOSAIC_FRONT    2              // use front compression for >=4 byte fields
-#define MOSAIC_DELTATA    3            // use delta encoding
+#define MOSAIC_DELTA   3               // use delta encoding
 #define MOSAIC_BITMAPS  4              // use limited set of bitmaps
-#define MOSAIC_RANGE    5              // use linear model 
+#define MOSAIC_RANGE    5              // use linear model
 #define MOSAIC_GUASSIAN 6              // use guassian model fitting
+#define MOSAIC_EOL             7               // marker for the last block
 
 #define MOSAIC_BITS 48                 // maximum number of elements to compress
 
@@ -53,40 +56,38 @@
  * The header is reserved for meta information, e.g. oid indices.
  * The block header encodes the information needed for the chunk decompressor
  */
+#define MOSAICINDEX 4  //> 2 elements
 typedef struct MOSAICHEADER{
-       int mosaicversion;
-       oid index[1000];
-       lng offset[1000];
-} * MosaicHeader;
+       int version;
+       int top;
+       oid index[MOSAICINDEX];
+       lng offset[MOSAICINDEX];
+} * MosaicHdr;
 
 typedef struct MOSAICBLOCK{
        lng tag:4,              // method applied in chunk
        cnt:MOSAIC_BITS;        // compression specific information
-} *MosaicBlk; 
+} *MosaicBlk;
 
 #define MosaicHdrSize  sizeof(struct MOSAICHEADER)
 #define MosaicBlkSize  sizeof(struct MOSAICBLOCK)
 
-#define wordaligned(X,SZ) \
-       X = ((char*)X) + (SZ) +  ((SZ) % sizeof(int)? sizeof(int) - (SZ)%sizeof(int) : 0)
+#define wordaligned(SZ) \
+        ((SZ) +  ((SZ) % sizeof(int)? sizeof(int) - ((SZ)%sizeof(int)) : 0))
 
 
 typedef struct MOSTASK{
        int type;               // one of the permissible types
+       MosaicHdr hdr;  // start of the destination heap
+       MosaicBlk blk;  // current block header
+       char *dst;              // write pointer into current compressed blocks
        BUN     elm;            // elements left to compress
-       char *srcheap;  // start in source heap
-       char *dstheap;  // start of the destination heap
-       char *src, *compressed;// read pointer into source, write pointer into destination
-       MosaicBlk hdr;  // current block header
+       char *src;              // read pointer into source
 
-       // The competing compression scheme leave the number of elements and compressed size
-       lng elements[MOSAIC_METHODS];   
-       lng xsize[MOSAIC_METHODS];              
-       lng time[MOSAIC_METHODS];               
        // collect compression statistics for the particular task
-       lng timing[MOSAIC_METHODS];
-       lng winners[MOSAIC_METHODS];    
-       int percentage[MOSAIC_METHODS]; // compression size for the last batch 0..100 percent
+       lng time[MOSAIC_METHODS];
+       lng wins[MOSAIC_METHODS];       
+       int perc[MOSAIC_METHODS]; // compression size for the last batch 0..100 percent
 } *MOStask;
 
 /* we keep a condensed OID index anchored to the compressed blocks */
@@ -100,57 +101,69 @@ typedef struct MOSINDEX{
 /* Run through a column to produce a compressed version */
 
 /* simple include the details of the hardwired compressors */
+#include "mosaic_hdr.c"
 #include "mosaic_none.c"
 #include "mosaic_rle.c"
 
-#ifdef _DEBUG_MOSAIC_
+static void
+MOSinit(MOStask task, BAT *b){
+       char * base = Tloc(b,BUNfirst(b));
+       task->type = b->ttype;
+       task->hdr = (MosaicHdr) base;
+       base += MosaicHdrSize;
+       task->blk = (MosaicBlk)  base;
+       task->dst = base + MosaicBlkSize;
+}
+
+static void
+MOSclose(MOStask task){
+       if( task->blk->cnt == 0){
+               task->dst -= MosaicBlkSize;
+               return;
+       }
+}
+
 static void
 MOSdumpTask(Client cntxt,MOStask task)
 {
        int i;
        mnstr_printf(cntxt->fdout,"#type %d todo "LLFMT"\n", task->type, (lng)task->elm);
-       mnstr_printf(cntxt->fdout,"#winners ");
+       mnstr_printf(cntxt->fdout,"#wins ");
        for(i=0; i< MOSAIC_METHODS; i++)
-               mnstr_printf(cntxt->fdout,LLFMT " ",task->winners[i]);
-       mnstr_printf(cntxt->fdout,"\n#elements ");
-       for(i=0; i< MOSAIC_METHODS; i++)
-               mnstr_printf(cntxt->fdout,LLFMT " ",task->elements[i]);
-       mnstr_printf(cntxt->fdout,"\n#xsize ");
-       for(i=0; i< MOSAIC_METHODS; i++)
-               mnstr_printf(cntxt->fdout,LLFMT " ",task->xsize[i]);
+               mnstr_printf(cntxt->fdout,LLFMT " ",task->wins[i]);
        mnstr_printf(cntxt->fdout,"\n#time ");
        for(i=0; i< MOSAIC_METHODS; i++)
                mnstr_printf(cntxt->fdout, LLFMT" ",task->time[i]);
-       mnstr_printf(cntxt->fdout,"\n#percentage ");
+       mnstr_printf(cntxt->fdout,"\n#perc ");
        for(i=0; i< MOSAIC_METHODS; i++)
-               mnstr_printf(cntxt->fdout, "%d ",task->percentage[i]);
-       mnstr_printf(cntxt->fdout,"\n#timing ");
-       for(i=0; i< MOSAIC_METHODS; i++)
-               mnstr_printf(cntxt->fdout, LLFMT" ",task->timing[i]);
+               mnstr_printf(cntxt->fdout, "%d ",task->perc[i]);
        mnstr_printf(cntxt->fdout,"\n");
 }
-#endif
 
+// dump a compressed BAT
 static void
 MOSdumpInternal(Client cntxt, BAT *b){
        MOStask task=0;
-       // loop thru the chunks
-       MT_lock_set(&mal_profileLock,"mosaicdump");
+
        task= (MOStask) GDKzalloc(sizeof(*task));
-       task->type = b->ttype;
-       task->elm =  b->T->heap.count;
-       task->compressed = task->srcheap = (void*) Tloc(b, BUNfirst(b));
-       task->compressed += MosaicHdrSize;
-       task->hdr = (MosaicBlk) task->compressed;
-       while(task->elm  >0){
-               switch(task->hdr->tag){
-               case MOSAIC_NONE: MOSdump_none(cntxt,task); break;
-               case MOSAIC_RLE: MOSdump_rle(cntxt,task); break;
+       if( task == NULL)
+               return;
+       MOSinit(task,b);
+       while(task->blk){
+               switch(task->blk->tag){
+               case MOSAIC_NONE:
+                       MOSdump_none(cntxt,task);
+                       MOSskip_none(task);
+                       break;
+               case MOSAIC_RLE:
+                       MOSdump_rle(cntxt,task);
+                       MOSskip_rle(task);
+                       break;
                default: assert(0);
                }
        }
-       MT_lock_unset(&mal_profileLock,"mosaicdump");
 }
+
 str
 MOSdump(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {      
@@ -216,26 +229,6 @@ inheritCOL( BAT *bn, COLrec *cn, BAT *b,
  * Multiple compression techniques are applied at the same time.
  */
 
-static void
-MOSinit(MOStask task){
-       if ( task->elm > 0){
-               task->hdr = (MosaicBlk) task->compressed;
-               task->hdr->tag = MOSAIC_NONE;
-               task->hdr->cnt = 0;
-               wordaligned(task->compressed,MosaicBlkSize);
-               //task->compressed += MosaicBlkSize; // beware of byte alignment
-       }
-}
-
-static void
-MOSclose(MOStask task){
-       if( task->hdr->cnt == 0){
-               wordaligned(task->compressed,-MosaicBlkSize);
-               //task->compressed -= MosaicBlkSize; // beware of byte alignment
-               return; 
-       }
-}
-
 str
 MOScompressInternal(Client cntxt, int *ret, int *bid, int threshold)
 {
@@ -283,22 +276,23 @@ MOScompressInternal(Client cntxt, int *r
        }
 
        // actual compression mosaic
-       // actual compression mosaic
        task= (MOStask) GDKzalloc(sizeof(*task));
        if( task == NULL){
                BBPreleaseref(b->batCacheid);
                BBPreleaseref(bn->batCacheid);
                throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
        }
+       // initialize the non-compressed read pointer
+       task->src = Tloc(b, BUNfirst(b));
+       task->elm = BATcount(b);
 
-       task->type = b->ttype;
-       task->elm = BATcount(b);
-       task->src = task->srcheap = (void*) Tloc(b, BUNfirst(b));
-       task->compressed = task->dstheap = (void*) Tloc(bn,BUNfirst(bn));
-       
-       // initialize the non-compressed block descriptor
-       task->compressed += MosaicHdrSize;
-       MOSinit(task);
+       // prepare a compressed heap
+       MOSinit(task,bn);
+       MOSinitHeader(task);
+
+       // always start with an EOL block
+       task->blk->tag = MOSAIC_EOL;
+       task->blk->cnt = 0;
 
        while(task->elm > 0){
                // default is to extend the non-compressed block
@@ -313,24 +307,42 @@ MOScompressInternal(Client cntxt, int *r
                        chunksize = ch;
                }
 
-               // apply the compression and update the elements left to do
+               // apply the compression to a chunk
                switch(cand){
-               case MOSAIC_RLE: 
-                       MOSclose(task);
-                       MOSinit(task);
-                       MOScompress_rle(cntxt,task); 
-                       MOSinit(task); // prepare for none-compression
+               case MOSAIC_RLE:
+                       // close the non-compressed part
+                       if( task->blk->cnt ){
+                               MOSupdateHeader(cntxt,task);
+                               MOSskip_none(task);
+                               // always start with an EOL block
+                               task->dst = ((char*) task->blk)+ MosaicBlkSize;
+                               task->blk->tag = MOSAIC_EOL;
+                               task->blk->cnt = 0;
+                       }
+                       MOScompress_rle(cntxt,task);
+                       MOSupdateHeader(cntxt,task);
+                       //prepare new block header
+                       task->elm -= task->blk->cnt;
+                       MOSadvance_rle(task);
+                       task->blk->tag = MOSAIC_EOL;
+                       task->blk->cnt = 0;
+                       task->dst = ((char*) task->blk)+ MosaicBlkSize;
                        break;
-               default : 
+               default :
                        // continue to use the last block header.
-                       MOScompress_none(cntxt,task); 
+                       MOScompress_none(cntxt,task);
                }
-               // adjust all tasks based on the elements compressed
-
-#ifdef _DEBUG_MOSAIC_
-               if(0) MOSdumpTask(cntxt,task);
-#endif
        }
+       if( task->blk->tag == MOSAIC_NONE){
+               MOSclose(task);
+               MOSupdateHeader(cntxt,task);
+               task->blk = (MosaicBlk) task->dst;
+               task->blk->tag = MOSAIC_EOL;
+               task->blk->cnt = 0;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to