Changeset: 70ee4991688c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=70ee4991688c
Modified Files:
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic.h
        monetdb5/modules/mal/mosaic.mal
        monetdb5/modules/mal/mosaic_delta.c
        monetdb5/modules/mal/mosaic_dict.c
        monetdb5/modules/mal/mosaic_hdr.c
        monetdb5/modules/mal/mosaic_linear.c
        monetdb5/modules/mal/mosaic_none.c
        monetdb5/modules/mal/mosaic_rle.c
        monetdb5/modules/mal/mosaic_zone.c
        sql/test/mosaic/Tests/compression.sql
Branch: mosaic
Log Message:

Various issues
Added cutoff test for dictionary encodings.
Simplified statistics reporting


diffs (truncated from 631 to 300 lines):

diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -50,10 +50,11 @@ MOSdumpTask(Client cntxt,MOStask task)
 {
        int i;
        mnstr_printf(cntxt->fdout,"# ");
-       for ( i=0; i < MOSAIC_METHODS; i++){
-               mnstr_printf(cntxt->fdout, "%s\t"LLFMT "\t"LLFMT "\t" LLFMT 
"\t",
-                       filtername[i], task->wins[i], 
task->elms[i],task->time[i]);
-       }
+       mnstr_printf(cntxt->fdout,"clk " LLFMT"\tsizes "LLFMT"\t"LLFMT 
"\t%10.2fx\t", 
+               task->timer,task->size,task->xsize, task->xsize ==0 ? 
0:(flt)task->size/task->xsize);
+       for ( i=0; i < MOSAIC_METHODS; i++)
+       if( task->blks[i])
+               mnstr_printf(cntxt->fdout, "%s\t"LLFMT "\t"LLFMT "\t" , 
filtername[i], task->blks[i], task->elms[i]);
 }
 
 // dump a compressed BAT
@@ -159,7 +160,7 @@ str
 MOScompressInternal(Client cntxt, int *ret, int *bid, str properties)
 {
        BAT *b, *bn;
-       BUN cnt;
+       BUN cnt, cutoff =0;
        int i;
        char *c;
        str msg = MAL_SUCCEED;
@@ -226,6 +227,8 @@ MOScompressInternal(Client cntxt, int *r
        // initialize the non-compressed read pointer
        task->src = Tloc(b, BUNfirst(b));
        task->elm = BATcount(b);
+       task->size = b->T->heap.free;
+       task->timer = GDKusec();
 
        // prepare a compressed heap
        MOSinit(task,bn);
@@ -235,12 +238,20 @@ MOScompressInternal(Client cntxt, int *r
        task->blk->tag = MOSAIC_EOL;
        task->blk->cnt = 0;
 
+       cutoff = task->elm > 1000? task->elm - 1000: task->elm;
        while(task->elm > 0){
                // default is to extend the non-compressed block
                //mnstr_printf(cntxt->fdout,"#elements "BUNFMT"\n",task->elm);
                cand = MOSAIC_NONE;
                perc = 100;
                percentage = 100;
+
+               // past the cutoff point, drop the dictionary filter if it has produced no blocks so far; its tests are expensive
+               if( cutoff && cutoff > task->elm){
+                       if( task->blks[MOSAIC_DICT] == 0)
+                               filter[MOSAIC_DICT] = 0;
+                       cutoff = 0;
+               }
                
                // select candidate amongst those
                if ( filter[MOSAIC_RLE]){
@@ -257,7 +268,7 @@ MOScompressInternal(Client cntxt, int *r
                                percentage = perc;
                        }
                }
-               if ( filter[MOSAIC_ZONE]){
+               if (0 && filter[MOSAIC_ZONE]){
                        perc = MOSestimate_zone(cntxt,task);
                        if (perc >= 0 && perc < percentage){
                                cand = MOSAIC_ZONE;
@@ -369,17 +380,21 @@ MOScompressInternal(Client cntxt, int *r
                task->blk->tag = MOSAIC_EOL;
                task->blk->cnt = 0;
        }
+       task->xsize = ((lng)task->dst - (lng)task->hdr) + MosaicHdrSize;
+       task->timer = GDKusec() - task->timer;
 //#ifdef _DEBUG_MOSAIC_
        MOSdumpTask(cntxt,task);
        mnstr_printf(cntxt->fdout,"\n");
 //#endif
-       // if we couldnt compress ignore the result
-       if( task->elms[MOSAIC_NONE] == (lng) cnt){
+       // if we couldn't compress well enough, ignore the result
+/*
+       if( task->xsize && task->size / task->xsize < 1){
                GDKfree(task);
                BBPreleaseref(bn->batCacheid);
                BBPkeepref(*ret = b->batCacheid);
                return MAL_SUCCEED;
        }
+*/
 
        BATsetcount(bn, cnt);
        BATseqbase(bn,b->hseqbase);
@@ -461,6 +476,7 @@ MOSdecompressInternal(Client cntxt, int 
        }
        MOSinit(task,b);;
        task->src = Tloc(bn, BUNfirst(bn));
+       task->timer = GDKusec();
        while(task->blk){
                switch(task->blk->tag){
                case MOSAIC_DICT:
@@ -500,6 +516,7 @@ MOSdecompressInternal(Client cntxt, int 
        bn->T->seq = b->T->seq;
        bn->tsorted = b->tsorted;
        bn->trevsorted = b->trevsorted;
+       task->timer = GDKusec()- task->timer;
        //bn->tkey = b->tkey;
        //bn->batDirty = 1;
 
@@ -591,8 +608,12 @@ MOSsubselect(Client cntxt, MalBlkPtr mb,
        anti = (bit *) getArgReference(stk, pci, i + 4);
        //
        // use default implementation if possible
-       if( !isCompressed(*bid))
-               return ALGsubselect1(ret,bid,low,hgh,li,hi,anti);
+       if( !isCompressed(*bid)){
+               if(cid)
+                       return ALGsubselect2(ret,bid,cid,low,hgh,li,hi,anti);
+               else
+                       return ALGsubselect1(ret,bid,low,hgh,li,hi,anti);
+       }
 
        b= BATdescriptor(*bid);
        if( b == NULL)
@@ -684,7 +705,7 @@ MOSsubselect(Client cntxt, MalBlkPtr mb,
 
 str MOSthetasubselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       int idx, cid =0,  *ret, *bid;
+       int idx, *cid =0,  *ret, *bid;
        BAT *b = 0, *cand = 0, *bn = NULL;
        BUN first = 0,last = 00;
        BUN cnt=0;
@@ -698,14 +719,18 @@ str MOSthetasubselect(Client cntxt, MalB
        ret= (int*) getArgReference(stk,pci,0);
        bid= (int*) getArgReference(stk,pci,1);
        if( pci->argc == 5){ // candidate list included
-               cid = *(int*) getArgReference(stk,pci, 2);
+               cid = (int*) getArgReference(stk,pci, 2);
                idx = 3;
        } else idx = 2;
        low= (void*) getArgReference(stk,pci,idx);
        oper= (char**) getArgReference(stk,pci,idx+1);
 
-       if( !isCompressed(*bid))
-               return ALGthetasubselect1(ret,bid,low, (const char **)oper);
+       if( !isCompressed(*bid)){
+               if( cid)
+                       return ALGthetasubselect2(ret,bid,cid,low, (const char 
**)oper);
+               else
+                       return ALGthetasubselect1(ret,bid,low, (const char 
**)oper);
+       }
        
        b = BATdescriptor(*bid);
        if( b == NULL)
@@ -731,7 +756,7 @@ str MOSthetasubselect(Client cntxt, MalB
        MOSinit(task,b);
        // drag along the candidate list into the task descriptor
        if (cid) {
-               cand = BATdescriptor(cid);
+               cand = BATdescriptor(*cid);
                if (cand == NULL){
                        BBPreleaseref(b->batCacheid);
                        BBPreleaseref(bn->batCacheid);
@@ -784,7 +809,7 @@ str MOSthetasubselect(Client cntxt, MalB
        cnt = (BUN)( task->lb - (oid*) Tloc(bn,BUNfirst(bn)));
        
        if( cid)
-               BBPreleaseref(cid);
+               BBPreleaseref(*cid);
        if( bn){
                BATsetcount(bn,cnt);
                bn->hdense = 1;
@@ -1047,19 +1072,24 @@ MOSanalyseInternal(Client cntxt, BUN thr
 
        b = BATdescriptor(bid);
        if( b == NULL ){
-               mnstr_printf(cntxt->fdout,"#nonaccessible %d %s\n",bid, 
BBP_logical(bid));
+               mnstr_printf(cntxt->fdout,"#nonaccessible %d\n",bid);
                return;
        }
-       if( BATcount(b) < threshold){
+       if( b->ttype == TYPE_void ||  BATcount(b) < threshold){
                BBPreleaseref(bid);
                //mnstr_printf(cntxt->fdout,"#too small %d %s\n",bid, 
BBP_logical(bid));
                return;
        }
-       if( isVIEW(b)){
+       if ( isVIEW(b) || isVIEWCOMBINE(b) || VIEWtparent(b)) {
                mnstr_printf(cntxt->fdout,"#ignore view %d %s\n",bid, 
BBP_logical(bid));
                BBPreleaseref(bid);
                return;
        }
+       if ( BATcount(b) < MIN_INPUT_COUNT ){
+               mnstr_printf(cntxt->fdout,"#ignore small %d %s\n",bid, 
BBP_logical(bid));
+               BBPreleaseref(bid);
+               return;
+       }
        type = getTypeName(b->ttype);
        switch( b->ttype){
        case TYPE_bit:
@@ -1070,12 +1100,12 @@ MOSanalyseInternal(Client cntxt, BUN thr
        case TYPE_oid:
        case TYPE_flt:
        case TYPE_dbl:
-               mnstr_printf(cntxt->fdout,"#%d\t%s\t%s\t"BUNFMT"\t%10d ", bid, 
BBP_logical(bid), type, BATcount(b), ATOMsize(b->ttype) *(int) BATcount(b));
+               mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t", bid, 
BBP_logical(bid), type, BATcount(b));
                MOScompressInternal(cntxt, &ret, &bid, 0);
                break;
        default:
                if( b->ttype == TYPE_timestamp){
-                       mnstr_printf(cntxt->fdout,"#%d\t%s\t%s\t"BUNFMT"\t%10d 
", bid, BBP_logical(bid), type, BATcount(b), ATOMsize(b->ttype) *(int) 
BATcount(b));
+                       mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t", 
bid, BBP_logical(bid), type, BATcount(b));
                        MOScompressInternal(cntxt, &ret, &bid, 0);
                }
        }
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -85,6 +85,8 @@ typedef struct MOSTASK{
        BUN     elm;            // elements left to compress
        char *src;              // read pointer into source
 
+       lng  xsize,size;// xsize: compressed size, size: original heap size
+       lng timer;              // elapsed (de)compression time, measured with GDKusec()
        void *min, *max;// space for zones
 
        oid *lb, *rb;   // Collected oids from operations
@@ -94,8 +96,7 @@ typedef struct MOSTASK{
        BAT *lbat, *rbat; // for the joins, where we dont know their size 
upfront
 
        // collect compression statistics for the particular task
-       lng time[MOSAIC_METHODS];
-       lng wins[MOSAIC_METHODS];       
+       lng blks[MOSAIC_METHODS];       
        lng elms[MOSAIC_METHODS];       
 } *MOStask;
 
diff --git a/monetdb5/modules/mal/mosaic.mal b/monetdb5/modules/mal/mosaic.mal
--- a/monetdb5/modules/mal/mosaic.mal
+++ b/monetdb5/modules/mal/mosaic.mal
@@ -20,6 +20,10 @@ pattern analyse(threshold:lng)
 address MOSanalyse
 comment "Apply heap compression on all with minimum threshold";
 
+pattern analyse(threshold:lng,b:int)
+address MOSanalyse
+comment "Apply heap compression on all with minimum threshold";
+
 
 pattern dump(b:bat[:oid,:any])
 address MOSdump
diff --git a/monetdb5/modules/mal/mosaic_delta.c 
b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -129,7 +129,6 @@ MOScompress_delta(Client cntxt, MOStask 
 
        (void) cntxt;
        blk->tag = MOSAIC_DELTA;
-    task->time[MOSAIC_DELTA] = GDKusec();
 
        switch(ATOMstorage(task->type)){
        case TYPE_sht: DELTAcompress(sht); break;
@@ -169,7 +168,6 @@ MOScompress_delta(Client cntxt, MOStask 
 #ifdef _DEBUG_MOSAIC_
        MOSdump_delta_(cntxt, task);
 #endif
-    task->time[MOSAIC_DELTA] = GDKusec() - task->time[MOSAIC_DELTA];
 }
 
 // the inverse operator, extend the src
@@ -190,7 +188,6 @@ MOSdecompress_delta(Client cntxt, MOStas
 {
        MosaicBlk blk = (MosaicBlk) task->blk;
        BUN i;
-       lng clk = GDKusec();
        (void) cntxt;
 
        switch(ATOMstorage(task->type)){
@@ -209,7 +206,6 @@ MOSdecompress_delta(Client cntxt, MOStas
                task->src += i * sizeof(int);
        }
        }
-    task->time[MOSAIC_DELTA] = GDKusec() - clk;
 }
 
 // The remainder should provide the minimal algebraic framework
diff --git a/monetdb5/modules/mal/mosaic_dict.c 
b/monetdb5/modules/mal/mosaic_dict.c
--- a/monetdb5/modules/mal/mosaic_dict.c
+++ b/monetdb5/modules/mal/mosaic_dict.c
@@ -178,7 +178,6 @@ MOScompress_dict(Client cntxt, MOStask t
        *size = 0;
        blk->tag = MOSAIC_DICT;
        blk->cnt =  0;
-       task->time[MOSAIC_DICT] = GDKusec();
 
        switch(ATOMstorage(task->type)){
        case TYPE_sht: DICTcompress(sht); break;
@@ -214,7 +213,6 @@ MOScompress_dict(Client cntxt, MOStask t
 #ifdef _DEBUG_MOSAIC_
        MOSdump_dict(cntxt, task);
 #endif
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to