Changeset: 8a51ac11db91 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8a51ac11db91
Modified Files:
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic.h
        monetdb5/modules/mal/mosaic.mal
        monetdb5/modules/mal/mosaic_delta.c
        monetdb5/modules/mal/mosaic_dictionary.c
        monetdb5/modules/mal/mosaic_frame.c
        monetdb5/modules/mal/mosaic_hdr.c
        monetdb5/modules/mal/mosaic_linear.c
        monetdb5/modules/mal/mosaic_literal.c
        monetdb5/modules/mal/mosaic_prefix.c
        monetdb5/modules/mal/mosaic_runlength.c
Branch: mosaic
Log Message:

Add checksum protection


diffs (truncated from 534 to 300 lines):

diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -647,6 +647,8 @@ MOSdecompressInternal(Client cntxt, int 
                BBPreleaseref(b->batCacheid);
                BBPkeepref( *ret = bsrc->batCacheid);
        }
+       if( task->hdr->checksum.sumlng != task->hdr->checksum2.sumlng)
+               mnstr_printf(cntxt->fdout,"#incompatible compression\n");
        GDKfree(task);
 
        //if (!b->T->heap.compressed && b->ttype != TYPE_void) {
@@ -1309,6 +1311,96 @@ MOSanalyseInternal(Client cntxt, int thr
        GDKfree(type);
        BBPreleaseref(bid);
        return 1;
+
+}
+/*
+ * Slice a fixed-width column into thin columns: one bte column per byte
+ * position of the values in b.
+ *
+ * cntxt   client context (unused).
+ * slices  out: bat ids of the 'size' newly created TRANSIENT bte columns;
+ *         slices[k] holds byte k, in memory order, of every value of b.
+ * size    number of slices; must equal the byte width of b's tail type.
+ * b       column to slice; only TYPE_int and TYPE_lng tails are handled.
+ *
+ * Returns MAL_SUCCEED, or a MAL exception when the type is unsupported,
+ * size does not match the type width, or a column cannot be allocated;
+ * columns created before an allocation failure are released again.
+ */
+static str
+MOSsliceInternal(Client cntxt, bat *slices, BUN size, BAT *b)
+{
+       BUN i, k;
+       BUN cnt= BATcount(b);
+       BUN width;
+       BAT *bats[8];
+       bte *thin[8];
+       const bte *src;
+       (void) cntxt;
+
+       /* validate before allocating, so bad input cannot leave columns behind */
+       switch(b->ttype){
+       case TYPE_int:
+               width = (BUN) sizeof(int);
+               break;
+       case TYPE_lng:
+               width = (BUN) sizeof(lng);
+               break;
+       default:
+               throw(MAL,"mosaic.slice", "unsupported column type");
+       }
+       if( size != width || size > sizeof(bats)/sizeof(bats[0]))
+               throw(MAL,"mosaic.slice", "slice count does not match type width");
+
+       for( i = 0; i< size; i++){
+               bats[i] = BATnew(TYPE_void,TYPE_bte, cnt, TRANSIENT);
+               if ( bats[i] == NULL){
+                       /* release exactly the i columns created so far; i is
+                        * unsigned, so decrement once per step and stop at 0 */
+                       while( i > 0)
+                               BBPreleaseref(bats[--i]->batCacheid);
+                       throw(MAL,"mosaic.slice", MAL_MALLOC_FAIL);
+               }
+               slices[i] = bats[i]->batCacheid;
+               thin[i]= (bte*) Tloc(bats[i],0);
+               BATsetcount(bats[i], cnt);
+       }
+
+       /* byte k of value i goes to row i of slice k */
+       src = (const bte*) Tloc(b,0);
+       for( i = 0; i < cnt; i++)
+               for( k = 0; k < size; k++)
+                       thin[k][i] = *src++;
+       return MAL_SUCCEED;
+}
+
+/*
+ * MAL pattern behind mosaic.slice: slice the column passed as first input
+ * argument into byte-wide columns (see MOSsliceInternal).
+ * Returns MAL_SUCCEED or a MAL exception (width above 8 bytes, column not
+ * found, or a failure reported by MOSsliceInternal).
+ */
+str
+MOSslice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       bat slices[8];
+       BAT *b;
+       BUN s;
+       str msg;
+
+       s = (BUN) ATOMsize(getArgType(mb,pci,pci->retc));
+       if( s > 8)
+               throw(MAL,"mosaic.slice", "illegal type witdh");
+       b = BATdescriptor(* getArgReference_bat(stk,pci, pci->retc));
+       if ( b == NULL)
+               throw(MAL,"mosaic.slice", RUNTIME_OBJECT_MISSING);
+       msg = MOSsliceInternal(cntxt, slices, s,b);
+       /* drop the reference obtained from BATdescriptor on every path */
+       BBPreleaseref(b->batCacheid);
+       /* NOTE(review): slices[] is not copied to the result arguments, so the
+        * new columns are unreachable for the caller -- TODO confirm the
+        * intended MAL signature and hand them out here */
+       return msg;
 }
 
 str
@@ -1489,3 +1571,4 @@ MOSoptimize(Client cntxt, MalBlkPtr mb, 
        
        return MAL_SUCCEED;
 }
+
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -59,6 +59,18 @@
 #define MOSAICINDEX 4  //> 2 elements
 typedef struct MOSAICHEADER{
        int version;
+       union{
+               bte sumbte;
+               bit sumbit;
+               sht sumsht;
+               int sumint;
+               oid sumoid;
+               lng sumlng;
+               hge sumhge;
+               wrd sumwrd;
+               flt sumflt;
+               dbl sumdbl;
+       } checksum, checksum2;
        // collect compression statistics for the particular task
        lng blks[MOSAIC_METHODS];       
        lng elms[MOSAIC_METHODS];       
@@ -201,6 +213,7 @@ mosaic_export str MOSleftfetchjoin(Clien
 mosaic_export str MOSjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mosaic_export str MOSdump(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mosaic_export str MOSoptimize(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+mosaic_export str MOSslice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mosaic_export void MOSblk(MosaicBlk blk);
 mosaic_export BUN MOSlimit(void);
 
diff --git a/monetdb5/modules/mal/mosaic.mal b/monetdb5/modules/mal/mosaic.mal
--- a/monetdb5/modules/mal/mosaic.mal
+++ b/monetdb5/modules/mal/mosaic.mal
@@ -46,6 +46,9 @@ address MOSoptimize
 comment "Perform a search thru the compression space using up to 2^ply
 candidate combinations and blk*1000 blocksize limits";
 
+pattern slice(bid:lng)(b0:bte,b1:bte,b2:bte,b3:bte,b4:bte,b5:bte,b6:bte,b7:bte)
+address MOSslice
+comment "Break column into verticalslices";
 
 pattern dump(b:bat[:oid,:any])
 address MOSdump
@@ -249,3 +252,4 @@ comment "Overloaded leftfetchjoin operat
 pattern join(b:bat[:oid,:any_1], gen:bat[:oid,:any_1]) 
(l:bat[:oid,:oid],r:bat[:oid,:oid])
 address MOSjoin
 comment "Overloaded join operation";
+
diff --git a/monetdb5/modules/mal/mosaic_delta.c 
b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -144,6 +144,7 @@ MOSestimate_delta(Client cntxt, MOStask 
        *(TYPE*)task->dst = val;\
        task->dst += sizeof(TYPE);\
        for(v++,i =1; i<limit; i++,v++){\
+               hdr->checksum.sum##TYPE += *v;\
                delta = *v -val;\
                if ( EXPR )\
                        break;\
@@ -157,6 +158,7 @@ MOSestimate_delta(Client cntxt, MOStask 
 void
 MOScompress_delta(Client cntxt, MOStask task)
 {
+       MosaicHdr hdr = (MosaicHdr) task->hdr;
        MosaicBlk blk = (MosaicBlk) task->blk;
        BUN i = 0;
 
@@ -179,6 +181,7 @@ MOScompress_delta(Client cntxt, MOStask 
                        *(lng*)task->dst = val;
                        task->dst += sizeof(lng);
                        for(v++, i =1; i<limit; i++, v++){
+                               hdr->checksum.sumint += *v;
                                delta = *v -val;
                                if ( delta < -127 || delta >127)
                                        break;
@@ -211,6 +214,7 @@ MOScompress_delta(Client cntxt, MOStask 
        val = *(TYPE*)task->dst ;\
        task->dst += sizeof(TYPE);\
        for(i = 0; i < lim; i++) {\
+               hdr->checksum2.sum##TYPE += val;\
                ((TYPE*)task->src)[i] = val;\
                val += (bte) *task->dst++;\
        }\
@@ -220,6 +224,7 @@ MOScompress_delta(Client cntxt, MOStask 
 void
 MOSdecompress_delta(Client cntxt, MOStask task)
 {
+       MosaicHdr hdr = (MosaicHdr) task->hdr;
        MosaicBlk blk = (MosaicBlk) task->blk;
        BUN i;
        (void) cntxt;
@@ -240,6 +245,7 @@ MOSdecompress_delta(Client cntxt, MOStas
                val = *(lng*)task->dst ;
                task->dst += sizeof(lng);
                for(i = 0; i < lim; i++) {
+                       hdr->checksum2.sumlng += val;
                        ((lng*)task->src)[i] = val;
                        val += *(bte*) task->dst++;
                }
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c 
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -281,6 +281,7 @@ MOSestimate_dictionary(Client cntxt, MOS
        base  = (unsigned long*) task->dst; \
        base[0]=0;\
        for(i =0; i<limit; i++, val++){\
+               hdr->checksum.sum##TPE += *val;\
                MOSfind(j,*val,0,hdr->dictsize);\
                if(j == hdr->dictsize || dict[j] != *val) \
                        break;\
@@ -326,6 +327,7 @@ MOScompress_dictionary(Client cntxt, MOS
                        base  = (unsigned long*) task->dst; // start of bit 
vector
                        base[0]=0;
                        for(i =0; i<limit; i++, val++){
+                               hdr->checksum.sumlng += *val;
                                MOSfind(j,*val,0,hdr->dictsize);
                                //mnstr_printf(cntxt->fdout,"compress 
["BUNFMT"] val %d index %d bits %d\n",i, *val,j,hdr->bits);
                                if( j == hdr->dictsize || dict[j] != *val )
@@ -371,6 +373,7 @@ if ( lshift >= hdr->bits){\
        for(i = 0; i < lim; i++){\
                dictdecompress(i);\
                ((TPE*)task->src)[i] = dict[j];\
+               hdr->checksum2.sum##TPE += dict[j];\
        }\
        task->src += i * sizeof(TPE);\
 }
@@ -416,6 +419,7 @@ MOSdecompress_dictionary(Client cntxt, M
                                        //mnstr_printf(cntxt->fdout,"[%d] shift 
%d %d cid %lo %lo val %o %o\n", cid, lshift, rshift,base[cid],base[cid+1], m1,  
m2);
                                  }
                                ((int*)task->src)[i] = dict[j];
+                               hdr->checksum2.sumint += dict[j];
                        }
                        task->src += i * sizeof(int);
                }
diff --git a/monetdb5/modules/mal/mosaic_frame.c 
b/monetdb5/modules/mal/mosaic_frame.c
--- a/monetdb5/modules/mal/mosaic_frame.c
+++ b/monetdb5/modules/mal/mosaic_frame.c
@@ -283,6 +283,7 @@ MOSestimate_frame(Client cntxt, MOStask 
        base = (unsigned long*) (((char*) task->blk) +  2 * MosaicBlkSize);\
        base[0]=0;\
        for(i =0; i<limit; i++, val++){\
+               hdr->checksum.sum##TPE += *val;\
                delta = *val - frame;\
                MOSfind(j,delta,0,hdr->framesize);\
                if(j == hdr->framesize || dict[j] != delta) \
@@ -330,6 +331,7 @@ MOScompress_frame(Client cntxt, MOStask 
                        base = (unsigned long*) (((char*) task->blk) +  2 * 
MosaicBlkSize);
                        base[0]=0;
                        for(i =0; i<limit; i++, val++){
+                               hdr->checksum.sumint += *val;
                                delta = *val - frame;
                                MOSfind(j,delta,0,hdr->framesize);
                                //mnstr_printf(cntxt->fdout,"compress 
["BUNFMT"] val %d index %d framebits %d\n",i, *val,j,hdr->framebits);
@@ -377,6 +379,7 @@ if ( lshift >= hdr->framebits){\
        for(i = 0; i < lim; i++){\
                framedecompress(i);\
                ((TPE*)task->src)[i] = frame + dict[j];\
+               hdr->checksum2.sum##TPE += dict[j];\
        }\
        task->src += i * sizeof(TPE);\
 }
@@ -422,6 +425,7 @@ MOSdecompress_frame(Client cntxt, MOStas
                                        j= ((m1 <<(hdr->framebits-lshift)) | 
m2) & 0377;\
                                        //mnstr_printf(cntxt->fdout,"[%d] shift 
%d %d cid %lo %lo val %o %o\n", cid, lshift, rshift,base[cid],base[cid+1], m1,  
m2);
                                  }
+                               hdr->checksum2.sumint += dict[j];
                                ((int*)task->src)[i] = frame + dict[j];
                        }
                        task->src += i * sizeof(int);
diff --git a/monetdb5/modules/mal/mosaic_hdr.c 
b/monetdb5/modules/mal/mosaic_hdr.c
--- a/monetdb5/modules/mal/mosaic_hdr.c
+++ b/monetdb5/modules/mal/mosaic_hdr.c
@@ -99,6 +99,8 @@ MOSinitHeader(MOStask task)
        hdr->factor = 0;
        hdr->version = MOSAIC_VERSION;
        hdr->top = 0;
+       hdr->checksum.sumlng = 0;
+       hdr->checksum2.sumlng = 0;
 }
 
 // position the task on the mosaic blk to be scanned
diff --git a/monetdb5/modules/mal/mosaic_linear.c 
b/monetdb5/modules/mal/mosaic_linear.c
--- a/monetdb5/modules/mal/mosaic_linear.c
+++ b/monetdb5/modules/mal/mosaic_linear.c
@@ -186,6 +186,7 @@ MOSestimate_linear(Client cntxt, MOStask
        for(i=1; i<limit; i++, val = *v++)\
        if (  *v - val != step)\
                break;\
+       else hdr->checksum.sum##TYPE += val;\
        MOSincCnt(blk, i);\
        task->dst = ((char*) blk)+ MosaicBlkSize +  2 * sizeof(TYPE);\
 }
@@ -194,6 +195,7 @@ void
 MOScompress_linear(Client cntxt, MOStask task)
 {
        BUN i;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to