Changeset: 2e32f68867cd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2e32f68867cd
Modified Files:
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic.h
        monetdb5/modules/mal/mosaic_delta.c
        monetdb5/modules/mal/mosaic_dict.c
        monetdb5/modules/mal/mosaic_linear.c
        monetdb5/modules/mal/mosaic_rle.c
        monetdb5/optimizer/opt_mosaic.c
Branch: mosaic
Log Message:

Squeeze 4 more bytes from mosaic header


diffs (296 lines):

diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -328,6 +328,19 @@ MOScompressInternal(Client cntxt, int *r
                                task->dst = ((char*) task->blk)+ MosaicBlkSize;
                                *task->blk = MOSeol;
                        }
+                       break;
+               case MOSAIC_NONE:
+               case MOSAIC_ZONE:
+                       if ( MOScnt(task->blk) == MOSlimit()){
+                               MOSupdateHeader(cntxt,task);
+                               if( MOStag(task->blk) == MOSAIC_NONE)
+                                       MOSskip_none(cntxt,task);
+                               else
+                                       MOSskip_zone(cntxt,task);
+                               // always start with an EOL block
+                               task->dst = ((char*) task->blk)+ MosaicBlkSize;
+                               *task->blk = MOSeol;
+                       }
                }
                // apply the compression to a chunk
                switch(cand){
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -63,20 +63,24 @@ typedef struct MOSAICHEADER{
        BUN offset[MOSAICINDEX];
 } * MosaicHdr;
 
-// bit stuffed header block
-typedef lng *MosaicBlk;
-#define MOStag(Blk) (*(Blk)>>56)
-#define MOSsetTag(Tag)  ((lng) (Tag) <<56)
-#define MOScnt(Blk) (BUN)(*(Blk) & 03777777777777777)
+// bit stuffed header block, currently 4 bytes wide
+#define MOSshift 24
+typedef int *MosaicBlk;
+
+#define MOStag(Blk) (*(Blk)>>MOSshift)
+#define MOSsetTag(Tag)  ((int) (Tag) <<MOSshift)
+#define MOScnt(Blk) (BUN)(*(Blk) & ~(0377<<MOSshift))
 #define MOSinc(Blk,I) *(Blk)= *(Blk)+I
 
-#define MOSnone (((lng)MOSAIC_NONE) <<56)
-#define MOSrle (((lng)MOSAIC_RLE) <<56)
-#define MOSdict (((lng)MOSAIC_DICT) <<56)
-#define MOSlinear (((lng)MOSAIC_LINEAR) <<56)
-#define MOSdelta (((lng)MOSAIC_DELTA) <<56)
-#define MOSzone (((lng)MOSAIC_ZONE) <<56)
-#define MOSeol (((lng)MOSAIC_EOL) <<56)
+#define MOSnone (((int)MOSAIC_NONE) <<MOSshift)
+#define MOSrle (((int)MOSAIC_RLE) <<MOSshift)
+#define MOSdict (((int)MOSAIC_DICT) <<MOSshift)
+#define MOSlinear (((int)MOSAIC_LINEAR) <<MOSshift)
+#define MOSdelta (((int)MOSAIC_DELTA) <<MOSshift)
+#define MOSzone (((int)MOSAIC_ZONE) <<MOSshift)
+#define MOSeol (((int)MOSAIC_EOL) <<MOSshift)
+
+#define MOSlimit() (int) ~(0377<<MOSshift)
 
 /* Memory word alignement is type and platform dependent.
  * We use an encoding that fits the column type requirements
diff --git a/monetdb5/modules/mal/mosaic_delta.c b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -77,6 +77,7 @@ MOSskip_delta(Client cntxt, MOStask task
                        break;\
                val = *w;\
        }\
+       if ( i > MOSlimit() ) i = MOSlimit();\
        factor = (float)((int)i * sizeof(TYPE))/  (MosaicBlkSize + sizeof(TYPE)+(bte)i-1);\
 }
 
@@ -97,6 +98,7 @@ MOSestimate_delta(Client cntxt, MOStask 
                                        break;
                                val = *w;
                        }
+                       if ( i > MOSlimit() ) i = MOSlimit();
                        factor = ((float)i * sizeof(int))/  (MosaicBlkSize + sizeof(oid)+(bte)i-1);
                }
        case TYPE_wrd: Estimate_delta(wrd); break;
@@ -123,10 +125,11 @@ MOSestimate_delta(Client cntxt, MOStask 
 
 #define DELTAcompress(TYPE)\
 {      TYPE *w = (TYPE*)task->src, val= *w, delta;\
+       BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;\
        task->dst = ((char*) task->blk) + MosaicBlkSize;\
        *(TYPE*)task->dst = val;\
        task->dst += sizeof(TYPE);\
-       for(w++,i =1; i<task->elm; i++,w++){\
+       for(w++,i =1; i<limit; i++,w++){\
                delta = *w -val;\
                if ( delta < -127 || delta >127)\
                        break;\
@@ -156,10 +159,11 @@ MOScompress_delta(Client cntxt, MOStask 
 #endif
        case TYPE_oid:
                {       oid *w = (oid*)task->src, val= *w, delta;
+                       BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
                        task->dst = ((char*) task->blk) + MosaicBlkSize;
                        *(oid*)task->dst = val;
                        task->dst += sizeof(oid);
-                       for(w++,i =1; i<task->elm; i++,w++){
+                       for(w++,i =1; i<limit; i++,w++){
                                delta = *w -val;
                                if ( delta < 256)
                                        break;
@@ -172,10 +176,11 @@ MOScompress_delta(Client cntxt, MOStask 
                break;
        case TYPE_int:
                {       int *w = (int*)task->src, val= *w, delta;
+                       BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
                        task->dst = ((char*) task->blk) + MosaicBlkSize;
                        *(int*)task->dst = val;
                        task->dst += sizeof(int);
-                       for(w++,i =1; i<task->elm; i++,w++){
+                       for(w++,i =1; i<limit; i++,w++){
                                delta = *w -val;
                                if ( delta < -127 || delta >127)
                                        break;
diff --git a/monetdb5/modules/mal/mosaic_dict.c b/monetdb5/modules/mal/mosaic_dict.c
--- a/monetdb5/modules/mal/mosaic_dict.c
+++ b/monetdb5/modules/mal/mosaic_dict.c
@@ -123,6 +123,7 @@ MOSskip_dict(Client cntxt, MOStask task)
                        cnt++;\
                }\
        }\
+       if ( i > MOSlimit() ) i = MOSlimit();\
        if(i) factor = (flt) ((int)i * sizeof(int)) / (2 * MosaicBlkSize + sizeof(int) * dictsize +i);\
 }
 
@@ -160,6 +161,7 @@ MOSestimate_dict(Client cntxt, MOStask t
                                        cnt++;
                                }
                        }
+                       if ( i > MOSlimit() ) i = MOSlimit();
                        if(i) factor = (flt) ((int)i * sizeof(int)) / (2 * MosaicBlkSize + sizeof(int) * dictsize +i);
                }
        }
@@ -173,8 +175,9 @@ MOSestimate_dict(Client cntxt, MOStask t
 #define DICTcompress(TPE)\
 {      TPE *val = (TPE*)task->src;\
        TPE *dict = (TPE*)((char*)task->blk+ 2 * MosaicBlkSize);\
+       BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
        task->dst = ((char*) dict)+ sizeof(TPE)*dictsize;\
-       for(i =0; i<task->elm; i++, val++){\
+       for(i =0; i<limit; i++, val++){\
                for(j= 0; j< *size; j++)\
                        if( dict[j] == *val) {\
                                MOSinc(blk,1);\
@@ -186,10 +189,10 @@ MOSestimate_dict(Client cntxt, MOStask t
                                task->dst += wordaligned(MOScnt(blk) %2,TPE);\
                                break;\
                        }\
+                       MOSinc(blk,1);\
                        dict[j] = *val;\
                        *size = *size+1;\
                        *task->dst++ = (char) j;\
-                       MOSinc(blk,1);\
                }\
        }\
        task->src = (char*) val;\
@@ -219,8 +222,9 @@ MOScompress_dict(Client cntxt, MOStask t
        case TYPE_lng:
                {       lng *val = (lng*)task->src;
                        lng *dict = (lng*)((char*)task->blk+ 2 * MosaicBlkSize);
+                       BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;
                        task->dst = ((char*) dict)+ sizeof(lng)*dictsize;
-                       for(i =0; i<task->elm; i++, val++){
+                       for(i =0; i<limit; i++, val++){
                                for(j= 0; j< *size; j++)
                                        if( dict[j] == *val) {
                                                MOSinc(blk,1);
@@ -233,10 +237,10 @@ MOScompress_dict(Client cntxt, MOStask t
                                                task->dst += wordaligned(MOScnt(blk) %2,lng);
                                                break;
                                        }
+                                       MOSinc(blk,1);
                                        dict[j] = *val;
                                        *size = *size+1;
                                        *task->dst++ = (char) j;
-                                       MOSinc(blk,1);
                                }
                        }
                        task->src = (char*) val;
diff --git a/monetdb5/modules/mal/mosaic_linear.c b/monetdb5/modules/mal/mosaic_linear.c
--- a/monetdb5/modules/mal/mosaic_linear.c
+++ b/monetdb5/modules/mal/mosaic_linear.c
@@ -126,6 +126,7 @@ MOSskip_linear(Client cntxt, MOStask tas
        for(i =1; i < task->elm; i++)\
        if ( ((TYPE*)task->src)[i] != (TYPE)(val + (int)i * step))\
                break;\
+       if( i >= MOSlimit()) i = MOSlimit();\
        factor =  ( (flt)i * sizeof(TYPE))/(MosaicBlkSize + 2 * sizeof(TYPE));\
 }
 
@@ -154,6 +155,7 @@ MOSestimate_linear(Client cntxt, MOStask
                        for(i =1; i<task->elm; i++)
                        if ( ((int*)task->src)[i] != (int)(val + (int)i * step))
                                break;
+                       if( i >= MOSlimit()) i = MOSlimit();
                        factor =  ( (flt)i * sizeof(int))/(MosaicBlkSize + 2 * sizeof(int));
                }
        }
@@ -167,7 +169,8 @@ MOSestimate_linear(Client cntxt, MOStask
 #define LINEARcompress(TYPE)\
 {      TYPE val = *(TYPE*) task->src;\
        TYPE step = *(TYPE*) (task->src + sizeof(TYPE)) - val;\
-       for(i =1; i<task->elm; i++)\
+       BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;\
+       for(i =1; i<limit; i++)\
        if ( ((TYPE*)task->src)[i] != (TYPE)(val + (int)i * step))\
                break;\
        MOSinc(blk,i);\
@@ -201,7 +204,8 @@ MOScompress_linear(Client cntxt, MOStask
        case TYPE_int:
                {       int val = *(int*) task->src;\
                        int step = *(int*) (task->src + sizeof(int)) - val;\
-                       for(i =1; i<task->elm; i++)\
+                       BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
+                       for(i =1; i<limit; i++)\
                        if ( ((int*)task->src)[i] != (int)(val + (int)i * step))\
                                break;\
                        MOSinc(blk,i);\
diff --git a/monetdb5/modules/mal/mosaic_rle.c b/monetdb5/modules/mal/mosaic_rle.c
--- a/monetdb5/modules/mal/mosaic_rle.c
+++ b/monetdb5/modules/mal/mosaic_rle.c
@@ -108,6 +108,7 @@ MOSskip_rle(Client cntxt, MOStask task)
        for(i =1; i < task->elm; i++)\
        if ( ((TYPE*)task->src)[i] != val)\
                break;\
+       if ( i > MOSlimit() ) i = MOSlimit();\
        factor = ( (flt)i * sizeof(TYPE))/ (MosaicBlkSize + sizeof(TYPE));\
 }
 
@@ -135,6 +136,7 @@ MOSestimate_rle(Client cntxt, MOStask ta
                        for(i =1; i<task->elm; i++)
                        if ( ((int*)task->src)[i] != val)
                                break;
+                       if ( i > MOSlimit() ) i = MOSlimit();
                        factor = ( (flt)i * sizeof(int))/ (MosaicBlkSize + sizeof(int));
                }
        }
@@ -146,16 +148,17 @@ MOSestimate_rle(Client cntxt, MOStask ta
 
 // insert a series of values into the compressor block using rle.
 #define RLEcompress(TYPE)\
-       {       TYPE val = *(TYPE*) task->src;\
-               TYPE *dst = (TYPE*) task->dst;\
-               *dst = val;\
-               for(i =1; i<task->elm; i++)\
-               if ( ((TYPE*)task->src)[i] != val)\
-                       break;\
-               MOSinc(blk,i);\
-               task->dst +=  sizeof(TYPE);\
-               task->src += i * sizeof(TYPE);\
-       }
+{      TYPE val = *(TYPE*) task->src;\
+       TYPE *dst = (TYPE*) task->dst;\
+       BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
+       *dst = val;\
+       for(i =1; i<limit; i++)\
+       if ( ((TYPE*)task->src)[i] != val)\
+               break;\
+       MOSinc(blk,i);\
+       task->dst +=  sizeof(TYPE);\
+       task->src += i * sizeof(TYPE);\
+}
 
 void
 MOScompress_rle(Client cntxt, MOStask task)
@@ -181,8 +184,9 @@ MOScompress_rle(Client cntxt, MOStask ta
        case TYPE_int:
                {       int val = *(int*) task->src;
                        int *dst = (int*) task->dst;
+                       BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;
                        *dst = val;
-                       for(i =1; i<task->elm; i++)
+                       for(i =1; i<limit; i++)
                        if ( ((int*)task->src)[i] != val)
                                break;
                        MOSinc(blk,i);
diff --git a/monetdb5/optimizer/opt_mosaic.c b/monetdb5/optimizer/opt_mosaic.c
--- a/monetdb5/optimizer/opt_mosaic.c
+++ b/monetdb5/optimizer/opt_mosaic.c
@@ -37,6 +37,9 @@ static int OPTmosaicType(MalBlkPtr mb, I
        case TYPE_sht:
        case TYPE_int:
        case TYPE_lng:
+#ifdef HAVE_HGE
+       case TYPE_hge:
+#endif
        case TYPE_oid:
        case TYPE_wrd:
        case TYPE_flt:
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to