Changeset: 2c7f48a59d68 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2c7f48a59d68
Modified Files:
        monetdb5/modules/mal/Tests/All
        monetdb5/modules/mal/Tests/mosaic_literal.mal
        monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
        monetdb5/modules/mal/mosaic.h
        monetdb5/modules/mal/mosaic_delta.c
        monetdb5/modules/mal/mosaic_dictionary.c
        monetdb5/modules/mal/mosaic_frame.c
        monetdb5/modules/mal/mosaic_hdr.c
        monetdb5/modules/mal/mosaic_linear.c
        monetdb5/modules/mal/mosaic_prefix.c
Branch: mosaic
Log Message:

Prepare for block trimming
The compression schemes run free until the end.
This should be capped to avoid excessive time use.


diffs (truncated from 594 to 300 lines):

diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -68,7 +68,7 @@ manifold
 manifoldstr
 
 mosaic_literal
-mosaic_literal_inplace
+#mosaic_literal_inplace
 mosaic_runlength
 mosaic_mix
 mosaic_dictionary
diff --git a/monetdb5/modules/mal/Tests/mosaic_literal.mal 
b/monetdb5/modules/mal/Tests/mosaic_literal.mal
--- a/monetdb5/modules/mal/Tests/mosaic_literal.mal
+++ b/monetdb5/modules/mal/Tests/mosaic_literal.mal
@@ -10,3 +10,14 @@ x:= mosaic.compress(b,"literal");
 #mosaic.dump(x);
 z:= mosaic.decompress(x);
 io.print(z);
+
+
+s:= bat.new(:oid,:str);
+bat.append(s,"the");
+bat.append(s,"big");
+bat.append(s,"brown");
+bat.append(s,"fox");
+
+y:= mosaic.compress(s,"literal");
+zs:= mosaic.decompress(y);
+io.print(zs);
diff --git a/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal 
b/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
--- a/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
+++ b/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
@@ -97,3 +97,15 @@ io.print(s);
 xs:= mosaic.subselect(x,c,nil:int,nil:int,false,false,true);
 io.print(xs);
 
+s:= bat.new(:oid,:str);
+bat.append(s,"the");
+bat.append(s,"big");
+bat.append(s,"brown");
+bat.append(s,"fox");
+
+ys:= mosaic.compress(s,"literal");
+
+t:= algebra.subselect(s,"big","fox",true,true,false);
+io.print(t);
+t:= mosaic.subselect(ys,"big","fox",true,true,false);
+io.print(t);
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -88,11 +88,6 @@ typedef int *MosaicBlk;
 #define MOSgetCnt(Blk) (BUN)(*(Blk) & ~(0377<<MOSshift))
 #define MOSincCnt(Blk,I) *(Blk)= *(Blk)+I
 
-/* limit the number of elements to consider in a block
- * It should always be smaller then: ~(0377<<MOSshift)
-*/
-#define MOSlimit() (int) ~(0377<<MOSshift)
-
 /* Memory word alignement is type and platform dependent.
  * We use an encoding that fits the column type requirements
  */
@@ -206,5 +201,6 @@ mosaic_export str MOSjoin(Client cntxt, 
 mosaic_export str MOSdump(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mosaic_export str MOSoptimize(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 mosaic_export void MOSblk(MosaicBlk blk);
+mosaic_export BUN MOSlimit(void);
 
 #endif /* _MOSLIST_H */
diff --git a/monetdb5/modules/mal/mosaic_delta.c 
b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -82,13 +82,14 @@ MOSskip_delta(Client cntxt, MOStask task
 // append a series of values into the non-compressed block
 #define Estimate_delta(TYPE, EXPR)\
 {      TYPE *v = ((TYPE*)task->src) + task->start, val= *v, delta = 0;\
-       for(v++,i =1; i<task->stop - task->start; i++,v++){\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop-task->start;\
+       for(v++,i =1; i<limit; i++,v++){\
                delta = *v -val;\
                if ( EXPR)\
                        break;\
                val = *v;\
        }\
-       factor = ((flt) i * sizeof(TYPE))/ wordaligned(MosaicBlkSize + 
sizeof(TYPE) + i-1,TYPE);\
+       if(i) factor = ((flt) i * sizeof(TYPE))/ wordaligned(MosaicBlkSize + 
sizeof(TYPE) + i-1,TYPE);\
 }
 
 // estimate the compression level 
@@ -118,13 +119,14 @@ MOSestimate_delta(Client cntxt, MOStask 
        break;
        case TYPE_int:
                {       int *v = ((int*)task->src) + task->start, val= *v, 
delta=0;
-                       for(v++,i =1; i<task->stop - task->start; i++,v++){
+                       BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop-task->start;
+                       for(v++,i =1; i<limit; i++,v++){
                                delta = *v -val;
                                if ( delta < -127 || delta >127)
                                        break;
                                val = *v;
                        }
-                       factor = ((flt) i * sizeof(int))/ 
wordaligned(MosaicBlkSize + sizeof(int) + i-1,int);
+                       if(i) factor = ((flt) i * sizeof(int))/ 
wordaligned(MosaicBlkSize + sizeof(int) + i-1,int);
                }
                break;
        //case TYPE_flt: case TYPE_dbl: to be looked into.
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c 
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -106,13 +106,12 @@ MOSskip_dictionary(Client cntxt, MOStask
 #define estimateDict(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start;\
        TPE *dict= (TPE*)hdr->dict;\
-       for(i =task->start; i<task->stop; i++, val++){\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
+       for(i =0; i<limit; i++, val++){\
                MOSfind(j,*val,0,hdr->dictsize);\
                if( j == hdr->dictsize || dict[j] != *val )\
                        break;\
        }\
-       i -= task->start;\
-       if ( i > MOSlimit() ) i = MOSlimit();\
        if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned( 
MosaicBlkSize + i,TPE);\
 }
 
@@ -121,7 +120,8 @@ MOSskip_dictionary(Client cntxt, MOStask
 #define makeDict(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start;\
        TPE *dict = (TPE*)hdr->dict,v;\
-       for(i =task->start; i< task->stop; i++, val++){\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
+       for(i = 0; i< limit; i++, val++){\
                for(j= 0; j< hdr->dictsize; j++)\
                        if( dict[j] == *val) break;\
                if ( j == hdr->dictsize){\
@@ -179,8 +179,9 @@ MOScreatedictionary(Client cntxt, MOStas
        case TYPE_int:
                {       int *val = ((int*)task->src) + task->start;
                        int *dict = (int*)hdr->dict,v;
+                       BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop - task->start;
 
-                       for(i =task->start; i< task->stop; i++, val++){
+                       for(i =0; i< limit; i++, val++){
                                for(j= 0; j< hdr->dictsize; j++)
                                        if( dict[j] == *val) break;
                                if ( j == hdr->dictsize){
@@ -275,17 +276,17 @@ MOSestimate_dictionary(Client cntxt, MOS
 #define DICTcompress(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start;\
        TPE *dict = (TPE*)hdr->dict;\
-       BUN limit = task->stop - task->start > MOSlimit()? task->start + 
MOSlimit(): task->stop;\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
        task->dst = ((char*) task->blk)+ MosaicBlkSize;\
        base  = (unsigned long*) task->dst; \
        base[0]=0;\
-       for(i =task->start; i<limit; i++, val++){\
+       for(i =0; i<limit; i++, val++){\
                MOSfind(j,*val,0,hdr->dictsize);\
                if(j == hdr->dictsize || dict[j] != *val) \
                        break;\
                else {\
                        MOSincCnt(blk,1);\
-                       dictcompress(base,(i- task->start),hdr->bits,j);\
+                       dictcompress(base,i,hdr->bits,j);\
                }\
        }\
        assert(i);\
@@ -308,7 +309,7 @@ MOScompress_dictionary(Client cntxt, MOS
        switch(ATOMstorage(task->type)){
        //case TYPE_bte: CASE_bit: no compression achievable
        case TYPE_sht: DICTcompress(sht); break;
-       case TYPE_lng: DICTcompress(lng); break;
+       case TYPE_int: DICTcompress(int); break;
        case TYPE_oid: DICTcompress(oid); break;
        case TYPE_wrd: DICTcompress(wrd); break;
        case TYPE_flt: DICTcompress(flt); break;
@@ -316,28 +317,28 @@ MOScompress_dictionary(Client cntxt, MOS
 #ifdef HAVE_HGE
        case TYPE_hge: DICTcompress(hge); break;
 #endif
-       case TYPE_int:
-               {       int *val = ((int*)task->src) + task->start;
-                       int *dict = (int*)hdr->dict;
-                       BUN limit = task->elm > MOSlimit()? MOSlimit(): 
task->elm;
+       case TYPE_lng:
+               {       lng *val = ((lng*)task->src) + task->start;
+                       lng *dict = (lng*)hdr->dict;
+                       BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop - task->start;
 
                        task->dst = ((char*) task->blk)+ MosaicBlkSize;
                        base  = (unsigned long*) task->dst; // start of bit 
vector
                        base[0]=0;
-                       for(i =task->start; i<limit; i++, val++){
+                       for(i =0; i<limit; i++, val++){
                                MOSfind(j,*val,0,hdr->dictsize);
                                //mnstr_printf(cntxt->fdout,"compress 
["BUNFMT"] val %d index %d bits %d\n",i, *val,j,hdr->bits);
                                if( j == hdr->dictsize || dict[j] != *val )
                                        break;
                                else {
                                        MOSincCnt(blk,1);
-                                       cid = ((i- task->start) * hdr->bits)/64;
-                                       lshift= 63 -(((i- task->start) * 
hdr->bits) % 64) ;
+                                       cid = i * hdr->bits/64;
+                                       lshift= 63 -((i * hdr->bits) % 64) ;
                                        if ( lshift >= hdr->bits){
                                                base[cid]= base[cid] | 
(((unsigned long)j) << (lshift-hdr->bits));
                                                
//mnstr_printf(cntxt->fdout,"[%d] shift %d rbits %d \n",cid, lshift, hdr->bits);
                                        }else{ 
-                                               rshift= 63 -  (((i- 
task->start)+1) * hdr->bits) % 64;
+                                               rshift= 63 -  ((i+1) * 
hdr->bits) % 64;
                                                base[cid]= base[cid] | 
(((unsigned long)j) >> (hdr->bits-lshift));
                                                base[cid+1]= 0 | (((unsigned 
long)j)  << rshift);
                                                
//mnstr_printf(cntxt->fdout,"[%d] shift %d %d val %o %o\n", cid, lshift, rshift,
diff --git a/monetdb5/modules/mal/mosaic_frame.c 
b/monetdb5/modules/mal/mosaic_frame.c
--- a/monetdb5/modules/mal/mosaic_frame.c
+++ b/monetdb5/modules/mal/mosaic_frame.c
@@ -102,14 +102,13 @@ MOSskip_frame(Client cntxt, MOStask task
 #define estimateFrame(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
        TPE *dict= (TPE*)hdr->frame;\
-       for(i =task->start; i<task->stop; i++, val++){\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
+       for(i =0; i<limit; i++, val++){\
                delta = *val - frame;\
                MOSfind(j,delta,0,hdr->framesize);\
                if( j == hdr->framesize || dict[j] != delta )\
                        break;\
        }\
-       i -= task->start;\
-       if ( i > MOSlimit() ) i = MOSlimit();\
        if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned( 
MosaicBlkSize + i,TPE);\
 }
 
@@ -118,7 +117,8 @@ MOSskip_frame(Client cntxt, MOStask task
 #define makeFrame(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
        TPE *dict = (TPE*)hdr->frame,v;\
-       for(i =task->start; i< task->stop; i++, val++){\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
+       for(i =0; i< limit; i++, val++){\
                delta = *val - frame;\
                for(j= 0; j< hdr->framesize; j++)\
                        if( dict[j] == delta) break;\
@@ -177,8 +177,9 @@ MOScreateframe(Client cntxt, MOStask tas
        case TYPE_int:
                {       int *val = ((int*)task->src) + task->start, frame = 
*val, delta;
                        int *dict = (int*)hdr->frame,v;
+                       BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop - task->start;
 
-                       for(i =task->start; i< task->stop; i++, val++){
+                       for(i =0; i< limit; i++, val++){
                                delta = *val - frame;
                                for(j= 0; j< hdr->framesize; j++)
                                        if( dict[j] == delta) break;
@@ -243,13 +244,13 @@ MOSestimate_frame(Client cntxt, MOStask 
        case TYPE_int:
                {       int *val = ((int*)task->src) + task->start, frame = 
*val, delta;
                        int *dict = (int*)hdr->frame;
-                       for(i =task->start; i<task->stop; i++, val++){
+                       BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop - task->start;
+                       for(i =0; i<limit; i++, val++){
                                delta= *val - frame;
                                MOSfind(j,delta,0,hdr->framesize);
                                if( j == hdr->framesize || dict[j] != delta)
                                        break;
                        }
-                       i -= task->start;
                        if ( i > MOSlimit() ) i = MOSlimit();
                        if(i) factor = (flt) ((int)i * sizeof(int)) / 
wordaligned( MosaicBlkSize + i,lng);
                }
@@ -276,25 +277,24 @@ MOSestimate_frame(Client cntxt, MOStask 
 #define FRAMEcompress(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
        TPE *dict = (TPE*)hdr->frame;\
-       BUN limit = task->stop - task->start > MOSlimit()? task->start + 
MOSlimit(): task->stop;\
+       BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;\
        task->dst = ((char*) task->blk)+ MosaicBlkSize;\
     *(TPE*) task->dst = frame;\
        base = (unsigned long*) (((char*) task->blk) +  2 * MosaicBlkSize);\
        base[0]=0;\
-       for(i =task->start; i<limit; i++, val++){\
+       for(i =0; i<limit; i++, val++){\
                delta = *val - frame;\
                MOSfind(j,delta,0,hdr->framesize);\
                if(j == hdr->framesize || dict[j] != delta) \
                        break;\
                else {\
                        MOSincCnt(blk,1);\
-                       framecompress(base,(i- task->start),hdr->framebits,j);\
+                       framecompress(base,i,hdr->framebits,j);\
                }\
        }\
        assert(i);\
 }
 
-
 void
 MOScompress_frame(Client cntxt, MOStask task)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to