Changeset: 2c7f48a59d68 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2c7f48a59d68
Modified Files:
monetdb5/modules/mal/Tests/All
monetdb5/modules/mal/Tests/mosaic_literal.mal
monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
monetdb5/modules/mal/mosaic.h
monetdb5/modules/mal/mosaic_delta.c
monetdb5/modules/mal/mosaic_dictionary.c
monetdb5/modules/mal/mosaic_frame.c
monetdb5/modules/mal/mosaic_hdr.c
monetdb5/modules/mal/mosaic_linear.c
monetdb5/modules/mal/mosaic_prefix.c
Branch: mosaic
Log Message:
Prepare for block trimming
The compression schemes run free until the end.
This should be capped to avoid excessive time use.
diffs (truncated from 594 to 300 lines):
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -68,7 +68,7 @@ manifold
manifoldstr
mosaic_literal
-mosaic_literal_inplace
+#mosaic_literal_inplace
mosaic_runlength
mosaic_mix
mosaic_dictionary
diff --git a/monetdb5/modules/mal/Tests/mosaic_literal.mal
b/monetdb5/modules/mal/Tests/mosaic_literal.mal
--- a/monetdb5/modules/mal/Tests/mosaic_literal.mal
+++ b/monetdb5/modules/mal/Tests/mosaic_literal.mal
@@ -10,3 +10,14 @@ x:= mosaic.compress(b,"literal");
#mosaic.dump(x);
z:= mosaic.decompress(x);
io.print(z);
+
+
+s:= bat.new(:oid,:str);
+bat.append(s,"the");
+bat.append(s,"big");
+bat.append(s,"brown");
+bat.append(s,"fox");
+
+y:= mosaic.compress(s,"literal");
+zs:= mosaic.decompress(y);
+io.print(zs);
diff --git a/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
b/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
--- a/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
+++ b/monetdb5/modules/mal/Tests/mosaic_literal_subselect.mal
@@ -97,3 +97,15 @@ io.print(s);
xs:= mosaic.subselect(x,c,nil:int,nil:int,false,false,true);
io.print(xs);
+s:= bat.new(:oid,:str);
+bat.append(s,"the");
+bat.append(s,"big");
+bat.append(s,"brown");
+bat.append(s,"fox");
+
+ys:= mosaic.compress(s,"literal");
+
+t:= algebra.subselect(s,"big","fox",true,true,false);
+io.print(t);
+t:= mosaic.subselect(ys,"big","fox",true,true,false);
+io.print(t);
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -88,11 +88,6 @@ typedef int *MosaicBlk;
#define MOSgetCnt(Blk) (BUN)(*(Blk) & ~(0377<<MOSshift))
#define MOSincCnt(Blk,I) *(Blk)= *(Blk)+I
-/* limit the number of elements to consider in a block
- * It should always be smaller then: ~(0377<<MOSshift)
-*/
-#define MOSlimit() (int) ~(0377<<MOSshift)
-
/* Memory word alignement is type and platform dependent.
* We use an encoding that fits the column type requirements
*/
@@ -206,5 +201,6 @@ mosaic_export str MOSjoin(Client cntxt,
mosaic_export str MOSdump(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
mosaic_export str MOSoptimize(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
mosaic_export void MOSblk(MosaicBlk blk);
+mosaic_export BUN MOSlimit(void);
#endif /* _MOSLIST_H */
diff --git a/monetdb5/modules/mal/mosaic_delta.c
b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -82,13 +82,14 @@ MOSskip_delta(Client cntxt, MOStask task
// append a series of values into the non-compressed block
#define Estimate_delta(TYPE, EXPR)\
{ TYPE *v = ((TYPE*)task->src) + task->start, val= *v, delta = 0;\
- for(v++,i =1; i<task->stop - task->start; i++,v++){\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop-task->start;\
+ for(v++,i =1; i<limit; i++,v++){\
delta = *v -val;\
if ( EXPR)\
break;\
val = *v;\
}\
- factor = ((flt) i * sizeof(TYPE))/ wordaligned(MosaicBlkSize +
sizeof(TYPE) + i-1,TYPE);\
+ if(i) factor = ((flt) i * sizeof(TYPE))/ wordaligned(MosaicBlkSize +
sizeof(TYPE) + i-1,TYPE);\
}
// estimate the compression level
@@ -118,13 +119,14 @@ MOSestimate_delta(Client cntxt, MOStask
break;
case TYPE_int:
{ int *v = ((int*)task->src) + task->start, val= *v,
delta=0;
- for(v++,i =1; i<task->stop - task->start; i++,v++){
+ BUN limit = task->stop - task->start > MOSlimit()?
MOSlimit(): task->stop-task->start;
+ for(v++,i =1; i<limit; i++,v++){
delta = *v -val;
if ( delta < -127 || delta >127)
break;
val = *v;
}
- factor = ((flt) i * sizeof(int))/
wordaligned(MosaicBlkSize + sizeof(int) + i-1,int);
+ if(i) factor = ((flt) i * sizeof(int))/
wordaligned(MosaicBlkSize + sizeof(int) + i-1,int);
}
break;
//case TYPE_flt: case TYPE_dbl: to be looked into.
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -106,13 +106,12 @@ MOSskip_dictionary(Client cntxt, MOStask
#define estimateDict(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start;\
TPE *dict= (TPE*)hdr->dict;\
- for(i =task->start; i<task->stop; i++, val++){\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
+ for(i =0; i<limit; i++, val++){\
MOSfind(j,*val,0,hdr->dictsize);\
if( j == hdr->dictsize || dict[j] != *val )\
break;\
}\
- i -= task->start;\
- if ( i > MOSlimit() ) i = MOSlimit();\
if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned(
MosaicBlkSize + i,TPE);\
}
@@ -121,7 +120,8 @@ MOSskip_dictionary(Client cntxt, MOStask
#define makeDict(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start;\
TPE *dict = (TPE*)hdr->dict,v;\
- for(i =task->start; i< task->stop; i++, val++){\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
+ for(i = 0; i< limit; i++, val++){\
for(j= 0; j< hdr->dictsize; j++)\
if( dict[j] == *val) break;\
if ( j == hdr->dictsize){\
@@ -179,8 +179,9 @@ MOScreatedictionary(Client cntxt, MOStas
case TYPE_int:
{ int *val = ((int*)task->src) + task->start;
int *dict = (int*)hdr->dict,v;
+ BUN limit = task->stop - task->start > MOSlimit()?
MOSlimit(): task->stop - task->start;
- for(i =task->start; i< task->stop; i++, val++){
+ for(i =0; i< limit; i++, val++){
for(j= 0; j< hdr->dictsize; j++)
if( dict[j] == *val) break;
if ( j == hdr->dictsize){
@@ -275,17 +276,17 @@ MOSestimate_dictionary(Client cntxt, MOS
#define DICTcompress(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start;\
TPE *dict = (TPE*)hdr->dict;\
- BUN limit = task->stop - task->start > MOSlimit()? task->start +
MOSlimit(): task->stop;\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
task->dst = ((char*) task->blk)+ MosaicBlkSize;\
base = (unsigned long*) task->dst; \
base[0]=0;\
- for(i =task->start; i<limit; i++, val++){\
+ for(i =0; i<limit; i++, val++){\
MOSfind(j,*val,0,hdr->dictsize);\
if(j == hdr->dictsize || dict[j] != *val) \
break;\
else {\
MOSincCnt(blk,1);\
- dictcompress(base,(i- task->start),hdr->bits,j);\
+ dictcompress(base,i,hdr->bits,j);\
}\
}\
assert(i);\
@@ -308,7 +309,7 @@ MOScompress_dictionary(Client cntxt, MOS
switch(ATOMstorage(task->type)){
//case TYPE_bte: CASE_bit: no compression achievable
case TYPE_sht: DICTcompress(sht); break;
- case TYPE_lng: DICTcompress(lng); break;
+ case TYPE_int: DICTcompress(int); break;
case TYPE_oid: DICTcompress(oid); break;
case TYPE_wrd: DICTcompress(wrd); break;
case TYPE_flt: DICTcompress(flt); break;
@@ -316,28 +317,28 @@ MOScompress_dictionary(Client cntxt, MOS
#ifdef HAVE_HGE
case TYPE_hge: DICTcompress(hge); break;
#endif
- case TYPE_int:
- { int *val = ((int*)task->src) + task->start;
- int *dict = (int*)hdr->dict;
- BUN limit = task->elm > MOSlimit()? MOSlimit():
task->elm;
+ case TYPE_lng:
+ { lng *val = ((lng*)task->src) + task->start;
+ lng *dict = (lng*)hdr->dict;
+ BUN limit = task->stop - task->start > MOSlimit()?
MOSlimit(): task->stop - task->start;
task->dst = ((char*) task->blk)+ MosaicBlkSize;
base = (unsigned long*) task->dst; // start of bit
vector
base[0]=0;
- for(i =task->start; i<limit; i++, val++){
+ for(i =0; i<limit; i++, val++){
MOSfind(j,*val,0,hdr->dictsize);
//mnstr_printf(cntxt->fdout,"compress
["BUNFMT"] val %d index %d bits %d\n",i, *val,j,hdr->bits);
if( j == hdr->dictsize || dict[j] != *val )
break;
else {
MOSincCnt(blk,1);
- cid = ((i- task->start) * hdr->bits)/64;
- lshift= 63 -(((i- task->start) *
hdr->bits) % 64) ;
+ cid = i * hdr->bits/64;
+ lshift= 63 -((i * hdr->bits) % 64) ;
if ( lshift >= hdr->bits){
base[cid]= base[cid] |
(((unsigned long)j) << (lshift-hdr->bits));
//mnstr_printf(cntxt->fdout,"[%d] shift %d rbits %d \n",cid, lshift, hdr->bits);
}else{
- rshift= 63 - (((i-
task->start)+1) * hdr->bits) % 64;
+ rshift= 63 - ((i+1) *
hdr->bits) % 64;
base[cid]= base[cid] |
(((unsigned long)j) >> (hdr->bits-lshift));
base[cid+1]= 0 | (((unsigned
long)j) << rshift);
//mnstr_printf(cntxt->fdout,"[%d] shift %d %d val %o %o\n", cid, lshift, rshift,
diff --git a/monetdb5/modules/mal/mosaic_frame.c
b/monetdb5/modules/mal/mosaic_frame.c
--- a/monetdb5/modules/mal/mosaic_frame.c
+++ b/monetdb5/modules/mal/mosaic_frame.c
@@ -102,14 +102,13 @@ MOSskip_frame(Client cntxt, MOStask task
#define estimateFrame(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
TPE *dict= (TPE*)hdr->frame;\
- for(i =task->start; i<task->stop; i++, val++){\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
+ for(i =0; i<limit; i++, val++){\
delta = *val - frame;\
MOSfind(j,delta,0,hdr->framesize);\
if( j == hdr->framesize || dict[j] != delta )\
break;\
}\
- i -= task->start;\
- if ( i > MOSlimit() ) i = MOSlimit();\
if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned(
MosaicBlkSize + i,TPE);\
}
@@ -118,7 +117,8 @@ MOSskip_frame(Client cntxt, MOStask task
#define makeFrame(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
TPE *dict = (TPE*)hdr->frame,v;\
- for(i =task->start; i< task->stop; i++, val++){\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
+ for(i =0; i< limit; i++, val++){\
delta = *val - frame;\
for(j= 0; j< hdr->framesize; j++)\
if( dict[j] == delta) break;\
@@ -177,8 +177,9 @@ MOScreateframe(Client cntxt, MOStask tas
case TYPE_int:
{ int *val = ((int*)task->src) + task->start, frame =
*val, delta;
int *dict = (int*)hdr->frame,v;
+ BUN limit = task->stop - task->start > MOSlimit()?
MOSlimit(): task->stop - task->start;
- for(i =task->start; i< task->stop; i++, val++){
+ for(i =0; i< limit; i++, val++){
delta = *val - frame;
for(j= 0; j< hdr->framesize; j++)
if( dict[j] == delta) break;
@@ -243,13 +244,13 @@ MOSestimate_frame(Client cntxt, MOStask
case TYPE_int:
{ int *val = ((int*)task->src) + task->start, frame =
*val, delta;
int *dict = (int*)hdr->frame;
- for(i =task->start; i<task->stop; i++, val++){
+ BUN limit = task->stop - task->start > MOSlimit()?
MOSlimit(): task->stop - task->start;
+ for(i =0; i<limit; i++, val++){
delta= *val - frame;
MOSfind(j,delta,0,hdr->framesize);
if( j == hdr->framesize || dict[j] != delta)
break;
}
- i -= task->start;
if ( i > MOSlimit() ) i = MOSlimit();
if(i) factor = (flt) ((int)i * sizeof(int)) /
wordaligned( MosaicBlkSize + i,lng);
}
@@ -276,25 +277,24 @@ MOSestimate_frame(Client cntxt, MOStask
#define FRAMEcompress(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start, frame = *val, delta;\
TPE *dict = (TPE*)hdr->frame;\
- BUN limit = task->stop - task->start > MOSlimit()? task->start +
MOSlimit(): task->stop;\
+ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit():
task->stop - task->start;\
task->dst = ((char*) task->blk)+ MosaicBlkSize;\
*(TPE*) task->dst = frame;\
base = (unsigned long*) (((char*) task->blk) + 2 * MosaicBlkSize);\
base[0]=0;\
- for(i =task->start; i<limit; i++, val++){\
+ for(i =0; i<limit; i++, val++){\
delta = *val - frame;\
MOSfind(j,delta,0,hdr->framesize);\
if(j == hdr->framesize || dict[j] != delta) \
break;\
else {\
MOSincCnt(blk,1);\
- framecompress(base,(i- task->start),hdr->framebits,j);\
+ framecompress(base,i,hdr->framebits,j);\
}\
}\
assert(i);\
}
-
void
MOScompress_frame(Client cntxt, MOStask task)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list