Changeset: 539de38da445 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=539de38da445
Added Files:
monetdb5/modules/mosaic/mosaic_calendar.c
monetdb5/modules/mosaic/mosaic_calendar.h
Modified Files:
monetdb5/modules/mosaic/Makefile.ag
monetdb5/modules/mosaic/mosaic.c
monetdb5/modules/mosaic/mosaic.h
Branch: mosaic
Log Message:
Add calendar compressor
Cut off the last 5 bits and store the rest in the dictionary
diffs (truncated from 1038 to 300 lines):
diff --git a/monetdb5/modules/mosaic/Makefile.ag b/monetdb5/modules/mosaic/Makefile.ag
--- a/monetdb5/modules/mosaic/Makefile.ag
+++ b/monetdb5/modules/mosaic/Makefile.ag
@@ -27,6 +27,7 @@ lib_mosaic = {
mosaic_delta.c mosaic_delta.h \
mosaic_linear.c mosaic_linear.h \
mosaic_frame.c mosaic_frame.h \
+ mosaic_calendar.c mosaic_calendar.h \
mosaic_prefix.c mosaic_prefix.h
}
diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -33,8 +33,9 @@
#include "mosaic_linear.h"
#include "mosaic_frame.h"
#include "mosaic_prefix.h"
+#include "mosaic_calendar.h"
-char
*MOSfiltername[]={"literal","runlength","dictionary","delta","linear","frame","prefix","EOL"};
+char
*MOSfiltername[]={"literal","runlength","dictionary","delta","linear","frame","prefix","calendar","EOL"};
BUN MOSblocklimit = 100000;
str MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug);
@@ -113,6 +114,8 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
MOSlayout_frame_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
if( task->hdr->blks[MOSAIC_DICT])
MOSlayout_dictionary_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
+ if( task->hdr->blks[MOSAIC_CALENDAR])
+
MOSlayout_calendar(cntxt,task,btech,bcount,binput,boutput,bproperties);
BUNappend(btech, "========", FALSE);
BUNappend(bcount, &zero, FALSE);
@@ -150,6 +153,10 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
MOSlayout_prefix(cntxt,task,btech,bcount,binput,boutput,bproperties);
MOSadvance_prefix(cntxt,task);
break;
+ case MOSAIC_CALENDAR:
+
MOSlayout_calendar(cntxt,task,btech,bcount,binput,boutput,bproperties);
+ MOSadvance_calendar(cntxt,task);
+ break;
default:
assert(0);
}
@@ -192,6 +199,11 @@ MOSdumpInternal(Client cntxt, BAT *b){
case MOSAIC_FRAME:
MOSdump_frame(cntxt,task);
MOSadvance_frame(cntxt,task);
+ break;
+ case MOSAIC_CALENDAR:
+ MOSdump_calendar(cntxt,task);
+ MOSadvance_calendar(cntxt,task);
+ break;
}
}
}
@@ -217,47 +229,6 @@ MOSdump(Client cntxt, MalBlkPtr mb, MalS
}
/*
-static BAT*
-inheritCOL( BAT *bn, COLrec *cn, BAT *b, COLrec *c, bat p )
-{
- str nme = cn->id;
-
- assert((b->H == c && p == VIEWhparent(b)) ||
- (b->T == c && p == VIEWtparent(b)));
- assert(bn->H == cn || bn->T == cn);
- assert(cn->props == NULL);
- assert(cn->vheap == NULL);
- assert(cn->hash == NULL);
- assert(bn->S->deleted == b->S->deleted );
- assert(bn->S->first == b->S->first );
- assert(bn->S->inserted == b->S->inserted);
- assert(bn->S->count == b->S->count );
-
- HEAPfree(&cn->heap);
-
- if (p == 0)
- p = b->batCacheid;
- bn->S->capacity = MIN( bn->S->capacity, b->S->capacity );
- *cn = *c;
- BBPshare(p);
- if (cn->vheap) {
- assert(cn->vheap->parentid > 0);
- BBPshare(cn->vheap->parentid);
- }
- cn->heap.copied = 0;
- cn->props = NULL;
- cn->heap.parentid = p;
- cn->id = nme;
- if (isVIEW(b))
- cn->hash = NULL;
- else
- cn->hash = c->hash;
-
- return bn;
-}
-*/
-
-/*
* Compression is focussed on a single column.
* Multiple compression techniques are applied at the same time.
*/
@@ -306,7 +277,7 @@ MOSoptimizerCost(Client cntxt, MOStask t
task->filter[MOSAIC_PREFIX] = 0;
}
// max achievable compression factor is 8x
- if (ratio < 8 && task->filter[MOSAIC_DICT]){
+ if (task->filter[MOSAIC_DICT]){
fac = MOSestimate_dictionary(cntxt,task);
if (fac > ratio){
cand = MOSAIC_DICT;
@@ -314,7 +285,7 @@ MOSoptimizerCost(Client cntxt, MOStask t
}
}
// max achievable compression factor is 8x
- if (ratio < 8 && task->filter[MOSAIC_FRAME]){
+ if (task->filter[MOSAIC_FRAME]){
fac = MOSestimate_frame(cntxt,task);
if (fac > ratio){
cand = MOSAIC_FRAME;
@@ -322,13 +293,20 @@ MOSoptimizerCost(Client cntxt, MOStask t
}
}
// max achievable compression factor is 8x
- if (ratio < 8 && task->filter[MOSAIC_DELTA]){
+ if (task->filter[MOSAIC_DELTA]){
fac = MOSestimate_delta(cntxt,task);
if ( fac > ratio ){
cand = MOSAIC_DELTA;
ratio = fac;
}
}
+ if (task->filter[MOSAIC_CALENDAR]){
+ fac = MOSestimate_calendar(cntxt,task);
+ if (fac > ratio){
+ cand = MOSAIC_CALENDAR;
+ ratio = fac;
+ }
+ }
//mnstr_printf(cntxt->fdout,"#cand %d factor %f\n",cand,ratio);
return cand;
}
@@ -430,6 +408,8 @@ MOScompressInternal(Client cntxt, bat *b
MOScreateframeDictionary(cntxt,task);
if( task->filter[MOSAIC_DICT])
MOScreatedictionary(cntxt,task);
+ if( task->filter[MOSAIC_CALENDAR])
+ MOScreatecalendar(cntxt,task);
// always start with an EOL block
MOSsetTag(task->blk,MOSAIC_EOL);
@@ -448,6 +428,7 @@ MOScompressInternal(Client cntxt, bat *b
switch(cand){
case MOSAIC_RLE:
case MOSAIC_DICT:
+ case MOSAIC_CALENDAR:
case MOSAIC_FRAME:
case MOSAIC_DELTA:
case MOSAIC_LINEAR:
@@ -512,6 +493,12 @@ MOScompressInternal(Client cntxt, bat *b
MOSadvance_prefix(cntxt,task);
MOSnewBlk(task);
break;
+ case MOSAIC_CALENDAR:
+ MOScompress_calendar(cntxt,task);
+ MOSupdateHeader(cntxt,task);
+ MOSadvance_calendar(cntxt,task);
+ MOSnewBlk(task);
+ break;
default :
// continue to use the last block header.
MOScompress_literal(cntxt,task);
@@ -661,6 +648,10 @@ MOSdecompressInternal(Client cntxt, bat
MOSdecompress_prefix(cntxt,task);
MOSskip_prefix(cntxt,task);
break;
+ case MOSAIC_CALENDAR:
+ MOSdecompress_calendar(cntxt,task);
+ MOSskip_calendar(cntxt,task);
+ break;
default: assert(0);
}
}
@@ -884,6 +875,9 @@ MOSsubselect(Client cntxt, MalBlkPtr mb,
case MOSAIC_LINEAR:
MOSsubselect_linear(cntxt,task,low,hgh,li,hi,anti);
break;
+ case MOSAIC_CALENDAR:
+ MOSsubselect_calendar(cntxt,task,low,hgh,li,hi,anti);
+ break;
case MOSAIC_NONE:
default:
MOSsubselect_literal(cntxt,task,low,hgh,li,hi,anti);
@@ -1010,6 +1004,9 @@ str MOSthetasubselect(Client cntxt, MalB
case MOSAIC_FRAME:
MOSthetasubselect_frame(cntxt,task,low,*oper);
break;
+ case MOSAIC_CALENDAR:
+ MOSthetasubselect_calendar(cntxt,task,low,*oper);
+ break;
case MOSAIC_NONE:
default:
MOSthetasubselect_literal(cntxt,task,low,*oper);
@@ -1138,6 +1135,9 @@ str MOSprojection(Client cntxt, MalBlkPt
case MOSAIC_LINEAR:
MOSprojection_linear(cntxt, task);
break;
+ case MOSAIC_CALENDAR:
+ MOSprojection_calendar(cntxt, task);
+ break;
case MOSAIC_NONE:
MOSprojection_literal(cntxt, task);
break;
@@ -1270,6 +1270,9 @@ MOSsubjoin(Client cntxt, MalBlkPtr mb, M
case MOSAIC_LINEAR:
MOSsubjoin_linear(cntxt, task);
break;
+ case MOSAIC_CALENDAR:
+ MOSsubjoin_calendar(cntxt, task);
+ break;
case MOSAIC_NONE:
MOSsubjoin_literal(cntxt, task);
break;
@@ -1410,47 +1413,64 @@ MOSanalyseInternal(Client cntxt, int thr
*/
#define CANDIDATES 256 /* all three combinations */
+/*
+ * One result record per candidate compression pattern.
+ * MOSanalyseReport clears the whole pat[] table, marks every slot with
+ * xf = -1 and then fills the slot of each pattern it actually tries;
+ * cmpPattern orders these records by xf.
+ */
+struct PAT{
+ int pattern; /* presumably the filter bit mask of this candidate -- TODO confirm, assignment not visible here */
+ str technique; /* GDKstrdup'ed, space-separated names of the enabled filters */
+ BUN xsize; /* NOTE(review): looks like the compressed size -- confirm against the code that sets it */
+ dbl xf; /* compression factor; -1 means "not measured", values in [0,1) are treated as poor */
+ lng clk1, clk2; /* clk1: GDKms() elapsed around MOScompressInternal; clk2: presumably the decompression time -- verify */
+}pat[CANDIDATES];
+
+/*
+ * Comparator for sorting struct PAT records.
+ *
+ * Primary key:  compression factor xf, largest first.
+ * Tie-break:    on equal xf the record with the shorter technique string
+ *               comes first; a missing (NULL) technique counts as the
+ *               empty string.
+ *
+ * Returns a negative value when *p1 sorts before *p2, a positive value
+ * when it sorts after, and 0 when both keys are equal.  The result is
+ * negated when the arguments are swapped, which the sort routine relies
+ * on for a well-defined order.
+ */
+static int cmpPattern(const void *p1, const void *p2){
+	const struct PAT *r1 = (const struct PAT *) p1;
+	const struct PAT *r2 = (const struct PAT *) p2;
+	size_t l1, l2;
+
+	if( r1->xf > r2->xf) return -1;
+	// the mirror case must report "after", not "equal"
+	if( r1->xf < r2->xf) return 1;
+
+	// equal factor: prefer the shorter technique description
+	l1 = r1->technique ? strlen(r1->technique) : 0;
+	l2 = r2->technique ? strlen(r2->technique) : 0;
+	return (l1 > l2) - (l1 < l2);
+}
+
void
MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio,
BAT *bcompress, BAT *bdecompress, str compressions)
{
int i,j,k,cases, bit=1, bid= b->batCacheid;
- BUN xsize;
MOStask task;
int pattern[CANDIDATES];
- char technique[CANDIDATES]={0}, *t = technique;
- dbl xf[CANDIDATES], ratio;
- lng clk,clk1;
+ char buf[1024]={0}, *t;
task = (MOStask) GDKzalloc(sizeof(*task));
if( task == NULL)
return;
// create the list of all possible 2^6 compression patterns
cases = makepatterns(pattern,CANDIDATES, compressions);
+ memset((char*)pat,0, sizeof(pat));
for( i = 0; i < CANDIDATES; i++)
- xf[i]= -1;
+ pat[i].xf= -1;
for( i = 1; i< cases; i++){
// Ignore patterns that have a poor individual compressor
if( i > MOSAIC_METHODS-2) {
for( j= 0; j < MOSAIC_METHODS-1; j++)
- if ( (pattern[i] & pattern[j]) == pattern[j] && xf[j]
>= 0 && xf[j] < 1.0) break;
+ if ( (pattern[i] & pattern[j]) == pattern[j] &&
pat[j].xf >= 0 && pat[j].xf < 1.0) break;
if( j< MOSAIC_METHODS-1 ) continue;
}
- t= technique;
+ t= buf;
+ *t =0;
for(j=0, bit=1; j < MOSAIC_METHODS-1; j++){
task->filter[j]= (pattern[i] & bit)>0;
task->range[j]= 0;
task->factor[j]= 0.0;
bit *=2;
if( task->filter[j]){
- snprintf(t, 1024-strlen(technique),"%s ",
MOSfiltername[j]);
- t= technique + strlen(technique);
+ snprintf(t, 1024-strlen(buf),"%s ",
MOSfiltername[j]);
+ t= buf + strlen(buf);
}
}
- clk = GDKms();
+ pat[i].technique= GDKstrdup(buf);
+ pat[i].clk1 = GDKms();
MOScompressInternal(cntxt, &bid, task, 0);
- clk = GDKms()- clk;
+ pat[i].clk1 = GDKms()- pat[i].clk1;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list