Changeset: c6f8c2213bad for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c6f8c2213bad
Modified Files:
monetdb5/modules/mal/mosaic.c
monetdb5/modules/mal/mosaic.h
monetdb5/modules/mal/mosaic_dictionary.c
monetdb5/modules/mal/mosaic_dictionary.h
monetdb5/modules/mal/mosaic_hdr.c
Branch: mosaic
Log Message:
Dump more header information in the layout structure
diffs (truncated from 376 to 300 lines):
diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -79,6 +79,8 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
MOStask task=0;
int i,ret,bid;
BAT *bn= NULL;
+ char buf[BUFSIZ];
+ lng zero=0;
task= (MOStask) GDKzalloc(sizeof(*task));
if( task == NULL)
@@ -100,9 +102,35 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
MOSinit(task,b);
MOSinitializeScan(cntxt,task,0,task->hdr->top);
+ // save the general properties
+ BUNappend(btech, "ratio", FALSE);
+ BUNappend(bcount, &zero, FALSE);
+ BUNappend(binput, &zero, FALSE);
+ BUNappend(boutput, &zero , FALSE);
+ snprintf(buf,BUFSIZ,"%g", task->hdr->ratio);
+ BUNappend(bproperties, buf, FALSE);
+ for(i=0; i < MOSAIC_METHODS-1; i++){
+ lng zero = 0;
+ snprintf(buf,BUFSIZ,"%s_blks", MOSfiltername[i]);
+ BUNappend(btech, buf, FALSE);
+ BUNappend(bcount, &zero, FALSE);
+ BUNappend(binput, &task->hdr->blks[i], FALSE);
+ BUNappend(boutput, &zero , FALSE);
+ BUNappend(bproperties, "", FALSE);
+
+ snprintf(buf,BUFSIZ,"%s_elms", MOSfiltername[i]);
+ BUNappend(btech, buf, FALSE);
+ BUNappend(bcount, &zero, FALSE);
+ BUNappend(binput, &task->hdr->elms[i], FALSE);
+ BUNappend(boutput, &zero , FALSE);
+ BUNappend(bproperties, "", FALSE);
+
+ }
if( task->hdr->blks[MOSAIC_FRAME])
MOSlayout_frame_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
+ if( task->hdr->blks[MOSAIC_DICT])
+ MOSlayout_dictionary_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
while(task->start< task->stop){
switch(MOSgetTag(task->blk)){
@@ -254,7 +282,7 @@ MOScompressInternal(Client cntxt, bat *r
BUN cutoff =0;
str msg = MAL_SUCCEED;
int cand;
- float factor= 1.0, fac= 1.0;
+ float ratio= 1.0, fac= 1.0;
*ret = 0;
@@ -367,7 +395,7 @@ MOScompressInternal(Client cntxt, bat *r
// default is to extend the non-compressed block
cand = MOSAIC_NONE;
fac = 1.0;
- factor = 1.0;
+ ratio = 1.0;
// cutoff the filters, especially dictionary tests are expensive
if( cutoff && cutoff < task->start){
@@ -381,46 +409,46 @@ MOScompressInternal(Client cntxt, bat *r
// select candidate amongst those
if ( task->filter[MOSAIC_RLE]){
fac = MOSestimate_runlength(cntxt,task);
- if (fac > factor){
+ if (fac > ratio){
cand = MOSAIC_RLE;
- factor = fac;
+ ratio = fac;
}
}
if ( task->filter[MOSAIC_DICT]){
fac = MOSestimate_dictionary(cntxt,task);
- if (fac > factor){
+ if (fac > ratio){
cand = MOSAIC_DICT;
- factor = fac;
+ ratio = fac;
}
}
if ( task->filter[MOSAIC_FRAME]){
fac = MOSestimate_frame(cntxt,task);
- if (fac > factor){
+ if (fac > ratio){
cand = MOSAIC_FRAME;
- factor = fac;
+ ratio = fac;
}
}
if ( task->filter[MOSAIC_DELTA]){
fac = MOSestimate_delta(cntxt,task);
- if ( fac > factor ){
+ if ( fac > ratio ){
cand = MOSAIC_DELTA;
- factor = fac;
+ ratio = fac;
}
}
if ( task->filter[MOSAIC_PREFIX]){
fac = MOSestimate_prefix(cntxt,task);
- if ( fac > factor ){
+ if ( fac > ratio ){
cand = MOSAIC_PREFIX;
- factor = fac;
+ ratio = fac;
}
if ( fac < 0.0)
task->filter[MOSAIC_PREFIX] = 0;
}
if ( task->filter[MOSAIC_LINEAR]){
fac = MOSestimate_linear(cntxt,task);
- if ( fac >factor){
+ if ( fac >ratio){
cand = MOSAIC_LINEAR;
- factor = fac;
+ ratio = fac;
}
}
@@ -530,7 +558,7 @@ MOScompressInternal(Client cntxt, bat *r
BBPkeepref(*ret = bsrc->batCacheid);
BBPunfix(bcompress->batCacheid);
}
- task->factor = task->hdr->factor = (task->xsize ==0 ? 0:(flt)task->size/task->xsize);
+ task->ratio = task->hdr->ratio = (task->xsize ==0 ? 0:(flt)task->size/task->xsize);
#ifdef _DEBUG_MOSAIC_
MOSdumpInternal(cntxt,bcompress);
#endif
@@ -1451,7 +1479,7 @@ MOSanalyseInternal(Client cntxt, int thr
#define CANDIDATES 256 /* all three combinations */
void
-MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bfactor, str compressions)
+MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio, str compressions)
{
int i,j,k,cases, bit=1, ret, bid= b->batCacheid;
BUN cnt= BATcount(b);
@@ -1459,7 +1487,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B
MOStask task;
int pattern[CANDIDATES];
char technique[CANDIDATES]={0}, *t = technique;
- dbl xf[CANDIDATES], factor;
+ dbl xf[CANDIDATES], ratio;
cases = makepatterns(pattern,CANDIDATES, compressions);
task = (MOStask) GDKzalloc(sizeof(*task));
@@ -1491,7 +1519,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B
if( j<i)
continue;
- xf[i]= task->hdr? task->factor: 0;
+ xf[i]= task->hdr? task->ratio: 0;
if( xf[i] == 0)
continue;
BUNappend(boutput,&task->xsize,FALSE);
@@ -1504,8 +1532,8 @@ MOSanalyseReport(Client cntxt, BAT *b, B
}
BUNappend(btech,technique,FALSE);
if( task->xsize)
- factor = (input + 0.0)/task->xsize;
- BUNappend(bfactor,&factor,FALSE);
+ ratio = (input + 0.0)/task->xsize;
+ BUNappend(bratio,&ratio,FALSE);
// get rid of temporary compressed BAT
if( ret != bid)
@@ -1634,7 +1662,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M
for( k = 0; k< MOSAIC_METHODS; k++)
task->filter[k]= 1;
x+= MOSanalyseInternal(cntxt, threshold, task, bid);
- xf[j]= task->hdr? task->factor: 0;
+ xf[j]= task->hdr? task->ratio: 0;
if(xf[mx] < xf[j]) mx =j;
}
if(x >1){
@@ -1655,7 +1683,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M
for( k = 0; k< MOSAIC_METHODS; k++)
task->filter[k]= 1;
x+= MOSanalyseInternal(cntxt, threshold, task, i);
- xf[j]= task->hdr? task->factor: 0;
+ xf[j]= task->hdr? task->ratio: 0;
}
if( x >1){
mnstr_printf(cntxt->fdout,"#all %d ",i);
@@ -1722,13 +1750,13 @@ MOSoptimize(Client cntxt, MalBlkPtr mb,
bit *=2;
}
for( j=0; j < i; j++)
- if (pattern[j] == k && task->factor == xf[j])
+ if (pattern[j] == k && task->ratio == xf[j])
break;
if( j<i)
continue;
- xf[i] = task->factor;
+ xf[i] = task->ratio;
if( ret != bid)
BBPdecref(ret, TRUE);
}
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -73,18 +73,15 @@ typedef struct MOSAICHEADER{
flt sumflt;
dbl sumdbl;
} checksum, checksum2;
- // collect compression statistics for the particular task
- lng blks[MOSAIC_METHODS];
- lng elms[MOSAIC_METHODS];
- flt factor;
int top;
+ // skip index for OID access
oid oidbase[MOSAICINDEX]; // to speedup localization
BUN offset[MOSAICINDEX];
bte mask, bits, framebits; // global compression type properties
+ // both dictionary and framebased compression require a global dictionary of frequent values
+ // Their size is purposely topped
int dictsize; // used by dictionary compression
int framesize; // used by frame compression
- // both dictionary and framebased compression require a global dictionary of frequent values
- // Their size is purposely topped
#ifdef HAVE_HGE
hge dict[256];
hge frame[256];
@@ -92,6 +89,12 @@ typedef struct MOSAICHEADER{
lng dict[256];
lng frame[256];
#endif
+ // collect compression statistics for the particular task
+ flt ratio; // compression ratio
+ lng blks[MOSAIC_METHODS];
+ lng elms[MOSAIC_METHODS];
+ lng dictfreq[256];// keep track of their use
+ lng framefreq[256];
} * MosaicHdr;
// bit stuffed header block, currently 4 bytes wide and chunks should be 4-byte aligned
@@ -125,7 +128,7 @@ typedef struct MOSTASK{
MosaicBlk blk; // current block header in scan
oid start; // oid of first element in current blk
oid stop; // last oid of range to be scanned
- flt factor;
+ flt ratio; // compression ratio encountered
char *dst; // write pointer into current compressed blocks
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -46,46 +46,66 @@ MOSadvance_dictionary(Client cntxt, MOSt
}
/* Beware, the dump routines use the compressed part of the task */
+static void
+MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i)
+{
+ void *val = (void*)task->hdr->dict;
+
+ switch(ATOMstorage(task->type)){
+ case TYPE_sht:
+ snprintf(buf,len,"%hd", ((sht*) val)[i]); break;
+ case TYPE_int:
+ snprintf(buf,len,"%d", ((int*) val)[i]); break;
+ case TYPE_oid:
+ snprintf(buf,len,OIDFMT, ((oid*) val)[i]); break;
+ case TYPE_lng:
+ snprintf(buf,len,LLFMT, ((lng*) val)[i]); break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ snprintf(buf,len,"%.40g", (dbl) ((hge*) val)[i]); break;
+#endif
+ case TYPE_wrd:
+ snprintf(buf,len,SZFMT, ((wrd*) val)[i]); break;
+ case TYPE_flt:
+ snprintf(buf,len,"%f", ((flt*) val)[i]); break;
+ case TYPE_dbl:
+ snprintf(buf,len,"%g", ((dbl*) val)[i]); break;
+ }
+}
+
void
MOSdump_dictionary(Client cntxt, MOStask task)
{
- MosaicHdr hdr= task->hdr;
int i;
- void *val = (void*)hdr->dict;
+ char buf[BUFSIZ];
- mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits);
- switch(ATOMstorage(task->type)){
- case TYPE_sht:
- for(i=0; i< hdr->dictsize; i++)
- mnstr_printf(cntxt->fdout,"sht [%d] %hd ",i, ((sht*) val)[i]); break;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list