Changeset: 03cd42acce72 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03cd42acce72
Modified Files:
monetdb5/modules/mosaic/mosaic.c
monetdb5/modules/mosaic/mosaic.h
monetdb5/modules/mosaic/mosaic_dictionary.c
monetdb5/modules/mosaic/mosaic_frame.c
Branch: mosaic
Log Message:
Clean up the code
Easier to use global dictionary and delta frame
Use the bitvector code in gdk
diffs (truncated from 1209 to 300 lines):
diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -653,6 +653,7 @@ MOSdecompressInternal(Client cntxt, bat
// continue with all work
bsrc->batDirty = 1;
+ BATsettrivprop(bsrc);
MCexitMaintenance(cntxt);
BBPkeepref( *ret = bsrc->batCacheid);
@@ -684,9 +685,9 @@ MOSdecompressInternal(Client cntxt, bat
}
if(error)
mnstr_printf(cntxt->fdout,"#incompatible compression\n");
- GDKfree(task);
task->timer = GDKusec() - task->timer;
+ GDKfree(task);
return MAL_SUCCEED;
}
@@ -874,9 +875,7 @@ MOSsubselect(Client cntxt, MalBlkPtr mb,
BATsetcount(bn,cnt);
bn->tnil = 0;
bn->tnonil = 1;
- bn->tsorted = 1;
- bn->trevsorted = BATcount(bn) <= 1;
- bn->tkey = 1;
+ bn->tsorted = bn->trevsorted = cnt <=1;
*getArgReference_bat(stk, pci, 0) = bn->batCacheid;
GDKfree(task);
BBPkeepref(bn->batCacheid);
@@ -1005,9 +1004,7 @@ str MOSthetasubselect(Client cntxt, MalB
BATsetcount(bn,cnt);
bn->tnil = 0;
bn->tnonil = 1;
- bn->tsorted = 1;
- bn->trevsorted = BATcount(bn) <= 1;
- bn->tkey = 1;
+ bn->tsorted = bn->trevsorted = cnt <= 1;
BBPkeepref(*getArgReference_bat(stk,pci,0)= bn->batCacheid);
}
GDKfree(task);
@@ -1135,9 +1132,7 @@ str MOSprojection(Client cntxt, MalBlkPt
BATsetcount(bn,task->cnt);
bn->tnil = 0;
bn->tnonil = 1;
- bn->tsorted = 1;
- bn->trevsorted = BATcount(bn) <= 1;
- bn->tkey = 1;
+ bn->tsorted = bn->trevsorted = cnt <= 1;
BBPkeepref(*ret = bn->batCacheid);
GDKfree(task);
return msg;
@@ -1262,11 +1257,8 @@ MOSsubjoin(Client cntxt, MalBlkPtr mb, M
assert(0);
}
- bln->tsorted = cnt <= 1;
- bln->trevsorted = cnt <= 1;
-
- brn->tsorted = cnt<= 1;
- brn->trevsorted = cnt <= 1;
+ BATsettrivprop(bln);
+ BATsettrivprop(brn);
if( swapped){
BBPkeepref(*ret= brn->batCacheid);
BBPkeepref(*ret2= bln->batCacheid);
diff --git a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h
--- a/monetdb5/modules/mosaic/mosaic.h
+++ b/monetdb5/modules/mosaic/mosaic.h
@@ -81,18 +81,33 @@ typedef struct MOSAICHEADER{
bte mask, bits, framebits; // global compression type properties
int dictsize; // used by dictionary compression, it is a small table
int framesize; // used by frame compression, it is a small table
+ union{
+ sht valsht[256];
+ int valint[256];
+ lng vallng[256];
+ oid valoid[256];
+ flt valflt[256];
+ dbl valdbl[256];
#ifdef HAVE_HGE
- hge dict[256];
- hge frame[256];
-#else
- lng dict[256];
- lng frame[256];
+ hge valhge[256];
#endif
+ }dict;
+ lng dictfreq[256];// keep track on their use
+ union{
+ sht valsht[256];
+ int valint[256];
+ lng vallng[256];
+ oid valoid[256];
+ flt valflt[256];
+ dbl valdbl[256];
+#ifdef HAVE_HGE
+ hge valhge[256];
+#endif
+ }frame;
// collect compression statistics for the particular task
flt ratio; //compresion ratio
lng blks[MOSAIC_METHODS];
lng elms[MOSAIC_METHODS];
- lng dictfreq[256];// keep track on their use
lng framefreq[256];
} * MosaicHdr;
@@ -110,7 +125,7 @@ typedef struct MOSAICBLK{
#define MOSincCnt(Blk,I) (assert((Blk)->cnt +I < MOSAICMAXCNT), (Blk)->cnt+= (unsigned int)(I))
/* The start of the encoding withing a Mosaic block */
-#define MOScodevector(Task) (((char*) Task->blk)+ MosaicBlkSize)
+#define MOScodevector(Task) (((char*) (Task)->blk)+ MosaicBlkSize)
/* Memory word alignement is type and platform dependent.
* We use an encoding that fits the column type requirements
diff --git a/monetdb5/modules/mosaic/mosaic_dictionary.c b/monetdb5/modules/mosaic/mosaic_dictionary.c
--- a/monetdb5/modules/mosaic/mosaic_dictionary.c
+++ b/monetdb5/modules/mosaic/mosaic_dictionary.c
@@ -35,7 +35,7 @@
void
MOSadvance_dictionary(Client cntxt, MOStask task)
{
- int *dst = (int*) (((char*) task->blk) + MosaicBlkSize);
+ int *dst = (int*) MOScodevector(task);
BUN cnt = MOSgetCnt(task->blk);
long bytes;
(void) cntxt;
@@ -51,40 +51,58 @@ MOSadvance_dictionary(Client cntxt, MOSt
static void
MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i)
{
- void *val = (void*)task->hdr->dict;
switch(ATOMbasetype(task->type)){
case TYPE_sht:
- snprintf(buf,len,"%hd", ((sht*) val)[i]); break;
+ snprintf(buf,len,"%hd", task->hdr->dict.valsht[i]); break;
case TYPE_int:
- snprintf(buf,len,"%d", ((int*) val)[i]); break;
+ snprintf(buf,len,"%d", task->hdr->dict.valint[i]); break;
case TYPE_oid:
- snprintf(buf,len,OIDFMT, ((oid*) val)[i]); break;
+ snprintf(buf,len,OIDFMT, task->hdr->dict.valoid[i]); break;
case TYPE_lng:
- snprintf(buf,len,LLFMT, ((lng*) val)[i]); break;
+ snprintf(buf,len,LLFMT, task->hdr->dict.vallng[i]); break;
#ifdef HAVE_HGE
case TYPE_hge:
- snprintf(buf,len,"%.40g", (dbl) ((hge*) val)[i]); break;
+ snprintf(buf,len,"%.40g", (dbl) task->hdr->dict.valhge[i]); break;
#endif
case TYPE_flt:
- snprintf(buf,len,"%f", ((flt*) val)[i]); break;
+ snprintf(buf,len,"%f", task->hdr->dict.valflt[i]); break;
case TYPE_dbl:
- snprintf(buf,len,"%g", ((dbl*) val)[i]); break;
+ snprintf(buf,len,"%g", task->hdr->dict.valdbl[i]); break;
}
}
void
MOSdump_dictionary(Client cntxt, MOStask task)
{
- int i;
+ int i,len= BUFSIZ;
char buf[BUFSIZ];
- mnstr_printf(cntxt->fdout,"#bits %d",task->hdr->bits);
+ mnstr_printf(cntxt->fdout,"#dictionary bits %d dictsize %d",task->hdr->bits, task->hdr->dictsize);
for(i=0; i< task->hdr->dictsize; i++){
MOSdump_dictionaryInternal(buf, BUFSIZ, task,i);
mnstr_printf(cntxt->fdout,"[%d] %s ",i,buf);
}
mnstr_printf(cntxt->fdout,"\n");
+ switch(ATOMbasetype(task->type)){
+ case TYPE_sht:
+ snprintf(buf,len,"%hd %hd", task->hdr->checksum.sumsht,task->hdr->checksum2.sumsht); break;
+ case TYPE_int:
+ snprintf(buf,len,"%d %d", task->hdr->checksum.sumint,task->hdr->checksum2.sumint); break;
+ case TYPE_oid:
+ snprintf(buf,len,OIDFMT " " OIDFMT, task->hdr->checksum.sumoid,task->hdr->checksum2.sumoid); break;
+ case TYPE_lng:
+ snprintf(buf,len,LLFMT " " LLFMT, task->hdr->checksum.sumlng,task->hdr->checksum2.sumlng); break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ snprintf(buf,len,"%.40g %.40g", (dbl)task->hdr->checksum.sumhge,(dbl)task->hdr->checksum2.sumhge); break;
+#endif
+ case TYPE_flt:
+ snprintf(buf,len,"%f %f", task->hdr->checksum.sumflt,task->hdr->checksum2.sumflt); break;
+ case TYPE_dbl:
+ snprintf(buf,len,"%g %g", task->hdr->checksum.sumdbl,task->hdr->checksum2.sumdbl); break;
+ }
+ mnstr_printf(cntxt->fdout,"#checksums %s\n",buf);
}
void
@@ -130,19 +148,18 @@ MOSskip_dictionary(Client cntxt, MOStask
task->blk = 0; // ENDOFLIST
}
-#define MOSfind(X,VAL,F,L)\
+#define MOSfind(Res,DICT,VAL,F,L)\
{ int m,f= F, l=L; \
while( l-f > 0 ) { \
m = f + (l-f)/2;\
- if ( VAL < dict[m] ) l=m-1; else f= m;\
- if ( VAL > dict[m] ) f=m+1; else l= m;\
+ if ( VAL < DICT[m] ) l=m-1; else f= m;\
+ if ( VAL > DICT[m] ) f=m+1; else l= m;\
}\
- X= f;\
+ Res= f;\
}
#define estimateDict(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start;\
- TPE *dict= (TPE*)hdr->dict;\
BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start;\
if( task->range[MOSAIC_DICT] > task->start){\
i = task->range[MOSAIC_DICT] - task->start;\
@@ -155,8 +172,8 @@ MOSskip_dictionary(Client cntxt, MOStask
return factor;\
}\
for(i =0; i<limit; i++, val++){\
- MOSfind(j,*val,0,hdr->dictsize);\
- if( j == hdr->dictsize || dict[j] != *val )\
+ MOSfind(j,hdr->dict.val##TPE,*val,0,hdr->dictsize);\
+ if( j == hdr->dictsize || hdr->dict.val##TPE[j] != *val )\
break;\
}\
if( i * sizeof(TPE) <= wordaligned( MosaicBlkSize + i,TPE))\
@@ -167,12 +184,11 @@ MOSskip_dictionary(Client cntxt, MOStask
// store it in the compressed heap header directly
// filter out the most frequent ones
#define makeDict(TPE)\
-{ TPE *val = ((TPE*)task->src) + task->start;\
- TPE *dict = (TPE*)hdr->dict,v;\
+{ TPE v,*val = ((TPE*)task->src) + task->start;\
BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start;\
for(i = 0; i< limit; i++, val++){\
for(j= 0; j< hdr->dictsize; j++)\
- if( dict[j] == *val) break;\
+ if( task->hdr->dict.val##TPE[j] == *val) break;\
if ( j == hdr->dictsize){\
if ( hdr->dictsize == 256){\
int min = 0;\
@@ -182,7 +198,7 @@ MOSskip_dictionary(Client cntxt, MOStask
cnt[j]=0;\
break;\
}\
- dict[j] = *val;\
+ task->hdr->dict.val##TPE[j] = *val;\
cnt[j]++;\
hdr->dictsize++;\
} else\
@@ -190,10 +206,10 @@ MOSskip_dictionary(Client cntxt, MOStask
}\
for(k=0; k< hdr->dictsize; k++)\
for(j=k+1; j< hdr->dictsize; j++)\
- if(dict[k] >dict[j]){\
- v= dict[k];\
- dict[k] = dict[j];\
- dict[j] = v;\
+ if(task->hdr->dict.val##TPE[k] >task->hdr->dict.val##TPE[j]){\
+ v = task->hdr->dict.val##TPE[k];\
+ task->hdr->dict.val##TPE[k] = task->hdr->dict.val##TPE[j];\
+ task->hdr->dict.val##TPE[j] = v;\
}\
hdr->bits = 1;\
hdr->mask =1;\
@@ -256,7 +272,6 @@ MOSestimate_dictionary(Client cntxt, MOS
#endif
case TYPE_lng:
{ lng *val = ((lng*)task->src) + task->start;
- lng *dict = (lng*)hdr->dict;
// assume uniform compression statistics
if( task->range[MOSAIC_DICT] > task->start){
i = task->range[MOSAIC_DICT] - task->start;
@@ -270,8 +285,8 @@ MOSestimate_dictionary(Client cntxt, MOS
}
for(i =task->start; i<task->stop; i++, val++){
- MOSfind(j,*val,0,hdr->dictsize);
- if( j == hdr->dictsize || dict[j] != *val)
+ MOSfind(j,task->hdr->dict.vallng,*val,0,hdr->dictsize);
+ if( j == hdr->dictsize || task->hdr->dict.vallng[j] != *val)
break;
}
i -= task->start;
@@ -297,17 +312,17 @@ MOSestimate_dictionary(Client cntxt, MOS
#define DICTcompress(TPE)\
{ TPE *val = ((TPE*)task->src) + task->start;\
- TPE *dict = (TPE*)hdr->dict;\
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list