Changeset: 03142818494b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03142818494b
Modified Files:
monetdb5/modules/mal/mosaic_dictionary.c
Branch: mosaic
Log Message:
Moving dictionary indexing to bit bits
There is still an issue with the bit stuffing
as shown by mosaic_mal test.
diffs (truncated from 839 to 300 lines):
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -33,22 +33,15 @@
void
MOSadvance_dictionary(Client cntxt, MOStask task)
{
+ int *dst = (int*) (((char*) task->blk) + MosaicBlkSize);
+ long cnt = MOSgetCnt(task->blk);
+ long bytes;
(void) cntxt;
- task->start += MOSgetCnt(task->blk);
- switch(ATOMstorage(task->type)){
- //case TYPE_bte: CASE_bit: no compression achievable
- case TYPE_sht: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),sht)); break;
- case TYPE_int: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),int)); break;
- case TYPE_lng: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),lng)); break;
- case TYPE_oid: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),oid)); break;
- case TYPE_wrd: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),wrd)); break;
- case TYPE_flt: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),flt)); break;
- case TYPE_dbl: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),dbl)); break;
-#ifdef HAVE_HGE
- case TYPE_hge: task->blk = (MosaicBlk)( ((char*)task->blk) +
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),hge)); break;
-#endif
- }
+ assert(cnt > 0);
+ task->start += (oid) cnt;
+ bytes = (cnt * task->hdr->bits)/8 + (((cnt * task->hdr->bits) %8) !=
0);
+ task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, lng));
}
/* Beware, the dump routines use the compressed part of the task */
@@ -59,7 +52,7 @@ MOSdump_dictionary(Client cntxt, MOStask
int i;
void *val = (void*)hdr->dict;
- mnstr_printf(cntxt->fdout,"#");
+ mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits);
switch(ATOMstorage(task->type)){
case TYPE_sht:
for(i=0; i< hdr->dictsize; i++)
@@ -99,11 +92,23 @@ MOSskip_dictionary(Client cntxt, MOStask
task->blk = 0; // ENDOFLIST
}
+#define MOSfind(X,VAL,F,L)\
+{ int m,f= F, l=L; \
+ while( l-f > 0 ) { \
+ m = f + (l-f)/2;\
+ if ( VAL < dict[m] ) l=m-1; else f= m;\
+ if ( VAL > dict[m] ) f=m+1; else l= m;\
+ }\
+ X= f;\
+}
+
#define estimateDict(TPE)\
{ TPE *val = (TPE*)task->src;\
+ TPE *dict= (TPE*)hdr->dict;\
for(i =0; i<task->elm; i++, val++){\
- for(j= 0; j< hdr->dictsize; j++)\
- if( hdr->dict[j] == *val) break;\
+ MOSfind(j,*val,0,hdr->dictsize);\
+ if( j == hdr->dictsize || dict[j] != *val )\
+ break;\
}\
if ( i > MOSlimit() ) i = MOSlimit();\
if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned(
MosaicBlkSize + i,TPE);\
@@ -139,8 +144,15 @@ MOSskip_dictionary(Client cntxt, MOStask
dict[i] = dict[j];\
dict[j] = v;\
}\
+ hdr->bits = 1;\
+ hdr->mask =1;\
+ for( i=1 ; i < hdr->dictsize-1; i *=2){\
+ hdr->bits++;\
+ hdr->mask = (hdr->mask <<1) | 1;\
+ }\
}
+
void
MOScreatedictionary(Client cntxt, MOStask task)
{ int i, j;
@@ -153,7 +165,7 @@ MOScreatedictionary(Client cntxt, MOStas
hdr->dictsize = 0;
switch(ATOMstorage(task->type)){
case TYPE_sht: makeDict(sht); break;
- case TYPE_lng: makeDict(lng); break;
+ case TYPE_int: makeDict(int); break;
case TYPE_oid: makeDict(oid); break;
case TYPE_wrd: makeDict(wrd); break;
case TYPE_flt: makeDict(flt); break;
@@ -161,9 +173,9 @@ MOScreatedictionary(Client cntxt, MOStas
#ifdef HAVE_HGE
case TYPE_hge: makeDict(hge); break;
#endif
- case TYPE_int:
- { int *val = (int*)task->src;
- int *dict = (int*)hdr->dict,v;
+ case TYPE_lng:
+ { lng *val = (lng*)task->src;
+ lng *dict = (lng*)hdr->dict,v;
for(i =0; i< (int) task->elm; i++, val++){
for(j= 0; j< hdr->dictsize; j++)
@@ -192,10 +204,16 @@ MOScreatedictionary(Client cntxt, MOStas
dict[i] = dict[j];
dict[j] = v;
}
+ hdr->bits = 1;
+ hdr->mask =1;
+ for( i=1 ; i < hdr->dictsize-1; i *=2){
+ hdr->bits++;
+ hdr->mask = (hdr->mask <<1) | 1;
+ }
}
}
+ MOSdump_dictionary(cntxt, task);
#ifdef _DEBUG_MOSAIC_
- MOSdump_dictionary(cntxt, task);
#endif
}
// calculate the expected reduction using DICT in terms of elements compressed
@@ -210,7 +228,7 @@ MOSestimate_dictionary(Client cntxt, MOS
switch(ATOMstorage(task->type)){
//case TYPE_bte: CASE_bit: no compression achievable
case TYPE_sht: estimateDict(sht); break;
- case TYPE_int: estimateDict(int); break;
+ case TYPE_lng: estimateDict(lng); break;
case TYPE_oid: estimateDict(oid); break;
case TYPE_wrd: estimateDict(wrd); break;
case TYPE_flt: estimateDict(flt); break;
@@ -218,17 +236,16 @@ MOSestimate_dictionary(Client cntxt, MOS
#ifdef HAVE_HGE
case TYPE_hge: estimateDict(hge); break;
#endif
- case TYPE_lng:
- { lng *val = (lng*)task->src;
+ case TYPE_int:
+ { int *val = (int*)task->src;
+ int *dict = (int*)hdr->dict;
for(i =0; i<task->elm; i++, val++){
- for(j= 0; j< hdr->dictsize; j++)
- if( hdr->dict[j] == *val)
- break;
- if ( j == hdr->dictsize)
+ MOSfind(j,*val,0,hdr->dictsize);
+ if( j == hdr->dictsize || dict[j] != *val)
break;
}
if ( i > MOSlimit() ) i = MOSlimit();
- if(i) factor = (flt) ((int)i * sizeof(lng)) /
wordaligned( MosaicBlkSize + i,lng);
+ if(i) factor = (flt) ((int)i * sizeof(int)) /
wordaligned( MosaicBlkSize + i,lng);
}
}
#ifdef _DEBUG_MOSAIC_
@@ -238,24 +255,38 @@ MOSestimate_dictionary(Client cntxt, MOS
}
// insert a series of values into the compressor block using dictionary
+#define dictcompress()\
+cid = (i * hdr->bits)/64;\
+lshift= 64 -((i * hdr->bits) % 64) ;\
+if ( lshift > hdr->bits){\
+ base[cid]= base[cid] | (((unsigned long)j) << (lshift-hdr->bits));\
+}else{ \
+ rshift= 64 - ((i+1) * hdr->bits) % 64;\
+ base[cid]= base[cid] | (((unsigned long)j) >> (hdr->bits-lshift));\
+ base[cid+1]= 0 | (((unsigned long)j) << rshift);\
+}
+
#define DICTcompress(TPE)\
{ TPE *val = (TPE*)task->src;\
TPE *dict = (TPE*)hdr->dict;\
BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
task->dst = ((char*) task->blk)+ MosaicBlkSize;\
+ base = (unsigned long*) task->dst; \
+ base[0]=0;\
for(i =0; i<limit; i++, val++){\
- for(j= 0; j< hdr->dictsize; j++)\
- if( dict[j] == *val) {\
- MOSincCnt(blk,1);\
- *task->dst++ = (char) j;\
- break;\
- }\
- if ( j == hdr->dictsize) \
+ MOSfind(j,*val,0,hdr->dictsize);\
+ if(j == hdr->dictsize || dict[j] != *val) \
break;\
+ else {\
+ MOSincCnt(blk,1);\
+ dictcompress();\
+ }\
}\
+ assert(i);\
task->src = (char*) val;\
}
+
void
MOScompress_dictionary(Client cntxt, MOStask task)
{
@@ -263,6 +294,8 @@ MOScompress_dictionary(Client cntxt, MOS
int j;
MosaicBlk blk = task->blk;
MosaicHdr hdr = task->hdr;
+ int cid, lshift, rshift;
+ unsigned long *base;
(void) cntxt;
MOSsetTag(blk,MOSAIC_DICT);
@@ -282,29 +315,58 @@ MOScompress_dictionary(Client cntxt, MOS
{ int *val = (int*)task->src;
int *dict = (int*)hdr->dict;
BUN limit = task->elm > MOSlimit()? MOSlimit():
task->elm;
+
task->dst = ((char*) task->blk)+ MosaicBlkSize;
+ base = (unsigned long*) task->dst; // start of bit
vector
+ base[0]=0;
for(i =0; i<limit; i++, val++){
- for(j= 0; j< hdr->dictsize; j++)
- if( dict[j] == *val) {
- MOSincCnt(blk,1);
- *task->dst++ = (char) j;
- break;
+ MOSfind(j,*val,0,hdr->dictsize);
+ //mnstr_printf(cntxt->fdout,"compress %d %d
bits %d\n",*val,j,hdr->bits);
+ if( j == hdr->dictsize || dict[j] != *val )
+ break;
+ else {
+ MOSincCnt(blk,1);
+ cid = (i * hdr->bits)/64;
+ lshift= 64 -((i * hdr->bits) % 64) ;
+ if ( lshift > hdr->bits){
+ base[cid]= base[cid] |
(((unsigned long)j) << (lshift-hdr->bits));
+
//mnstr_printf(cntxt->fdout,"[%d] shift %d rbits %d \n",cid, lshift, hdr->bits);
+ }else{
+ rshift= 64 - ((i+1) *
hdr->bits) % 64;
+ base[cid]= base[cid] |
(((unsigned long)j) >> (hdr->bits-lshift));
+ base[cid+1]= 0 | (((unsigned
long)j) << rshift);
+
//mnstr_printf(cntxt->fdout,"[%d] shift %d %d val %o %o\n", cid, lshift, rshift,
+ //(j >>
(hdr->bits-lshift)), (j <<rshift));
}
- if ( j == hdr->dictsize)
- break;
+ }
}
+ assert(i);
task->src = (char*) val;
}
}
}
// the inverse operator, extend the src
+#define dictdecompress(I)\
+cid = (I * hdr->bits)/64;\
+lshift= 64 -((I * hdr->bits) % 64) ;\
+if ( lshift >hdr->bits){\
+ j = (base[cid]>> (lshift-hdr->bits)) & ((unsigned long)hdr->mask);\
+ }else{ \
+ rshift= 64 - ((I+1) * hdr->bits) % 64;\
+ m1 = (base[cid] & ( ((unsigned long)hdr->mask) >> (hdr->bits-lshift)));\
+ m2 = base[cid+1] >>rshift;\
+ j= 0 | (m1 <<(hdr->bits-lshift)) | m2;\
+ }
+
#define DICTdecompress(TPE)\
{ TPE *dict =(TPE*)((char*)hdr->dict);\
- bte *idx = (bte*)(((char*)blk) + MosaicBlkSize);\
BUN lim = MOSgetCnt(blk);\
- for(i = 0; i < lim; i++,idx++)\
- ((TPE*)task->src)[i] = dict[ (bte)*idx];\
+ base = (unsigned long*)(((char*)blk) + MosaicBlkSize);\
+ for(i = 0; i < lim; i++){\
+ dictdecompress(i);\
+ ((TPE*)task->src)[i] = dict[j];\
+ }\
task->src += i * sizeof(TPE);\
}
@@ -314,6 +376,9 @@ MOSdecompress_dictionary(Client cntxt, M
MosaicBlk blk = task->blk;
MosaicHdr hdr = task->hdr;
BUN i;
+ bte j,m1,m2;
+ int cid, lshift, rshift;
+ unsigned long *base;
(void) cntxt;
switch(ATOMstorage(task->type)){
@@ -329,10 +394,24 @@ MOSdecompress_dictionary(Client cntxt, M
#endif
case TYPE_int:
{ int *dict =(int*)((char*)hdr->dict);
- bte *idx = (bte*)(((char*)blk) + MosaicBlkSize);
BUN lim = MOSgetCnt(blk);
- for(i = 0; i < lim; i++,idx++)
- ((int*)task->src)[i] = dict[ (bte)*idx];
+ base = (unsigned long*)(((char*)blk) + MosaicBlkSize);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list