Changeset: f0bf3f7a67b7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f0bf3f7a67b7
Modified Files:
clients/Tests/exports.stable.out
monetdb5/modules/mosaic/mosaic.c
monetdb5/modules/mosaic/mosaic.h
monetdb5/modules/mosaic/mosaic.mal
sql/backends/monet5/sql.c
sql/backends/monet5/sql.mal
sql/backends/monet5/sql_mosaic.c
sql/scripts/76_mosaic.sql
sql/test/mosaic/Tests/compressionRLE2.stable.out
Branch: mosaic
Log Message:
Improve speed of mosaic analysis
Compression techniques that, in isolation, do not shrink the
data are ignored when searching for workable combination(s).
diffs (truncated from 681 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -395,6 +395,7 @@ int geomversion_get(void);
void geomversion_set(void);
bat getBBPsize(void);
int getBitVector(BitVector vector, BUN i, int bits);
+lng getBitVectorSize(const BUN cnt, const int width);
char *get_bin_path(void);
int gettimeofday(struct timeval *tv, int *ignore_zone);
int gprof_pthread_create(pthread_t *__restrict, __const pthread_attr_t
*__restrict, void *( *fcn)(void *), void *__restrict);
@@ -476,7 +477,7 @@ int ptrToStr(str *dst, int *len, const p
const ptr ptr_nil;
struct dirent *readdir(DIR *dir);
void rewinddir(DIR *dir);
-void setBitVector(BitVector vector, const BUN i, const int bits, const int
value);
+void setBitVector(BitVector vector, const BUN i, const int bits, const
unsigned int value);
int shtFromStr(const char *src, int *len, sht **dst);
int shtToStr(str *dst, int *len, const sht *src);
const sht sht_nil;
@@ -1416,7 +1417,7 @@ void MOSanalyseReport(Client cntxt, BAT
void MOSblk(MosaicBlk blk);
BUN MOSblocklimit;
str MOScompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
-str MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int
debug);
+str MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug);
void MOScompress_delta(Client cntxt, MOStask task);
void MOScompress_dictionary(Client cntxt, MOStask task);
void MOScompress_frame(Client cntxt, MOStask task);
@@ -1427,7 +1428,7 @@ void MOScompress_runlength(Client cntxt,
void MOScreatedictionary(Client cntxt, MOStask task);
void MOScreateframeDictionary(Client cntxt, MOStask task);
str MOSdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
-str MOSdecompressInternal(Client cntxt, bat *ret, bat *bid);
+str MOSdecompressInternal(Client cntxt, bat *bid);
str MOSdecompressStorage(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
void MOSdecompress_delta(Client cntxt, MOStask task);
void MOSdecompress_dictionary(Client cntxt, MOStask task);
diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -37,7 +37,7 @@
char
*MOSfiltername[]={"literal","runlength","dictionary","delta","linear","frame","prefix","EOL"};
BUN MOSblocklimit = 100000;
-str MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int
debug);
+str MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug);
static void
MOSinit(MOStask task, BAT *b){
@@ -333,8 +333,9 @@ MOSoptimizerCost(Client cntxt, MOStask t
return cand;
}
+/* the source is extended with a BAT mosaic mirror */
str
-MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int debug)
+MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug)
{
BAT *o = NULL, *bsrc; // the BAT to be augmented with a
compressed heap
str msg = MAL_SUCCEED;
@@ -342,8 +343,6 @@ MOScompressInternal(Client cntxt, bat *r
int tpe, typewidth;
lng t0,t1;
- *ret = 0;
-
if ((bsrc = BATdescriptor(*bid)) == NULL)
throw(MAL, "mosaic.compress", INTERNAL_BAT_ACCESS);
@@ -363,32 +362,33 @@ MOScompressInternal(Client cntxt, bat *r
typewidth = ATOMsize(tpe) * 8; // size in bits
break;
default:
- // don't compress them
- BBPkeepref(*ret = bsrc->batCacheid);
- return msg;
+ // don't compress it
+ BBPunfix(bsrc->batCacheid);
+ return MAL_SUCCEED;
}
if (BATcheckmosaic(bsrc)){
/* already compressed */
- BBPkeepref(*ret = bsrc->batCacheid);
+ BBPunfix(bsrc->batCacheid);
return msg;
}
assert(bsrc->tmosaic == NULL);
+ /* views are never compressed */
if (VIEWtparent(bsrc)) {
bat p = VIEWtparent(bsrc);
o = bsrc;
bsrc = BATdescriptor(p);
if (BATcheckmosaic(bsrc)) {
- BBPunfix(bsrc->batCacheid);
+ BBPunfix(o->batCacheid);
return MAL_SUCCEED;
}
- assert(bsrc->timprints == NULL);
+ assert(bsrc->tmosaic == NULL);
}
if ( BATcount(bsrc) < MOSAIC_THRESHOLD ){
/* no need to compress */
- BBPkeepref(*ret = bsrc->batCacheid);
+ BBPunfix(bsrc->batCacheid);
return msg;
}
@@ -404,7 +404,8 @@ MOScompressInternal(Client cntxt, bat *r
// Then we total size may go beyond the original size and we
should terminate the process.
// This should be detected before we compress a block, in the
estimate functions
// or when we extend the non-compressed collector block
- throw(MAL,"mosaic.compress", "heap construction
failes");
+ BBPunfix(bsrc->batCacheid);
+ throw(MAL,"mosaic.compress", "heap construction failes");
}
// initialize the non-compressed read pointer
@@ -533,8 +534,7 @@ MOScompressInternal(Client cntxt, bat *r
task->ratio = task->hdr->ratio = (flt)task->bsrc->theap.free/
task->bsrc->tmosaic->free;
finalize:
MCexitMaintenance(cntxt);
- *ret= bsrc->batCacheid;
- BBPkeepref(bsrc->batCacheid);
+ BBPunfix(bsrc->batCacheid);
#ifdef _DEBUG_MOSAIC_
MOSdumpInternal(cntxt,bsrc);
@@ -552,7 +552,7 @@ finalize:
str
MOScompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
- str prop = NULL;
+ str msg = MAL_SUCCEED;
int i;
MOStask task;
@@ -567,22 +567,22 @@ MOScompress(Client cntxt, MalBlkPtr mb,
throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
if( pci->argc == 3)
- prop = *getArgReference_str(stk,pci,2);
- if( prop && !strstr(prop,"mosaic"))
+ msg = *getArgReference_str(stk,pci,2);
+ if( msg && !strstr(msg,"mosaic"))
for( i = 0; i< MOSAIC_METHODS; i++)
- task->filter[i]= strstr(prop,MOSfiltername[i]) != 0;
+ task->filter[i]= strstr(msg,MOSfiltername[i]) != 0;
else
for( i = 0; i< MOSAIC_METHODS; i++)
task->filter[i]= 1;
- prop= MOScompressInternal(cntxt, getArgReference_bat(stk,pci,0),
getArgReference_bat(stk,pci,1), task, flg);
+ msg= MOScompressInternal(cntxt, getArgReference_bat(stk,pci,1), task,
flg);
GDKfree(task);
- return prop;
+ return msg;
}
// recreate the uncompressed heap from its mosaic version
str
-MOSdecompressInternal(Client cntxt, bat *ret, bat *bid)
+MOSdecompressInternal(Client cntxt, bat *bid)
{
BAT *bsrc;
MOStask task;
@@ -598,11 +598,10 @@ MOSdecompressInternal(Client cntxt, bat
if (BATcheckmosaic(bsrc) == 0 ){
BBPunfix(bsrc->batCacheid);
- BBPkeepref(*ret = bsrc->batCacheid);
return MAL_SUCCEED;
}
if (!bsrc->tmosaic) {
- BBPkeepref(*ret = bsrc->batCacheid);
+ BBPunfix(bsrc->batCacheid);
return MAL_SUCCEED;
}
@@ -664,6 +663,7 @@ MOSdecompressInternal(Client cntxt, bat
}
}
+ task->ratio = (flt)task->bsrc->theap.free/ task->bsrc->tmosaic->free;
error = 0;
switch( ATOMbasetype(task->type)){
@@ -687,11 +687,13 @@ MOSdecompressInternal(Client cntxt, bat
break;
case TYPE_str:
break;
+#ifdef _DEBUG_MOSAIC_
default:
mnstr_printf(cntxt->fdout,"#unknown compression
compatibility\n");
+#endif
}
- if(error)
- mnstr_printf(cntxt->fdout,"#incompatible compression\n");
+ if(error)
+ mnstr_printf(cntxt->fdout,"#incompatible compression for type
%d ratio %f\n", ATOMbasetype(task->type),task->ratio);
task->timer = GDKusec() - task->timer;
@@ -701,7 +703,7 @@ MOSdecompressInternal(Client cntxt, bat
bsrc->batDirty = 1;
MOSdestroy(bsrc);
BATsettrivprop(bsrc);
- BBPkeepref( *ret = bsrc->batCacheid);
+ BBPunfix(bsrc->batCacheid);
MCexitMaintenance(cntxt);
return MAL_SUCCEED;
@@ -711,14 +713,14 @@ str
MOSdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
(void) mb;
- return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,0),
getArgReference_bat(stk,pci,1));
+ return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,1));
}
str
MOSdecompressStorage(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
(void) mb;
- return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,0),
getArgReference_bat(stk,pci,1));
+ return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,1));
}
// The remainders is cloned from the generator code base
@@ -1341,7 +1343,6 @@ int
MOSanalyseInternal(Client cntxt, int threshold, MOStask task, bat bid)
{
BAT *b;
- int ret = 0;
str type;
b = BATdescriptor(bid);
@@ -1384,18 +1385,14 @@ MOSanalyseInternal(Client cntxt, int thr
#endif
case TYPE_str:
mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t", bid,
BBP_physical(bid), type, BATcount(b));
- MOScompressInternal(cntxt, &ret, &bid, task,TRUE);
+ MOScompressInternal(cntxt, &bid, task,TRUE);
MOSdestroy(BBPdescriptor(bid));
- if( ret != b->batCacheid)
- BBPdecref(ret, TRUE);
break;
default:
if( b->ttype == TYPE_timestamp || b->ttype == TYPE_date ||
b->ttype == TYPE_daytime){
mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t",
bid, BBP_physical(bid), type, BATcount(b));
- MOScompressInternal(cntxt, &ret, &bid, task,TRUE);
+ MOScompressInternal(cntxt, &bid, task,TRUE);
MOSdestroy(BBPdescriptor(bid));
- if( ret != b->batCacheid)
- BBPdecref(ret, TRUE);
} else
mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t
illegal compression type %s\n", bid, BBP_logical(bid), type, BATcount(b),
getTypeName(b->ttype));
;
@@ -1405,47 +1402,63 @@ MOSanalyseInternal(Client cntxt, int thr
return 1;
}
+/*
+ * An analysis of all possible compressors
+ * Drop techniques if they are not able to reduce the size below a factor 1.0
+ */
#define CANDIDATES 256 /* all three combinations */
+
void
-MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio,
BAT *brun, str compressions)
+MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio,
BAT *bcompress, BAT *bdecompress, str compressions)
{
- int i,j,k,cases, bit=1, ret, bid= b->batCacheid;
- BUN cnt= BATcount(b), xsize;
- lng input;
+ int i,j,k,cases, bit=1, bid= b->batCacheid;
+ BUN xsize;
MOStask task;
int pattern[CANDIDATES];
char technique[CANDIDATES]={0}, *t = technique;
dbl xf[CANDIDATES], ratio;
- lng clk;
+ lng clk,clk1;
- cases = makepatterns(pattern,CANDIDATES, compressions);
task = (MOStask) GDKzalloc(sizeof(*task));
if( task == NULL)
return;
+ // create the list of all possible 2^6 compression patterns
+ cases = makepatterns(pattern,CANDIDATES, compressions);
+
for( i = 0; i < CANDIDATES; i++)
xf[i]= -1;
- input = cnt * ATOMsize(b->ttype);
for( i = 1; i< cases; i++){
- // filter in-effective sub-patterns
- for( j=0; j < i; j++)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list