Changeset: 27384408157e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=27384408157e
Modified Files:
monetdb5/modules/mosaic/mosaic.c
sql/test/mosaic/Tests/All
Branch: mosaic
Log Message:
Improve mosaic.analysis:
We now exclude unnecessarily complicated combinations
that do not improve the compression rate,
both in the output and in the computation.
diffs (164 lines):
diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -46,14 +46,16 @@ static void
MOSinitializeFilter(MOStask task, const char* compressions) {
if (!GDK_STRNIL(compressions)) {
for(int i = 0; i< MOSAIC_METHODS-1; i++) {
- task->filter[i] = strstr(compressions,
MOSfiltername[i]) != 0 && type_allowed(i, task->bsrc);
- task->hdr->elms[i] = task->hdr->blks[i] = 0;
+ if ( (task->filter[i] = strstr(compressions,
MOSfiltername[i]) != 0 && type_allowed(i, task->bsrc)) ) {
+ task->hdr->elms[i] = task->hdr->blks[i]
= 0;
+ }
}
}
else {
for(int i = 0; i< MOSAIC_METHODS-1; i++) {
- task->filter[i] = type_allowed(i, task->bsrc);
- task->hdr->elms[i] = task->hdr->blks[i] = 0;
+ if ( (task->filter[i] = type_allowed(i,
task->bsrc)) ) {
+ task->hdr->elms[i] = task->hdr->blks[i]
= 0;
+ }
}
}
}
@@ -317,8 +319,8 @@ MOScompressInternal(BAT* bsrc, const cha
MOSinit(task,bsrc);
task->blk->cnt= 0;
+ MOSinitHeader(task);
MOSinitializeFilter(task, compressions);
- MOSinitHeader(task);
if( msg != MAL_SUCCEED){
GDKfree(task);
@@ -1183,45 +1185,44 @@ makepatterns(int *patterns, int size, st
#define CANDIDATES 256 /* all three combinations */
struct PAT{
- int pattern;
+ bool include;
str technique;
BUN xsize;
dbl xf;
lng clk1, clk2;
}pat[CANDIDATES];
-static int cmpPattern(const void *p1, const void *p2){
- struct PAT *r1, *r2;
- r1= (struct PAT *) p1;
- r2= (struct PAT *) p2;
- if( r1->xf > r2->xf) return -1;
- if( r1->xf == r2->xf && r1->technique && r2->technique) return
strlen(r1->technique) > strlen(r2->technique);
- return 0;
-}
-
void
MOSanalyseReport(BAT *b, BAT *btech, BAT *boutput, BAT *bratio, BAT
*bcompress, BAT *bdecompress, str compressions)
{
- int i,j,k,cases, bit=1, bid= b->batCacheid;
+ int i,j,cases, bit=1, bid= b->batCacheid;
int pattern[CANDIDATES];
+ int antipattern[CANDIDATES];
+ int antipatternSize = 0;
char buf[1024]={0}, *t;
int filter[MOSAIC_METHODS];
// create the list of all possible 2^6 compression patterns
cases = makepatterns(pattern,CANDIDATES, compressions, b);
+
+ memset(antipattern,0, sizeof(antipattern));
+ antipatternSize++; // the first pattern aka 0 is always an antipattern.
+
memset((char*)pat,0, sizeof(pat));
- for( i = 0; i < CANDIDATES; i++)
- pat[i].xf= -1;
for( i = 1; i< cases; i++) {
- // Ignore patterns that have a poor individual compressor
- for( j= 0; j < MOSAIC_METHODS-1 && j < i; j++) {
- if ( (pattern[i] & pattern[j]) == pattern[j] &&
pat[j].xf >= 0 && pat[j].xf < 1.0) {
+ pat[i].include = true;
+ // Ignore patterns that have a poor or unused individual
compressor
+ bool skip = false;
+ for( j=1; j < antipatternSize; j++) {
+ if ( (pattern[i] & antipattern[j]) ==
antipattern[j] && pattern[i] > antipattern[j]) {
+ pat[i].include = false;
+ skip = true;
break;
}
}
- if( j < MOSAIC_METHODS-1 && j < i ) continue;
+ if(skip) continue;
t= buf;
*t =0;
@@ -1261,22 +1262,27 @@ MOSanalyseReport(BAT *b, BAT *btech, BAT
if (original) {
b->tmosaic = original;
}
-
+ pat[i].include = false;
MOSunsetLock(b);
continue;
}
- // analyse result block distribution to detect a new
compression combination
- for(k=0, j=0, bit=1; j < MOSAIC_METHODS-1; j++){
- if ( ((MosaicHdr) b->tmosaic->base)->blks[j] > 0)
- k |= bit;
- bit *=2;
- }
- for( j=0; j < i; j++)
- if (pattern[j] == k )
- break;
+
pat[i].xsize = (BUN) b->tmosaic->free;
pat[i].xf= ((MosaicHdr) b->tmosaic->base)->ratio;
+ // analyse result block distribution to exclude complicated
compression combination that (probably) won't improve compression rate.
+ if ( i < MOSAIC_METHODS-1 && pat[i].xf >= 0 && pat[i].xf < 1.0)
{
+ antipattern[antipatternSize++] = pattern[i];
+ }
+ else {
+ for(j=1; j < MOSAIC_METHODS-1; j++){
+ if ( ((MosaicHdr) b->tmosaic->base)->blks[j]
== 0) {
+ antipattern[antipatternSize++] =
pattern[i];
+ pat[i].include = false;
+ }
+ }
+ }
+
BAT* decompressed;
pat[i].clk2 = GDKms();
MOSdecompressInternal( &decompressed, b);
@@ -1294,10 +1300,9 @@ MOSanalyseReport(BAT *b, BAT *btech, BAT
MOSunsetLock(b);
}
- qsort((void*) pat, CANDIDATES, sizeof(struct PAT), cmpPattern);
// Collect the results in a table
for(i=0;i< CANDIDATES; i++){
- if( pattern[i] && pat[i].xf >=0){
+ if(pat[i].include) {
// round down to three decimals.
pat[i].xf = ((dbl) (int) (pat[i].xf * 1000)) / 1000;
@@ -1309,7 +1314,8 @@ MOSanalyseReport(BAT *b, BAT *btech, BAT
BUNappend(bdecompress,&pat[i].clk2,false) !=
GDK_SUCCEED )
return;
}
- if( pat[i].technique) GDKfree(pat[i].technique);
+
+ GDKfree(pat[i].technique);
}
}
diff --git a/sql/test/mosaic/Tests/All b/sql/test/mosaic/Tests/All
--- a/sql/test/mosaic/Tests/All
+++ b/sql/test/mosaic/Tests/All
@@ -8,3 +8,4 @@ prefix
mix
session_init
session_exit
+analysis
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list