Changeset: b7aa98d7aaf8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b7aa98d7aaf8
Modified Files:
monetdb5/modules/mosaic/mosaic.c
monetdb5/modules/mosaic/mosaic.h
monetdb5/modules/mosaic/mosaic_capped.c
monetdb5/modules/mosaic/mosaic_hdr.c
monetdb5/modules/mosaic/mosaic_projection.h
monetdb5/modules/mosaic/mosaic_raw.c
monetdb5/modules/mosaic/mosaic_select.h
sql/backends/monet5/sql_cat.c
sql/test/mosaic/Tests/analysis.stable.out
Branch: mosaic
Log Message:
1) Prepare for method renames var/capped => dict/dict256
2) Refactoring technique id management.
3) Eliminating MOSAIC_EOL.
diffs (truncated from 739 to 300 lines):
diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -9,7 +9,7 @@
/*
* authors Martin Kersten, A. Koning
* Adaptive compression scheme to reduce the storage footprint for stable persistent data.
- * The permissible compression techniques can be controlled thru an argument list
+ * The permissible compression MOSmethods can be controlled thru an argument list
*/
#include "monetdb_config.h"
@@ -24,49 +24,91 @@
#include "mosaic_frame.h"
#include "mosaic_prefix.h"
-char
*MOSfiltername[]={"raw","runlength","capped","var","delta","linear","frame","prefix","EOL"};
+#define DEFINE_METHOD(NAME) \
+{\
+ .bit = (1 << NAME),\
+ .name = #NAME\
+}
-bool MOSisTypeAllowed(int compression, BAT* b) {
+const Method MOSmethods[] = {
+ DEFINE_METHOD(raw),
+ DEFINE_METHOD(runlength),
+ DEFINE_METHOD(capped),
+ DEFINE_METHOD(var),
+ DEFINE_METHOD(delta),
+ DEFINE_METHOD(linear),
+ DEFINE_METHOD(frame),
+ DEFINE_METHOD(prefix)
+};
+
+#define METHOD_IS_SET(FILTER, IDX) ( (FILTER) & MOSmethods[IDX].bit )
+#define SET_METHOD(FILTER, IDX) ( (FILTER) |= MOSmethods[IDX].bit )
+#define UNSET_METHOD(FILTER, IDX) ( (FILTER) &= ~MOSmethods[IDX].bit )
+
+bit MOSisTypeAllowed(char compression, BAT* b) {
switch (compression) {
- case MOSAIC_RAW: return MOStypes_raw(b);
- case MOSAIC_RLE: return MOStypes_runlength(b);
- case MOSAIC_CAPPED: return MOStypes_capped(b);
- case MOSAIC_VAR: return MOStypes_var(b);
- case MOSAIC_DELTA: return MOStypes_delta(b);
- case MOSAIC_LINEAR: return MOStypes_linear(b);
- case MOSAIC_FRAME: return MOStypes_frame(b);
- case MOSAIC_PREFIX: return MOStypes_prefix(b);
+ case raw: return MOStypes_raw(b);
+ case runlength: return MOStypes_runlength(b);
+ case capped: return MOStypes_capped(b);
+ case var: return MOStypes_var(b);
+ case delta: return MOStypes_delta(b);
+ case linear: return MOStypes_linear(b);
+ case frame: return MOStypes_frame(b);
+ case prefix: return MOStypes_prefix(b);
default: /* should not happen*/ assert(0);
}
return false;
}
+static void
+construct_compression_mask(sht* compression_mask, char* compressions) {
+ if (GDK_STRNIL(compressions)) {
+ *compression_mask = ~0;
+ return;
+ }
+
+ *compression_mask = 0;
+
+ char* _dict256;
+ /* The capped dictionary technique 'capped' has to be processed upfront
+ * to prevent search collision with the variable dictionary technique 'dict'.
+ */
+ while ( (_dict256 = strstr(compressions, MOSmethods[capped].name)) ) {
+ strncpy (_dict256,"______", 6);
+
+ *compression_mask |= MOSmethods[capped].bit;
+ }
+
+ for(unsigned i = 0; i< MOSAIC_METHODS; i++) {
+ if ( strstr(compressions, MOSmethods[i].name) ) {
+ *compression_mask |= MOSmethods[i].bit;
+ }
+ }
+}
+
static bool
-MOSinitializeFilter(MOStask task, const char* compressions) {
+initialize_filter(MOStask task) {
bool is_not_compressible = true;
- if (!GDK_STRNIL(compressions)) {
- for(int i = 0; i< MOSAIC_METHODS-1; i++) {
- if ( (task->filter[i] = strstr(compressions,
MOSfiltername[i]) != 0 && MOSisTypeAllowed(i, task->bsrc)) ) {
- task->hdr->elms[i] = task->hdr->blks[i]
= 0;
- is_not_compressible = false;
- }
- }
- }
- else {
- for(int i = 0; i< MOSAIC_METHODS-1; i++) {
- if ( (task->filter[i] = MOSisTypeAllowed(i,
task->bsrc)) ) {
- task->hdr->elms[i] = task->hdr->blks[i]
= 0;
- is_not_compressible = false;
- }
+ for(unsigned i = 0; i< MOSAIC_METHODS; i++) {
+ if ( METHOD_IS_SET(task->mask, i) && MOSisTypeAllowed(i,
task->bsrc) ) {
+ task->hdr->elms[i] = task->hdr->blks[i] = 0;
+ is_not_compressible = false;
}
}
return is_not_compressible;
}
+static bool
+MOSinitializeFilter(MOStask task, const sht compression_mask) {
+ task->mask = compression_mask;
+
+ return initialize_filter(task);
+}
+
static void
MOSinit(MOStask task, BAT *b) {
char *base;
@@ -96,7 +138,7 @@ str
MOSlayout(BAT *b, BAT *btech, BAT *bcount, BAT *binput, BAT *boutput, BAT
*bproperties)
{
MOStask task=0;
- int i;
+ unsigned i;
char buf[BUFSIZ];
lng zero=0;
@@ -120,9 +162,9 @@ MOSlayout(BAT *b, BAT *btech, BAT *bcoun
BUNappend(bproperties, buf, false) != GDK_SUCCEED ||
BUNappend(boutput, &zero , false) != GDK_SUCCEED)
throw(MAL,"mosaic.layout", MAL_MALLOC_FAIL);
- for(i=0; i < MOSAIC_METHODS-1; i++){
+ for(i=0; i < MOSAIC_METHODS; i++){
lng zero = 0;
- snprintf(buf,BUFSIZ,"%s blocks", MOSfiltername[i]);
+ snprintf(buf,BUFSIZ,"%s blocks", MOSmethods[i].name);
if( BUNappend(btech, buf, false) != GDK_SUCCEED ||
BUNappend(bcount, &task->hdr->blks[i], false) !=
GDK_SUCCEED ||
BUNappend(binput, &task->hdr->elms[i], false) !=
GDK_SUCCEED ||
@@ -195,11 +237,10 @@ MOSlayout(BAT *b, BAT *btech, BAT *bcoun
/*
* Compression is focussed on a single column.
- * Multiple compression techniques are applied at the same time.
+ * Multiple compression MOSmethods are applied at the same time.
*/
#define MOSnewBlk(TASK)\
- MOSsetTag(TASK->blk,MOSAIC_EOL);\
MOSsetCnt(TASK->blk,0);\
TASK->dst = MOScodevector(TASK);
@@ -220,13 +261,13 @@ static str
MOSprepareEstimate(MOStask task) {
str error;
- if (task->filter[MOSAIC_CAPPED]){
+ if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED)){
if ( (error = MOSprepareEstimate_capped(task))) {
return error;
}
}
- if (task->filter[MOSAIC_VAR]){
+ if (METHOD_IS_SET(task->mask, MOSAIC_VAR)){
if ( (error = MOSprepareEstimate_var(task))) {
return error;
}
@@ -267,28 +308,28 @@ static str MOSestimate_inner_##TPE(MOSta
}\
\
/* select candidate amongst those*/\
- if (task->filter[MOSAIC_RLE]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, runlength, TPE,
MOSAIC_RLE);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_RLE)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, runlength, TPE,
MOSAIC_RLE);\
}\
- if (task->filter[MOSAIC_DELTA]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, delta, TPE,
MOSAIC_DELTA);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_DELTA)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, delta, TPE,
MOSAIC_DELTA);\
}\
- if (task->filter[MOSAIC_LINEAR]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, linear, TPE,
MOSAIC_LINEAR);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_LINEAR)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, linear, TPE,
MOSAIC_LINEAR);\
}\
- if (task->filter[MOSAIC_FRAME]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, frame, TPE,
MOSAIC_FRAME);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_FRAME)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, frame, TPE,
MOSAIC_FRAME);\
}\
- if (task->filter[MOSAIC_PREFIX]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, prefix, TPE,
MOSAIC_PREFIX);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_PREFIX)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, prefix, TPE,
MOSAIC_PREFIX);\
}\
- if (task->filter[MOSAIC_CAPPED]) {\
- DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, capped, TPE,
MOSAIC_CAPPED);\
+ if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED)) {\
+ DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, capped, TPE,
MOSAIC_CAPPED);\
}\
- if (task->filter[MOSAIC_VAR]) {\
+ if (METHOD_IS_SET(task->mask, MOSAIC_VAR)) {\
DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, var, TPE,
MOSAIC_VAR);\
}\
- if (task->filter[MOSAIC_RAW]) {\
+ if (METHOD_IS_SET(task->mask, MOSAIC_RAW)) {\
DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, raw, TPE,
MOSAIC_RAW);\
}\
\
@@ -336,8 +377,8 @@ static str MOSestimate_##TPE(MOStask tas
.var_limit = &var_limit,\
.nr_capped_encoded_elements = 0,\
.nr_capped_encoded_blocks = 0,\
- .compression_strategy = {.tag = MOSAIC_EOL, .cnt = 0},\
- .must_be_merged_with_previous = false,\
+ .compression_strategy = {.cnt = 0},\
+ .must_be_merged_with_previous = false\
};\
\
MosaicEstimation current;\
@@ -351,7 +392,7 @@ static str MOSestimate_##TPE(MOStask tas
}\
\
if (!current.is_applicable) {\
- throw(MAL,"mosaic.compress", "Cannot compress BAT with
given compression techniques.");\
+ throw(MAL,"mosaic.compress", "Cannot compress BAT with
given compression MOSmethods.");\
}\
\
if (current.must_be_merged_with_previous) {\
@@ -408,12 +449,12 @@ MOSfinalizeDictionary(MOStask task) {
str error;
- if (task->filter[MOSAIC_VAR]) {
+ if (METHOD_IS_SET(task->mask, MOSAIC_VAR)) {
if ((error = finalizeDictionary_var(task))) {
return error;
}
}
- if (task->filter[MOSAIC_CAPPED]) {
+ if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED)) {
if ((error = finalizeDictionary_capped(task))) {
return error;
}
@@ -514,14 +555,13 @@ MOScompressInternal(BAT* bsrc, const cha
// or when we extend the non-compressed collector block
throw(MAL,"mosaic.compress", "heap construction failes");
}
-
assert(bsrc->tmosaic->parentid == bsrc->batCacheid);
if((task = (MOStask) GDKzalloc(sizeof(*task))) == NULL) {
MOSdestroy(bsrc);
throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
}
-
+
// initialize the non-compressed read pointer
task->src = Tloc(bsrc, 0);
task->start = 0;
@@ -531,15 +571,30 @@ MOScompressInternal(BAT* bsrc, const cha
MOSinit(task,bsrc);
task->blk->cnt= 0;
MOSinitHeader(task);
- if (MOSinitializeFilter(task, compressions)) {
+
+ char* copy = NULL;
+
+ if (compressions) {
+ copy = GDKzalloc(strlen(compressions)+1);
+ strcpy(copy, compressions);
+ if (copy == NULL) {
+ MOSdestroy(bsrc);
+ throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
+ }
+ }
+ sht compression_mask;
+ construct_compression_mask(&compression_mask, copy);
+
+ if (copy) {
+ GDKfree(copy);
+ }
+
+ if (MOSinitializeFilter(task, compression_mask)) {
msg = createException(MAL, "mosaic.compress", "No valid
compression technique given or available for type: %s", ATOMname(task->type));
MOSdestroy(bsrc);
goto finalize;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list