Changeset: b7aa98d7aaf8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b7aa98d7aaf8
Modified Files:
        monetdb5/modules/mosaic/mosaic.c
        monetdb5/modules/mosaic/mosaic.h
        monetdb5/modules/mosaic/mosaic_capped.c
        monetdb5/modules/mosaic/mosaic_hdr.c
        monetdb5/modules/mosaic/mosaic_projection.h
        monetdb5/modules/mosaic/mosaic_raw.c
        monetdb5/modules/mosaic/mosaic_select.h
        sql/backends/monet5/sql_cat.c
        sql/test/mosaic/Tests/analysis.stable.out
Branch: mosaic
Log Message:

1) Prepare for method renames var/capped => dict/dict256
2) Refactoring technique id management.
3) Eliminating MOSAIC_EOL.


diffs (truncated from 739 to 300 lines):

diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -9,7 +9,7 @@
 /*
  * authors Martin Kersten, A. Koning
  * Adaptive compression scheme to reduce the storage footprint for stable persistent data.
- * The permissible compression techniques can be controlled thru an argument list
+ * The permissible compression MOSmethods can be controlled thru an argument list
 */
 
 #include "monetdb_config.h"
@@ -24,49 +24,91 @@
 #include "mosaic_frame.h"
 #include "mosaic_prefix.h"
 
-char *MOSfiltername[]={"raw","runlength","capped","var","delta","linear","frame","prefix","EOL"};
+#define DEFINE_METHOD(NAME) \
+{\
+       .bit    = (1 << NAME),\
+       .name   = #NAME\
+}
 
-bool MOSisTypeAllowed(int compression, BAT* b) {
+const Method MOSmethods[] = {
+       DEFINE_METHOD(raw),
+       DEFINE_METHOD(runlength),
+       DEFINE_METHOD(capped),
+       DEFINE_METHOD(var),
+       DEFINE_METHOD(delta),
+       DEFINE_METHOD(linear),
+       DEFINE_METHOD(frame),
+       DEFINE_METHOD(prefix)
+};
+
+#define METHOD_IS_SET(FILTER, IDX)     ( (FILTER) & MOSmethods[IDX].bit )
+#define SET_METHOD(FILTER, IDX)        ( (FILTER) |= MOSmethods[IDX].bit )
+#define UNSET_METHOD(FILTER, IDX)      ( (FILTER) &= ~MOSmethods[IDX].bit )
+
+bit MOSisTypeAllowed(char compression, BAT* b) {
        switch (compression) {
-       case MOSAIC_RAW:                return MOStypes_raw(b);
-       case MOSAIC_RLE:                return MOStypes_runlength(b);
-       case MOSAIC_CAPPED:             return MOStypes_capped(b);
-       case MOSAIC_VAR:                return MOStypes_var(b);
-       case MOSAIC_DELTA:              return MOStypes_delta(b);
-       case MOSAIC_LINEAR:             return MOStypes_linear(b);
-       case MOSAIC_FRAME:              return MOStypes_frame(b);
-       case MOSAIC_PREFIX:             return MOStypes_prefix(b);
+       case raw:               return MOStypes_raw(b);
+       case runlength: return MOStypes_runlength(b);
+       case capped:    return MOStypes_capped(b);
+       case var:               return MOStypes_var(b);
+       case delta:             return MOStypes_delta(b);
+       case linear:    return MOStypes_linear(b);
+       case frame:             return MOStypes_frame(b);
+       case prefix:    return MOStypes_prefix(b);
        default: /* should not happen*/ assert(0);
        }
 
        return false;
 }
 
+static void
+construct_compression_mask(sht* compression_mask, char* compressions) {
+       if (GDK_STRNIL(compressions)) {
+               *compression_mask = ~0;
+               return;
+       }
+
+       *compression_mask = 0;
+
+       char* _dict256;
+       /* The capped dictionary technique 'capped' has to be processed upfront
+        * to prevent search collision with the variable dictionary technique 'dict'.
+        */
+       while ( (_dict256 = strstr(compressions, MOSmethods[capped].name)) ) {
+               strncpy (_dict256,"______", 6);
+
+               *compression_mask |= MOSmethods[capped].bit;
+       }
+
+       for(unsigned i = 0; i< MOSAIC_METHODS; i++) {
+               if ( strstr(compressions, MOSmethods[i].name) ) {
+                       *compression_mask |= MOSmethods[i].bit;
+               }
+       }
+}
+
 static bool
-MOSinitializeFilter(MOStask task, const char* compressions) {
+initialize_filter(MOStask task) {
 
        bool is_not_compressible = true;
 
-       if (!GDK_STRNIL(compressions)) {
-               for(int i = 0; i< MOSAIC_METHODS-1; i++) {
-                               if ( (task->filter[i] = strstr(compressions, MOSfiltername[i]) != 0 && MOSisTypeAllowed(i, task->bsrc)) ) {
-                                       task->hdr->elms[i] = task->hdr->blks[i] = 0;
-                                       is_not_compressible = false;
-                               }
-               }
-       }
-       else {
-               for(int i = 0; i< MOSAIC_METHODS-1; i++) {
-                               if ( (task->filter[i] = MOSisTypeAllowed(i, task->bsrc)) ) {
-                                       task->hdr->elms[i] = task->hdr->blks[i] = 0;
-                                       is_not_compressible = false;
-                               }
+       for(unsigned i = 0; i< MOSAIC_METHODS; i++) {
+               if ( METHOD_IS_SET(task->mask, i) && MOSisTypeAllowed(i, task->bsrc) ) {
+                       task->hdr->elms[i] = task->hdr->blks[i] = 0;
+                       is_not_compressible = false;
                }
        }
 
        return is_not_compressible;
 }
 
+static bool
+MOSinitializeFilter(MOStask task, const sht compression_mask) {
+       task->mask = compression_mask;
+
+       return initialize_filter(task);
+}
+
 static void
 MOSinit(MOStask task, BAT *b) {
        char *base;
@@ -96,7 +138,7 @@ str
 MOSlayout(BAT *b, BAT *btech, BAT *bcount, BAT *binput, BAT *boutput, BAT *bproperties)
 {
        MOStask task=0;
-       int i;
+       unsigned i;
        char buf[BUFSIZ];
        lng zero=0;
 
@@ -120,9 +162,9 @@ MOSlayout(BAT *b, BAT *btech, BAT *bcoun
                        BUNappend(bproperties, buf, false) != GDK_SUCCEED ||
                        BUNappend(boutput, &zero , false) != GDK_SUCCEED)
                                throw(MAL,"mosaic.layout", MAL_MALLOC_FAIL);
-       for(i=0; i < MOSAIC_METHODS-1; i++){
+       for(i=0; i < MOSAIC_METHODS; i++){
                lng zero = 0;
-               snprintf(buf,BUFSIZ,"%s blocks", MOSfiltername[i]);
+               snprintf(buf,BUFSIZ,"%s blocks", MOSmethods[i].name);
                if( BUNappend(btech, buf, false) != GDK_SUCCEED ||
                        BUNappend(bcount, &task->hdr->blks[i], false) != GDK_SUCCEED ||
                        BUNappend(binput, &task->hdr->elms[i], false) != GDK_SUCCEED ||
@@ -195,11 +237,10 @@ MOSlayout(BAT *b, BAT *btech, BAT *bcoun
 
 /*
  * Compression is focussed on a single column.
- * Multiple compression techniques are applied at the same time.
+ * Multiple compression MOSmethods are applied at the same time.
  */
 
 #define MOSnewBlk(TASK)\
-                       MOSsetTag(TASK->blk,MOSAIC_EOL);\
                        MOSsetCnt(TASK->blk,0);\
                        TASK->dst = MOScodevector(TASK);
 
@@ -220,13 +261,13 @@ static str
 MOSprepareEstimate(MOStask task) {
 
        str error;
-       if (task->filter[MOSAIC_CAPPED]){
+       if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED)){
                if ( (error = MOSprepareEstimate_capped(task))) {
                        return error;
                }
        }
 
-       if (task->filter[MOSAIC_VAR]){
+       if (METHOD_IS_SET(task->mask, MOSAIC_VAR)){
                if ( (error = MOSprepareEstimate_var(task))) {
                        return error;
                }
@@ -267,28 +308,28 @@ static str MOSestimate_inner_##TPE(MOSta
        }\
 \
        /* select candidate amongst those*/\
-       if (task->filter[MOSAIC_RLE]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, runlength,   TPE, MOSAIC_RLE);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_RLE))              {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, runlength, TPE, MOSAIC_RLE);\
        }\
-       if (task->filter[MOSAIC_DELTA]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, delta,       TPE, MOSAIC_DELTA);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_DELTA))    {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, delta, TPE, MOSAIC_DELTA);\
        }\
-       if (task->filter[MOSAIC_LINEAR]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, linear,      TPE, MOSAIC_LINEAR);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_LINEAR))   {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, linear, TPE, MOSAIC_LINEAR);\
        }\
-       if (task->filter[MOSAIC_FRAME]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, frame,       TPE, MOSAIC_FRAME);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_FRAME))    {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, frame, TPE, MOSAIC_FRAME);\
        }\
-       if (task->filter[MOSAIC_PREFIX]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, prefix,      TPE, MOSAIC_PREFIX);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_PREFIX))   {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, prefix, TPE, MOSAIC_PREFIX);\
        }\
-       if (task->filter[MOSAIC_CAPPED]) {\
-               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, capped,      TPE, MOSAIC_CAPPED);\
+       if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED))   {\
+               DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, capped, TPE, MOSAIC_CAPPED);\
        }\
-       if (task->filter[MOSAIC_VAR]) {\
+       if (METHOD_IS_SET(task->mask, MOSAIC_VAR))              {\
                DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, var, TPE, MOSAIC_VAR);\
        }\
-       if (task->filter[MOSAIC_RAW]) {\
+       if (METHOD_IS_SET(task->mask, MOSAIC_RAW))              {\
                DO_OPERATION_IF_ALLOWED_VARIADIC(estimate, raw, TPE, MOSAIC_RAW);\
        }\
 \
@@ -336,8 +377,8 @@ static str MOSestimate_##TPE(MOStask tas
                .var_limit = &var_limit,\
                .nr_capped_encoded_elements = 0,\
                .nr_capped_encoded_blocks = 0,\
-               .compression_strategy = {.tag = MOSAIC_EOL, .cnt = 0},\
-               .must_be_merged_with_previous = false,\
+               .compression_strategy = {.cnt = 0},\
+               .must_be_merged_with_previous = false\
        };\
 \
        MosaicEstimation current;\
@@ -351,7 +392,7 @@ static str MOSestimate_##TPE(MOStask tas
                }\
 \
                if (!current.is_applicable) {\
-                       throw(MAL,"mosaic.compress", "Cannot compress BAT with given compression techniques.");\
+                       throw(MAL,"mosaic.compress", "Cannot compress BAT with given compression MOSmethods.");\
                }\
 \
                if (current.must_be_merged_with_previous) {\
@@ -408,12 +449,12 @@ MOSfinalizeDictionary(MOStask task) {
 
        str error;
 
-       if (task->filter[MOSAIC_VAR]) {
+       if (METHOD_IS_SET(task->mask, MOSAIC_VAR)) {
                if ((error = finalizeDictionary_var(task))) {
                        return error;
                }
        }
-       if (task->filter[MOSAIC_CAPPED]) {
+       if (METHOD_IS_SET(task->mask, MOSAIC_CAPPED)) {
                if ((error = finalizeDictionary_capped(task))) {
                        return error;
                }
@@ -514,14 +555,13 @@ MOScompressInternal(BAT* bsrc, const cha
                // or when we extend the non-compressed collector block
                throw(MAL,"mosaic.compress", "heap construction failes");
        }
-
        assert(bsrc->tmosaic->parentid == bsrc->batCacheid);
 
        if((task = (MOStask) GDKzalloc(sizeof(*task))) == NULL) {
                MOSdestroy(bsrc);
                throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
        }
-       
+
        // initialize the non-compressed read pointer
        task->src = Tloc(bsrc, 0);
        task->start = 0;
@@ -531,15 +571,30 @@ MOScompressInternal(BAT* bsrc, const cha
        MOSinit(task,bsrc);
        task->blk->cnt= 0;
        MOSinitHeader(task);
-       if (MOSinitializeFilter(task, compressions)) {
+
+       char* copy = NULL;
+       
+       if (compressions) {
+               copy = GDKzalloc(strlen(compressions)+1);
+               strcpy(copy, compressions);
+               if (copy == NULL) {
+                       MOSdestroy(bsrc);
+                       throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL);
+               }
+       }
+       sht compression_mask;
+       construct_compression_mask(&compression_mask, copy);
+
+       if (copy) {
+               GDKfree(copy);
+       }
+
+       if (MOSinitializeFilter(task, compression_mask)) {
                msg = createException(MAL, "mosaic.compress", "No valid compression technique given or available for type: %s", ATOMname(task->type));
                MOSdestroy(bsrc);
                goto finalize;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to