Changeset: 8df0f5e447f3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8df0f5e447f3
Modified Files:
        monetdb5/modules/mosaic/mosaic.c
        monetdb5/modules/mosaic/mosaic.h
        monetdb5/modules/mosaic/mosaic_calendar.c
        monetdb5/modules/mosaic/mosaic_calendar.h
        monetdb5/modules/mosaic/mosaic_delta.c
        monetdb5/modules/mosaic/mosaic_delta.h
        monetdb5/modules/mosaic/mosaic_dictionary.c
        monetdb5/modules/mosaic/mosaic_dictionary.h
        monetdb5/modules/mosaic/mosaic_frame.c
        monetdb5/modules/mosaic/mosaic_frame.h
        monetdb5/modules/mosaic/mosaic_linear.c
        monetdb5/modules/mosaic/mosaic_linear.h
        monetdb5/modules/mosaic/mosaic_prefix.c
        monetdb5/modules/mosaic/mosaic_prefix.h
        monetdb5/modules/mosaic/mosaic_raw.c
        monetdb5/modules/mosaic/mosaic_raw.h
        monetdb5/modules/mosaic/mosaic_runlength.c
        monetdb5/modules/mosaic/mosaic_runlength.h
        sql/backends/monet5/sql_cat.c
        sql/test/mosaic/Tests/All
Branch: mosaic
Log Message:

Refactoring and check if data type and compression match


diffs (truncated from 603 to 300 lines):

diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -26,6 +26,22 @@
 
 char 
*MOSfiltername[]={"raw","runlength","dictionary","delta","linear","frame","prefix","calendar","EOL"};
 
+/* Report whether BAT b may be compressed with the given MOSAIC_* method by
+ * delegating to that method's own MOStypes_* predicate.
+ * Returns the predicate's verdict; an unknown method trips the assertion. */
+bool type_allowed(int compression, BAT* b) {
+       switch (compression) {
+       case MOSAIC_RAW:                return MOStypes_raw(b);
+       case MOSAIC_RLE:                return MOStypes_runlength(b);
+       case MOSAIC_DICT:               return MOStypes_dictionary(b);
+       case MOSAIC_DELTA:              return MOStypes_delta(b);
+       /* Linear must consult its own predicate, not the runlength one.
+        * NOTE(review): assumes MOStypes_linear is declared in mosaic_linear.h
+        * like its siblings - confirm against the full changeset. */
+       case MOSAIC_LINEAR:             return MOStypes_linear(b);
+       case MOSAIC_FRAME:              return MOStypes_frame(b);
+       case MOSAIC_PREFIX:             return MOStypes_prefix(b);
+       case MOSAIC_CALENDAR:   return MOStypes_calendar(b);
+       default: /* should not happen*/ assert(0);
+       }
+
+       /* only reached for an unknown method when assertions are compiled out */
+       return false;
+}
+
 static void
 MOSinit(MOStask task, BAT *b){
        char *base;
@@ -216,18 +232,15 @@ MOSoptimizerCost(Client cntxt, MOStask t
        return cand;
 }
 
-/* the source is extended with a BAT mosaic mirror */
+/* the source is extended with a BAT mosaic heap */
 str
-MOScompressInternal(Client cntxt, bat *bid, MOStask task)
+MOScompressInternal(Client cntxt, BAT* bsrc, MOStask task)
 {
-       BAT *o = NULL, *bsrc;           // the BAT to be augmented with a 
compressed heap
+       BAT *o = NULL;          // the BAT to be augmented with a compressed 
heap
        str msg = MAL_SUCCEED;
        int cand;
        int tpe, typewidth;
        lng t0,t1;
-       
-       if ((bsrc = BATdescriptor(*bid)) == NULL)
-               throw(MAL, "mosaic.compress", INTERNAL_BAT_ACCESS);
 
        switch( tpe =ATOMbasetype(bsrc->ttype)){
        case TYPE_bit:
@@ -246,52 +259,36 @@ MOScompressInternal(Client cntxt, bat *b
                break;
        default:
                // don't compress it
-               BBPunfix(bsrc->batCacheid);
                return MAL_SUCCEED;
        }
 
     if (BATcheckmosaic(bsrc)){
                /* already compressed */
-               BBPunfix(bsrc->batCacheid);
-               return msg;
+               return MAL_SUCCEED;
        }
     assert(bsrc->tmosaic == NULL);
 
-       /* views are never compressed */
-    if (VIEWtparent(bsrc)) {
-        bat p = VIEWtparent(bsrc);
-        o = bsrc;
-        bsrc = BATdescriptor(p);
-        if (BATcheckmosaic(bsrc)) {
-                       BBPunfix(o->batCacheid);
-            return MAL_SUCCEED;
-        }
-        assert(bsrc->tmosaic == NULL);
-    }
-
        if ( BATcount(bsrc) < MOSAIC_THRESHOLD  ){
                /* no need to compress */
-               BBPunfix(bsrc->batCacheid);
-               return msg;
+               return MAL_SUCCEED;
        }
 
 #ifdef _DEBUG_MOSAIC_
-       mnstr_printf(cntxt->fdout,"#compress bat %d \n",*bid);
+       mnstr_printf(cntxt->fdout,"#compress bat %d \n",bsrc->batCacheid);
 #endif
     t0 = GDKusec();
 
-       if( bsrc->tmosaic == NULL && BATmosaic(bsrc,  BATcapacity(bsrc) + 
(MosaicHdrSize + 2 * MosaicBlkSize)/Tsize(bsrc)+ BATTINY) == GDK_FAIL){
+       if(BATmosaic(bsrc,  BATcapacity(bsrc) + (MosaicHdrSize + 2 * 
MosaicBlkSize)/Tsize(bsrc)+ BATTINY) == GDK_FAIL){
                // create the mosaic heap if not available.
                // The final size should be smaller then the original
                // It may, however, be the case that we mix a lot of RAW and, 
say, DELTA small blocks
                // Then we total size may go beyond the original size and we 
should terminate the process.
                // This should be detected before we compress a block, in the 
estimate functions
                // or when we extend the non-compressed collector block
-               BBPunfix(bsrc->batCacheid);
                throw(MAL,"mosaic.compress", "heap construction failes");
        }
 
-       assert(bsrc->tmosaic->parentid == *bid);
+       assert(bsrc->tmosaic->parentid == bsrc->batCacheid);
        
        // initialize the non-compressed read pointer
        task->src = Tloc(bsrc, 0);
@@ -308,7 +305,6 @@ MOScompressInternal(Client cntxt, bat *b
        msg = MCstartMaintenance(cntxt,1,0);
        if( msg != MAL_SUCCEED){
                GDKfree(task);
-               BBPunfix(bsrc->batCacheid);
                throw(MAL, "mosaic.compress", "Can not claim server");
        }
        if( task->filter[MOSAIC_FRAME])
@@ -417,7 +413,6 @@ MOScompressInternal(Client cntxt, bat *b
        task->ratio = task->hdr->ratio = (flt)task->bsrc->theap.free/ 
task->bsrc->tmosaic->free;
 finalize:
        MCexitMaintenance(cntxt);
-       BBPunfix(bsrc->batCacheid);
 
     t1 = GDKusec();
     ALGODEBUG fprintf(stderr, "#BATmosaic: mosaic construction " LLFMT " 
usec\n", t1 - t0);
@@ -442,6 +437,12 @@ MOScompress(Client cntxt, MalBlkPtr mb, 
        if ((b = BATdescriptor(*bid)) == NULL)
                throw(MAL, "mosaic.compress", INTERNAL_BAT_ACCESS);
 
+       /* views are never compressed */
+    if (VIEWtparent(b)) {
+               BBPunfix(b->batCacheid);
+               throw(MAL, "mosaic.compress", "Mosaic does not allow views as 
input.");
+    }
+
        (void) mb;
        task= (MOStask) GDKzalloc(sizeof(*task));
        if( task == NULL){
@@ -452,13 +453,13 @@ MOScompress(Client cntxt, MalBlkPtr mb, 
        if( pci->argc == 3)
                msg = *getArgReference_str(stk,pci,2);
        if( msg && !strstr(msg,"mosaic"))
-               for( i = 0; i< MOSAIC_METHODS; i++)
-                       task->filter[i]= strstr(msg,MOSfiltername[i]) != 0;
+               for( i = 0; i< MOSAIC_METHODS-1; i++)
+                       task->filter[i]= strstr(msg,MOSfiltername[i]) != 0 && 
type_allowed(i, b);
        else
-               for( i = 0; i< MOSAIC_METHODS; i++)
-                       task->filter[i]= 1;
+               for( i = 0; i< MOSAIC_METHODS-1; i++)
+                       task->filter[i]= type_allowed(i, b);
 
-       msg= MOScompressInternal(cntxt, bid, task);
+       msg= MOScompressInternal(cntxt, b, task);
        BBPkeepref(*ret = b->batCacheid);
        GDKfree(task);
        return msg;
@@ -1132,14 +1133,14 @@ MOSjoin(Client cntxt, MalBlkPtr mb, MalS
 
 #define STEP MOSAIC_METHODS
 static int
-makepatterns(int *patterns, int size, str compressions)
+makepatterns(int *patterns, int size, str compressions, BAT* b)
 {
        int i,j,k, idx, bit=1, step = MOSAIC_METHODS - 1;
        int lim= 8*7*6*5*4*3*2;
        int candidate[MOSAIC_METHODS]= {0};
 
-       for( i = 0; i < MOSAIC_METHODS; i++)
-               candidate[i] = compressions == NULL || 
strstr(compressions,MOSfiltername[i]) != 0;
+       for( i = 0; i < MOSAIC_METHODS-1; i++)
+               candidate[i] = (compressions == NULL || 
strstr(compressions,MOSfiltername[i]) != 0) &&  type_allowed(i, b);
 
        for( k=0, i=0; i<lim && k <size; i++){
                patterns[k]=0;
@@ -1208,7 +1209,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B
        if( task == NULL)
                return;
        // create the list of all possible 2^6 compression patterns 
-       cases = makepatterns(pattern,CANDIDATES, compressions);
+       cases = makepatterns(pattern,CANDIDATES, compressions, b);
        memset((char*)pat,0, sizeof(pat));
 
        for( i = 0; i < CANDIDATES; i++)
@@ -1235,7 +1236,10 @@ MOSanalyseReport(Client cntxt, BAT *b, B
                }
                pat[i].technique= GDKstrdup(buf);
                pat[i].clk1 = GDKms();
-               MOScompressInternal(cntxt, &bid, task);
+
+               // TODO: keep a potentially pre-existing mosaic_heap aside.
+
+               MOScompressInternal(cntxt, b, task);
                pat[i].clk1 = GDKms()- pat[i].clk1;
                
 #ifdef _DEBUG_MOSAIC_
@@ -1259,19 +1263,14 @@ MOSanalyseReport(Client cntxt, BAT *b, B
                pat[i].xf= task->ratio;
 
                BAT* decompressed;
-
                pat[i].clk2 = GDKms();
-
                MOSdecompressInternal(cntxt, &decompressed, b);
-
                pat[i].clk2 = GDKms()- pat[i].clk2;
-
                MOSdestroy(decompressed);
-
                BBPunfix(decompressed->batCacheid);
 
                // get rid of mosaic heap
-               MOSdestroy(BBPdescriptor(bid));
+               MOSdestroy(b);
        }
 
        qsort((void*) pat, CANDIDATES, sizeof(struct PAT), cmpPattern);
diff --git a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h
--- a/monetdb5/modules/mosaic/mosaic.h
+++ b/monetdb5/modules/mosaic/mosaic.h
@@ -188,9 +188,10 @@ if ( task->n && task->cl ){\
 
 
 mal_export char *MOSfiltername[];
+mal_export bool type_allowed(int compression, BAT* b);
 mal_export str MOScompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str MOSdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
-mal_export str MOScompressInternal(Client cntxt, bat *bid, MOStask task); // 
TODO: I don't like to export internal functions
+mal_export str MOScompressInternal(Client cntxt, BAT* bsrc, MOStask task); // 
TODO: I don't like to export internal functions
 mal_export str MOSanalyse(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str MOSselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str MOSthetaselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
diff --git a/monetdb5/modules/mosaic/mosaic_calendar.c 
b/monetdb5/modules/mosaic/mosaic_calendar.c
--- a/monetdb5/modules/mosaic/mosaic_calendar.c
+++ b/monetdb5/modules/mosaic/mosaic_calendar.c
@@ -23,6 +23,10 @@
 #define MASKDAY 037
 #define MASKBITS 5
 
+/* Type gate for calendar compression: accepts BAT b only when the base type
+ * of b->ttype is TYPE_date.
+ * TODO: also admit TYPE_timestamp and TYPE_daytime.
+ * NOTE(review): confirm ATOMbasetype does not map TYPE_date to its storage
+ * type, which would make this comparison always false. */
+bool MOStypes_calendar(BAT* b) {
+       const int base = ATOMbasetype(getBatType(b->ttype));
+
+       return base == TYPE_date;
+}
+
 void
 MOSadvance_calendar(Client cntxt, MOStask task)
 {
diff --git a/monetdb5/modules/mosaic/mosaic_calendar.h 
b/monetdb5/modules/mosaic/mosaic_calendar.h
--- a/monetdb5/modules/mosaic/mosaic_calendar.h
+++ b/monetdb5/modules/mosaic/mosaic_calendar.h
@@ -20,6 +20,7 @@
 #define TEMPORALTHRESHOLD 4192
 #define TEMPORALSIZE 256 
 
+bool MOStypes_calendar(BAT* b);
 mal_export void MOScreatecalendar(Client cntxt, MOStask task);
 mal_export void MOSlayout_calendar(Client cntxt, MOStask task, BAT *btech, BAT 
*bcount, BAT *binput, BAT *boutput, BAT *bproperties);
 mal_export void MOSlayout_calendar_hdr(Client cntxt, MOStask task, BAT *btech, 
BAT *bcount, BAT *binput, BAT *boutput, BAT *bproperties);
diff --git a/monetdb5/modules/mosaic/mosaic_delta.c 
b/monetdb5/modules/mosaic/mosaic_delta.c
--- a/monetdb5/modules/mosaic/mosaic_delta.c
+++ b/monetdb5/modules/mosaic/mosaic_delta.c
@@ -18,6 +18,29 @@
 
 //#define _DEBUG_MOSAIC_
 
+/* Type gate for delta compression: decides from the base type of b->ttype
+ * (and, for strings, from b->twidth) whether the delta scheme may be used
+ * on BAT b. Returns true when accepted, false otherwise. */
+bool MOStypes_delta(BAT* b) {
+       switch (ATOMbasetype(getBatType(b->ttype))) {
+       case TYPE_sht:
+       case TYPE_int:
+       case TYPE_lng:
+       case TYPE_oid:
+#ifdef HAVE_HGE
+       case TYPE_hge:
+#endif
+               return true;
+       /* TODO: case TYPE_flt: return true; */
+       /* TODO: case TYPE_dbl: return true; */
+       case TYPE_str:
+               /* strings are accepted only for widths of 2, 4 or 8 */
+               return b->twidth == 2 || b->twidth == 4 || b->twidth == 8;
+       default:
+               return false;
+       }
+}
+
 void
 MOSadvance_delta(Client cntxt, MOStask task)
 {
diff --git a/monetdb5/modules/mosaic/mosaic_delta.h 
b/monetdb5/modules/mosaic/mosaic_delta.h
--- a/monetdb5/modules/mosaic/mosaic_delta.h
+++ b/monetdb5/modules/mosaic/mosaic_delta.h
@@ -17,6 +17,7 @@
 #include "mal_interpreter.h"
 #include "mal_client.h"
 
+bool MOStypes_delta(BAT* b);
 mal_export void MOSlayout_delta(Client cntxt, MOStask task, BAT *btech, BAT 
*bcount, BAT *binput, BAT *boutput, BAT *bproperties);
 mal_export void MOSadvance_delta(Client cntxt, MOStask task);
 mal_export void MOSskip_delta(Client cntxt, MOStask task);
diff --git a/monetdb5/modules/mosaic/mosaic_dictionary.c 
b/monetdb5/modules/mosaic/mosaic_dictionary.c
--- a/monetdb5/modules/mosaic/mosaic_dictionary.c
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to