Changeset: 52f0c4422b43 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=52f0c4422b43
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/exports.stable.out
        monetdb5/modules/mal/groupby.c
        monetdb5/modules/mal/groupby.h
        monetdb5/modules/mal/groupby.mal
        monetdb5/optimizer/opt_groups.c
        monetdb5/optimizer/opt_pipes.c
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
        sql/test/Tests/setoptimizer.stable.err
        sql/test/Tests/setoptimizer.stable.out
Branch: default
Log Message:

Cleanup of the groups optimizer
Properly catch the multi-column group by operations
for further optimization.
The code should first be synchronized with mergetable before it is
activated, and a proper re-ordering test should prove its validity.


diffs (truncated from 323 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -39292,6 +39292,10 @@ command geom.point(x:dbl,y:dbl):wkb
 address wkbcreatepoint;
 comment Construct a point from two geometries
 
+pattern group.multicolumn(b:bat[:oid,:any]...) 
(ref:bat[:oid,:oid],grp:bat[:oid,:oid],hist:bat[:oid,:any]) 
+address GROUPmulticolumngroup;
+comment Derivation of a group index over multiple columns.
+
 command 
group.subgroupdone(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:oid],h:bat[:oid,:wrd])
 (groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd]) 
 address GRPsubgroup4;
 command group.subgroupdone(b:bat[:oid,:any_1],g:bat[:oid,:oid]) 
(groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd]) 
@@ -39304,10 +39308,6 @@ command group.subgroup(b:bat[:oid,:any_1
 address GRPsubgroup2;
 command group.subgroup(b:bat[:oid,:any_1]) 
(groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd]) 
 address GRPsubgroup1;
-pattern group.subgroup(b:bat[:oid,:any]...) 
(ref:bat[:oid,:oid],grp:bat[:oid,:oid],hist:bat[:oid,:any]) 
-address GROUPmulticolumn;
-comment Derivation of a group index over multiple columns.
-
 command identifier.#fromstr():void 
 address IDfromString;
 comment Convert a string to an identifier without any check
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1382,7 +1382,7 @@ str FCTgetOwners(int *ret);
 str FCTgetPlants(int *ret, int *ret2);
 str FCTsetLocation(int *ret, str *loc);
 str FCTshutdown(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
-str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
+str GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 str GRPsubgroup1(bat *ngid, bat *next, bat *nhis, bat *bid);
 str GRPsubgroup2(bat *ngid, bat *next, bat *nhis, bat *bid, bat *gid);
 str GRPsubgroup4(bat *ngid, bat *next, bat *nhis, bat *bid, bat *gid, bat 
*eid, bat *hid);
@@ -2776,7 +2776,7 @@ size_t monet_memory;
 void moveInstruction(MalBlkPtr mb, int pc, int target);
 str mtimeRef;
 str mulRef;
-str multicolumnsRef;
+str multicolumnRef;
 str multiplexRef;
 str mvcRef;
 InstrPtr newAssignment(MalBlkPtr mb);
diff --git a/monetdb5/modules/mal/groupby.c b/monetdb5/modules/mal/groupby.c
--- a/monetdb5/modules/mal/groupby.c
+++ b/monetdb5/modules/mal/groupby.c
@@ -162,7 +162,7 @@ GROUPdelete(AGGRtask *a){
  */
 
 str
-GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
        bat *grp = (bat *) getArgReference(stk, pci, 0);
        bat *ext = (bat *) getArgReference(stk, pci, 1);
diff --git a/monetdb5/modules/mal/groupby.h b/monetdb5/modules/mal/groupby.h
--- a/monetdb5/modules/mal/groupby.h
+++ b/monetdb5/modules/mal/groupby.h
@@ -35,6 +35,6 @@
 #define group_by_export extern
 #endif
 
-group_by_export str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr 
stk, InstrPtr pci);
+group_by_export str GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, 
MalStkPtr stk, InstrPtr pci);
 
 #endif /* _GROUPBY_H */
diff --git a/monetdb5/modules/mal/groupby.mal b/monetdb5/modules/mal/groupby.mal
--- a/monetdb5/modules/mal/groupby.mal
+++ b/monetdb5/modules/mal/groupby.mal
@@ -17,6 +17,6 @@
 
 module group;
 
-pattern group.subgroup(b:bat[:oid,:any]...)(ref:bat[:oid,:oid], 
grp:bat[:oid,:oid], hist:bat[:oid,:any])
-address GROUPmulticolumn
+pattern group.multicolumn(b:bat[:oid,:any]...)(ref:bat[:oid,:oid], 
grp:bat[:oid,:oid], hist:bat[:oid,:any])
+address GROUPmulticolumngroup
 comment "Derivation of a group index over multiple columns.";
diff --git a/monetdb5/optimizer/opt_groups.c b/monetdb5/optimizer/opt_groups.c
--- a/monetdb5/optimizer/opt_groups.c
+++ b/monetdb5/optimizer/opt_groups.c
@@ -22,25 +22,41 @@
 #include "group.h"
 
 int
-OPTgroupsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
+OPTgroupsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci)
 {
-       int i, actions=0;
-       InstrPtr q;
+       int i, j, actions=0;
+       InstrPtr q,p;
        InstrPtr *old, *ref;
        int limit,slimit;
+       int *used;
 
        (void) cntxt;
        (void) stk;
+       (void) pci;
        if (varGetProp(mb, getArg(mb->stmt[0], 0), inlineProp) != NULL) {
                return 0;
        }
 
+// Code should first be synchronized with mergetable
+// And a proper re-ordering test should proof its validity
+       if (1)
+               return 0;
+
        /* beware, new variables and instructions are introduced */
        ref= (InstrPtr*) GDKzalloc(sizeof(InstrPtr) * mb->vtop); /* to find 
last assignment */
        if ( ref == NULL) {
                return 0;
        }
+       used= (int*) GDKzalloc(sizeof(InstrPtr) * mb->vtop); /* use count  */
+       if( used == NULL){
+               GDKfree(ref);
+               return 0;
+       }
 
+       OPTDEBUGgroups {
+               mnstr_printf(cntxt->fdout,"Group by optimizer\n");
+               printFunction(cntxt->fdout,mb,0,LIST_MAL_STMT);
+       }
        old= mb->stmt;
        limit= mb->stop;
        slimit= mb->ssize;
@@ -49,39 +65,53 @@ OPTgroupsImplementation(Client cntxt, Ma
                return 0;
        }
 
+       // determine use count for all variables
        for (i = 0; i<limit; i++){
                p= old[i];
-               if (getModuleId(p) == groupRef && p->argc == 4 && 
getFunctionId(p) == subgroupRef ){
-                       //setModuleId(p, groupRef);
-                       //setFunctionId(p, multicolumnsRef);
-                       ref[getArg(p,0)] = p;
-                       actions++;
-                       OPTDEBUGgroups {
-                               mnstr_printf(cntxt->fdout,"#new groups 
instruction\n");
-                               printInstruction(cntxt->fdout,mb, 0, p, 
LIST_MAL_ALL);
-                       }
+               for(j= p->retc; j<p->argc; j++)
+                       ref[getArg(p,j)]++;
+       }
+       
+
+       for (i = 0; i<limit; i++){
+               p= old[i];
+               if (getModuleId(p) == groupRef ){
+                       if (p->argc == 4 && getFunctionId(p) == subgroupRef  && 
used[getArg(p,1)] ==0 && used[getArg(p,2)] == 0){
+                               setFunctionId(p, multicolumnRef);
+                               ref[getArg(p,0)] = p;
+                               actions++;
+                               OPTDEBUGgroups {
+                                       mnstr_printf(cntxt->fdout,"#new groups 
instruction\n");
+                                       printInstruction(cntxt->fdout,mb, 0, p, 
LIST_MAL_ALL);
+                               }
+                       } else
+                       if (p->argc == 5 && getFunctionId(p) == subgroupRef  && 
(q= ref[getArg(p,p->argc-1)]) && used[getArg(p,1)] ==0 && used[getArg(p,2)] == 
0){
+                               p->argc--;
+                               for( j = q->argc-1; j>= q->retc; j--)
+                                       p = setArgument(mb,p,p->retc, 
getArg(q,j));
+                               ref[getArg(p,0)] = p;
+                               setFunctionId(p, multicolumnRef);
+                               OPTDEBUGgroups{
+                                       mnstr_printf(cntxt->fdout,"#new groups 
instruction extension\n");
+                                       printInstruction(cntxt->fdout,mb, 0, p, 
LIST_MAL_ALL);
+                               }
+                       } else
+                       if (p->argc == 5 && getFunctionId(p) == subgroupdoneRef 
&& (q= ref[getArg(p,p->argc-1)]) ){
+                               /*
+                                * Expand its argument list with what we have 
found so far.
+                                * This creates a series of derive paths, many 
of which will be removed during deadcode elimination.
+                                */
+                               p->argc--;
+                               for( j = q->argc-1; j>= q->retc; j--)
+                                       p = setArgument(mb,p,p->retc, 
getArg(q,j));
+                               ref[getArg(p,0)] = p;
+                               setFunctionId(p, multicolumnRef);
+                               OPTDEBUGgroups{
+                                       mnstr_printf(cntxt->fdout,"#new groups 
instruction extension\n");
+                                       printInstruction(cntxt->fdout,mb, 0, p, 
LIST_MAL_ALL);
+                               }
+                       } 
                }
-               if (getModuleId(p) == groupRef && p->argc == 5 && 
getFunctionId(p) == subgroupdoneRef && ref[getArg(p,4)] != NULL){
-                       /*
-                        * Try to expand its argument list with what we have 
found so far.
-                        * This creates a series of derive paths, many of which 
will be removed during deadcode elimination.
-                        */
-                       q= copyInstruction(ref[getArg(p,4)]);
-                       q= pushArgument(mb, q, getArg(p,3));
-                       getArg(q,0) = getArg(p,0);
-                       setVarType(mb,getArg(q,0),getArgType(mb,p,0));
-                       getArg(q,1) = getArg(p,1);
-                       setVarType(mb,getArg(q,1),getArgType(mb,p,1));
-                       getArg(q,2) = getArg(p,2);
-                       setVarType(mb,getArg(q,2),getArgType(mb,p,2));
-                       ref[getArg(q,0)] = q;
-                       freeInstruction(p);
-                       p= q;
-                       OPTDEBUGgroups{
-                               mnstr_printf(cntxt->fdout,"#new groups 
instruction extension\n");
-                               printInstruction(cntxt->fdout,mb, 0, p, 
LIST_MAL_ALL);
-                       }
-               } 
                pushInstruction(mb,p);
        }
        for(; i<slimit; i++)
@@ -89,9 +119,10 @@ OPTgroupsImplementation(Client cntxt, Ma
                        freeInstruction(old[i]);
        GDKfree(old);
        GDKfree(ref);
+       GDKfree(used);
        OPTDEBUGgroups if( actions) {
                mnstr_printf(cntxt->fdout,"Result of group by optimizer\n");
-               printFunction(cntxt->fdout,mb,0,LIST_MAL_ALL);
+               printFunction(cntxt->fdout,mb,0,LIST_MAL_STMT);
        }
        return actions;
 }
diff --git a/monetdb5/optimizer/opt_pipes.c b/monetdb5/optimizer/opt_pipes.c
--- a/monetdb5/optimizer/opt_pipes.c
+++ b/monetdb5/optimizer/opt_pipes.c
@@ -108,7 +108,7 @@ struct PIPELINES {
         "optimizer.mergetable();"
         "optimizer.deadcode();"
         "optimizer.commonTerms();"
-        //"optimizer.groups();"
+        "optimizer.groups();"
         "optimizer.joinPath();"
         "optimizer.reorder();"
         "optimizer.deadcode();"
diff --git a/monetdb5/optimizer/opt_prelude.c b/monetdb5/optimizer/opt_prelude.c
--- a/monetdb5/optimizer/opt_prelude.c
+++ b/monetdb5/optimizer/opt_prelude.c
@@ -149,6 +149,7 @@ str markTRef;
 str mark_grpRef;
 str materializeRef;
 str mtimeRef;
+str multicolumnRef;
 str dense_rank_grpRef;
 str matRef;
 str max_no_nilRef;
@@ -165,7 +166,6 @@ str mkeyRef;
 str mmathRef;
 str multiplexRef;
 str manifoldRef;
-str multicolumnsRef;
 str mvcRef;
 str newRef;
 str notRef;
@@ -421,6 +421,7 @@ void optimizerInit(void){
                mark_grpRef = putName("mark_grp", 8);
                materializeRef = putName("materialize", 11);
                mtimeRef = putName("mtime", 5);
+               multicolumnRef = putName("multicolumn", 11);
                dense_rank_grpRef = putName("dense_rank_grp", 14);
                matRef = putName("mat", 3);
                max_no_nilRef = putName("max_no_nil", 10);
@@ -437,7 +438,6 @@ void optimizerInit(void){
                mmathRef = putName("mmath", 5);
                multiplexRef = putName("multiplex", 9);
                manifoldRef = putName("manifold", 8);
-               multicolumnsRef = putName("multicolumns", 12);
                mvcRef = putName("mvc", 3);
                newRef = putName("new",3);
                notRef = putName("not",3);
diff --git a/monetdb5/optimizer/opt_prelude.h b/monetdb5/optimizer/opt_prelude.h
--- a/monetdb5/optimizer/opt_prelude.h
+++ b/monetdb5/optimizer/opt_prelude.h
@@ -164,7 +164,7 @@ opt_export  str mkeyRef;
 opt_export  str mmathRef;
 opt_export  str multiplexRef;
 opt_export  str manifoldRef;
-opt_export  str multicolumnsRef;
+opt_export  str multicolumnRef;
 opt_export  str mvcRef;
 opt_export  str newRef;
 opt_export  str notRef;
diff --git 
a/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err 
b/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
--- a/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
+++ b/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
@@ -32,8 +32,10 @@ stderr of test 'constants-optimizer.Bug-
 # 00:10:51 >  mclient -lsql -ftest -i -e --host=eir --port=37160 
 # 00:10:51 >  
 
-MAPI  = (monetdb) /var/tmp/mtest-17482/.s.monetdb.35717
+MAPI  = (monetdb) /var/tmp/mtest-15619/.s.monetdb.35375
 QUERY = set optimizer='dictionary_pipe';
+ERROR = !optimizer 'dictionary_pipe' unknown
+
 
 
 
diff --git a/sql/test/Tests/setoptimizer.stable.err 
b/sql/test/Tests/setoptimizer.stable.err
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to