Changeset: 52f0c4422b43 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=52f0c4422b43
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/exports.stable.out
monetdb5/modules/mal/groupby.c
monetdb5/modules/mal/groupby.h
monetdb5/modules/mal/groupby.mal
monetdb5/optimizer/opt_groups.c
monetdb5/optimizer/opt_pipes.c
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
sql/test/Tests/setoptimizer.stable.err
sql/test/Tests/setoptimizer.stable.out
Branch: default
Log Message:
Cleanup of the groups optimizer
Properly catch the multi-column group by operations
for further optimization.
The code should first be synchronized with mergetable before it is
activated, and a proper re-ordering test should prove its validity.
diffs (truncated from 323 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -39292,6 +39292,10 @@ command geom.point(x:dbl,y:dbl):wkb
address wkbcreatepoint;
comment Construct a point from two geometries
+pattern group.multicolumn(b:bat[:oid,:any]...)
(ref:bat[:oid,:oid],grp:bat[:oid,:oid],hist:bat[:oid,:any])
+address GROUPmulticolumngroup;
+comment Derivation of a group index over multiple columns.
+
command
group.subgroupdone(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:oid],h:bat[:oid,:wrd])
(groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd])
address GRPsubgroup4;
command group.subgroupdone(b:bat[:oid,:any_1],g:bat[:oid,:oid])
(groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd])
@@ -39304,10 +39308,6 @@ command group.subgroup(b:bat[:oid,:any_1
address GRPsubgroup2;
command group.subgroup(b:bat[:oid,:any_1])
(groups:bat[:oid,:oid],extents:bat[:oid,:oid],histo:bat[:oid,:wrd])
address GRPsubgroup1;
-pattern group.subgroup(b:bat[:oid,:any]...)
(ref:bat[:oid,:oid],grp:bat[:oid,:oid],hist:bat[:oid,:any])
-address GROUPmulticolumn;
-comment Derivation of a group index over multiple columns.
-
command identifier.#fromstr():void
address IDfromString;
comment Convert a string to an identifier without any check
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1382,7 +1382,7 @@ str FCTgetOwners(int *ret);
str FCTgetPlants(int *ret, int *ret2);
str FCTsetLocation(int *ret, str *loc);
str FCTshutdown(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
-str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
+str GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
str GRPsubgroup1(bat *ngid, bat *next, bat *nhis, bat *bid);
str GRPsubgroup2(bat *ngid, bat *next, bat *nhis, bat *bid, bat *gid);
str GRPsubgroup4(bat *ngid, bat *next, bat *nhis, bat *bid, bat *gid, bat
*eid, bat *hid);
@@ -2776,7 +2776,7 @@ size_t monet_memory;
void moveInstruction(MalBlkPtr mb, int pc, int target);
str mtimeRef;
str mulRef;
-str multicolumnsRef;
+str multicolumnRef;
str multiplexRef;
str mvcRef;
InstrPtr newAssignment(MalBlkPtr mb);
diff --git a/monetdb5/modules/mal/groupby.c b/monetdb5/modules/mal/groupby.c
--- a/monetdb5/modules/mal/groupby.c
+++ b/monetdb5/modules/mal/groupby.c
@@ -162,7 +162,7 @@ GROUPdelete(AGGRtask *a){
*/
str
-GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
bat *grp = (bat *) getArgReference(stk, pci, 0);
bat *ext = (bat *) getArgReference(stk, pci, 1);
diff --git a/monetdb5/modules/mal/groupby.h b/monetdb5/modules/mal/groupby.h
--- a/monetdb5/modules/mal/groupby.h
+++ b/monetdb5/modules/mal/groupby.h
@@ -35,6 +35,6 @@
#define group_by_export extern
#endif
-group_by_export str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr
stk, InstrPtr pci);
+group_by_export str GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb,
MalStkPtr stk, InstrPtr pci);
#endif /* _GROUPBY_H */
diff --git a/monetdb5/modules/mal/groupby.mal b/monetdb5/modules/mal/groupby.mal
--- a/monetdb5/modules/mal/groupby.mal
+++ b/monetdb5/modules/mal/groupby.mal
@@ -17,6 +17,6 @@
module group;
-pattern group.subgroup(b:bat[:oid,:any]...)(ref:bat[:oid,:oid],
grp:bat[:oid,:oid], hist:bat[:oid,:any])
-address GROUPmulticolumn
+pattern group.multicolumn(b:bat[:oid,:any]...)(ref:bat[:oid,:oid],
grp:bat[:oid,:oid], hist:bat[:oid,:any])
+address GROUPmulticolumngroup
comment "Derivation of a group index over multiple columns.";
diff --git a/monetdb5/optimizer/opt_groups.c b/monetdb5/optimizer/opt_groups.c
--- a/monetdb5/optimizer/opt_groups.c
+++ b/monetdb5/optimizer/opt_groups.c
@@ -22,25 +22,41 @@
#include "group.h"
int
-OPTgroupsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
+OPTgroupsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci)
{
- int i, actions=0;
- InstrPtr q;
+ int i, j, actions=0;
+ InstrPtr q,p;
InstrPtr *old, *ref;
int limit,slimit;
+ int *used;
(void) cntxt;
(void) stk;
+ (void) pci;
if (varGetProp(mb, getArg(mb->stmt[0], 0), inlineProp) != NULL) {
return 0;
}
+// Code should first be synchronized with mergetable
+// And a proper re-ordering test should proof its validity
+ if (1)
+ return 0;
+
/* beware, new variables and instructions are introduced */
ref= (InstrPtr*) GDKzalloc(sizeof(InstrPtr) * mb->vtop); /* to find
last assignment */
if ( ref == NULL) {
return 0;
}
+ used= (int*) GDKzalloc(sizeof(InstrPtr) * mb->vtop); /* use count */
+ if( used == NULL){
+ GDKfree(ref);
+ return 0;
+ }
+ OPTDEBUGgroups {
+ mnstr_printf(cntxt->fdout,"Group by optimizer\n");
+ printFunction(cntxt->fdout,mb,0,LIST_MAL_STMT);
+ }
old= mb->stmt;
limit= mb->stop;
slimit= mb->ssize;
@@ -49,39 +65,53 @@ OPTgroupsImplementation(Client cntxt, Ma
return 0;
}
+ // determine use count for all variables
for (i = 0; i<limit; i++){
p= old[i];
- if (getModuleId(p) == groupRef && p->argc == 4 &&
getFunctionId(p) == subgroupRef ){
- //setModuleId(p, groupRef);
- //setFunctionId(p, multicolumnsRef);
- ref[getArg(p,0)] = p;
- actions++;
- OPTDEBUGgroups {
- mnstr_printf(cntxt->fdout,"#new groups
instruction\n");
- printInstruction(cntxt->fdout,mb, 0, p,
LIST_MAL_ALL);
- }
+ for(j= p->retc; j<p->argc; j++)
+ ref[getArg(p,j)]++;
+ }
+
+
+ for (i = 0; i<limit; i++){
+ p= old[i];
+ if (getModuleId(p) == groupRef ){
+ if (p->argc == 4 && getFunctionId(p) == subgroupRef &&
used[getArg(p,1)] ==0 && used[getArg(p,2)] == 0){
+ setFunctionId(p, multicolumnRef);
+ ref[getArg(p,0)] = p;
+ actions++;
+ OPTDEBUGgroups {
+ mnstr_printf(cntxt->fdout,"#new groups
instruction\n");
+ printInstruction(cntxt->fdout,mb, 0, p,
LIST_MAL_ALL);
+ }
+ } else
+ if (p->argc == 5 && getFunctionId(p) == subgroupRef &&
(q= ref[getArg(p,p->argc-1)]) && used[getArg(p,1)] ==0 && used[getArg(p,2)] ==
0){
+ p->argc--;
+ for( j = q->argc-1; j>= q->retc; j--)
+ p = setArgument(mb,p,p->retc,
getArg(q,j));
+ ref[getArg(p,0)] = p;
+ setFunctionId(p, multicolumnRef);
+ OPTDEBUGgroups{
+ mnstr_printf(cntxt->fdout,"#new groups
instruction extension\n");
+ printInstruction(cntxt->fdout,mb, 0, p,
LIST_MAL_ALL);
+ }
+ } else
+ if (p->argc == 5 && getFunctionId(p) == subgroupdoneRef
&& (q= ref[getArg(p,p->argc-1)]) ){
+ /*
+ * Expand its argument list with what we have
found so far.
+ * This creates a series of derive paths, many
of which will be removed during deadcode elimination.
+ */
+ p->argc--;
+ for( j = q->argc-1; j>= q->retc; j--)
+ p = setArgument(mb,p,p->retc,
getArg(q,j));
+ ref[getArg(p,0)] = p;
+ setFunctionId(p, multicolumnRef);
+ OPTDEBUGgroups{
+ mnstr_printf(cntxt->fdout,"#new groups
instruction extension\n");
+ printInstruction(cntxt->fdout,mb, 0, p,
LIST_MAL_ALL);
+ }
+ }
}
- if (getModuleId(p) == groupRef && p->argc == 5 &&
getFunctionId(p) == subgroupdoneRef && ref[getArg(p,4)] != NULL){
- /*
- * Try to expand its argument list with what we have
found so far.
- * This creates a series of derive paths, many of which
will be removed during deadcode elimination.
- */
- q= copyInstruction(ref[getArg(p,4)]);
- q= pushArgument(mb, q, getArg(p,3));
- getArg(q,0) = getArg(p,0);
- setVarType(mb,getArg(q,0),getArgType(mb,p,0));
- getArg(q,1) = getArg(p,1);
- setVarType(mb,getArg(q,1),getArgType(mb,p,1));
- getArg(q,2) = getArg(p,2);
- setVarType(mb,getArg(q,2),getArgType(mb,p,2));
- ref[getArg(q,0)] = q;
- freeInstruction(p);
- p= q;
- OPTDEBUGgroups{
- mnstr_printf(cntxt->fdout,"#new groups
instruction extension\n");
- printInstruction(cntxt->fdout,mb, 0, p,
LIST_MAL_ALL);
- }
- }
pushInstruction(mb,p);
}
for(; i<slimit; i++)
@@ -89,9 +119,10 @@ OPTgroupsImplementation(Client cntxt, Ma
freeInstruction(old[i]);
GDKfree(old);
GDKfree(ref);
+ GDKfree(used);
OPTDEBUGgroups if( actions) {
mnstr_printf(cntxt->fdout,"Result of group by optimizer\n");
- printFunction(cntxt->fdout,mb,0,LIST_MAL_ALL);
+ printFunction(cntxt->fdout,mb,0,LIST_MAL_STMT);
}
return actions;
}
diff --git a/monetdb5/optimizer/opt_pipes.c b/monetdb5/optimizer/opt_pipes.c
--- a/monetdb5/optimizer/opt_pipes.c
+++ b/monetdb5/optimizer/opt_pipes.c
@@ -108,7 +108,7 @@ struct PIPELINES {
"optimizer.mergetable();"
"optimizer.deadcode();"
"optimizer.commonTerms();"
- //"optimizer.groups();"
+ "optimizer.groups();"
"optimizer.joinPath();"
"optimizer.reorder();"
"optimizer.deadcode();"
diff --git a/monetdb5/optimizer/opt_prelude.c b/monetdb5/optimizer/opt_prelude.c
--- a/monetdb5/optimizer/opt_prelude.c
+++ b/monetdb5/optimizer/opt_prelude.c
@@ -149,6 +149,7 @@ str markTRef;
str mark_grpRef;
str materializeRef;
str mtimeRef;
+str multicolumnRef;
str dense_rank_grpRef;
str matRef;
str max_no_nilRef;
@@ -165,7 +166,6 @@ str mkeyRef;
str mmathRef;
str multiplexRef;
str manifoldRef;
-str multicolumnsRef;
str mvcRef;
str newRef;
str notRef;
@@ -421,6 +421,7 @@ void optimizerInit(void){
mark_grpRef = putName("mark_grp", 8);
materializeRef = putName("materialize", 11);
mtimeRef = putName("mtime", 5);
+ multicolumnRef = putName("multicolumn", 11);
dense_rank_grpRef = putName("dense_rank_grp", 14);
matRef = putName("mat", 3);
max_no_nilRef = putName("max_no_nil", 10);
@@ -437,7 +438,6 @@ void optimizerInit(void){
mmathRef = putName("mmath", 5);
multiplexRef = putName("multiplex", 9);
manifoldRef = putName("manifold", 8);
- multicolumnsRef = putName("multicolumns", 12);
mvcRef = putName("mvc", 3);
newRef = putName("new",3);
notRef = putName("not",3);
diff --git a/monetdb5/optimizer/opt_prelude.h b/monetdb5/optimizer/opt_prelude.h
--- a/monetdb5/optimizer/opt_prelude.h
+++ b/monetdb5/optimizer/opt_prelude.h
@@ -164,7 +164,7 @@ opt_export str mkeyRef;
opt_export str mmathRef;
opt_export str multiplexRef;
opt_export str manifoldRef;
-opt_export str multicolumnsRef;
+opt_export str multicolumnRef;
opt_export str mvcRef;
opt_export str newRef;
opt_export str notRef;
diff --git
a/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
b/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
--- a/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
+++ b/sql/test/BugTracker-2010/Tests/constants-optimizer.Bug-2317.stable.err
@@ -32,8 +32,10 @@ stderr of test 'constants-optimizer.Bug-
# 00:10:51 > mclient -lsql -ftest -i -e --host=eir --port=37160
# 00:10:51 >
-MAPI = (monetdb) /var/tmp/mtest-17482/.s.monetdb.35717
+MAPI = (monetdb) /var/tmp/mtest-15619/.s.monetdb.35375
QUERY = set optimizer='dictionary_pipe';
+ERROR = !optimizer 'dictionary_pipe' unknown
+
diff --git a/sql/test/Tests/setoptimizer.stable.err
b/sql/test/Tests/setoptimizer.stable.err
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list