Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv23644

Modified Files:
        opt_mergetable.mx opt_prelude.mx opt_support.mx 
Log Message:
The mergetable optimizer now handles batcalc.* and aggr.* (except avg),
i.e. q1 of TPC-H comes much closer (what is still missing is avg and
proper handling of the group-by columns).

The following part of q1 gives correct output:

select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        count(*) as count_order
from
        lineitem
where
        l_shipdate <= date '1998-12-01' - interval '90' day --(3)
group by
        l_returnflag,
        l_linestatus
;




Index: opt_prelude.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_prelude.mx,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -d -r1.29 -r1.30
--- opt_prelude.mx      12 Nov 2007 22:16:00 -0000      1.29
+++ opt_prelude.mx      14 Nov 2007 10:18:27 -0000      1.30
@@ -53,6 +53,7 @@
 opt_export  str finishRef;
 opt_export  str getRef;
 opt_export  str groupRef;
+opt_export  str groupbyRef;
 opt_export  str hashRef;
 opt_export  str hrangeRef;
 opt_export  str identityRef;
@@ -87,12 +88,14 @@
 opt_export  str packRef;
 opt_export  str plusRef;
 opt_export  str printRef;
+opt_export  str projectRef;
 opt_export  str putRef;
 opt_export  str queryRef;
 opt_export  str reconnectRef;
 opt_export  str remapRef;
 opt_export  str replaceRef;
 opt_export  str resultSetRef;
+opt_export  str reuseRef;
 opt_export  str reverseRef;
 opt_export  str rpcRef;
 opt_export  str rsColumnRef;
@@ -159,6 +162,7 @@
 str finishRef;
 str getRef;
 str groupRef;
+str groupbyRef;
 str hashRef;
 str hrangeRef;
 str identityRef;
@@ -193,12 +197,14 @@
 str packRef;
 str plusRef;
 str printRef;
+str projectRef;
 str putRef;
 str queryRef;
 str reconnectRef;
 str remapRef;
 str replaceRef;
 str resultSetRef;
+str reuseRef;
 str reverseRef;
 str rpcRef;
 str rsColumnRef;
@@ -260,6 +266,7 @@
                finishRef = putName("finish",6);
                getRef = putName("get",3);
                groupRef = putName("group",5);
+               groupbyRef = putName("groupby",7);
                hashRef = putName("hash",4);
                hrangeRef = putName("hrange",6);
                identityRef = putName("identity",8);
@@ -294,16 +301,18 @@
                packRef = putName("pack",4);
                plusRef = putName("+",1);
                printRef = putName("print",5);
+               projectRef = putName("project",7);
                putRef = putName("put",3);
                queryRef = putName("query",5);
                reconnectRef = putName("reconnect",9);
                remapRef = putName("remap",5);
                replaceRef = putName("replace",7);
                resultSetRef = putName("resultSet",9);
+               reuseRef = putName("reuse",5);
                reverseRef = putName("reverse",7);
                rpcRef = putName("rpc",3);
                rsColumnRef = putName("rsColumn",8);
-               selectNotNilRef = putName("selectNotNil",11);
+               selectNotNilRef = putName("selectNotNil",12);
                selectRef = putName("select",6);
                semaRef = putName("sema",4);
                semijoinRef = putName("semijoin",8);

Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -d -r1.23 -r1.24
--- opt_mergetable.mx   13 Nov 2007 09:58:18 -0000      1.23
+++ opt_mergetable.mx   14 Nov 2007 10:18:27 -0000      1.24
@@ -160,8 +160,29 @@
        return mtop+1;
 }
 
+static void
+mat_reuse(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int a1, int a2, int 
a3)
+{
+       int k;
+       
+       for(k=1; k<mat[m]->argc; k++) {
+               InstrPtr q = copyInstruction(p);
+               getArg(q,0) = getArg(mat[m],k);
+
+               if (a1 >= 0)
+                       getArg(q,1) = getArg(mat[a1],k);
+               if (a2 >= 0)
+                       getArg(q,2) = getArg(mat[a2],k);
+               if (a3 >= 0)
+                       getArg(q,3) = getArg(mat[a3],k);
+               pushInstruction(mb,q);
+       }
+       freeInstruction(p);
+}
+
+/* TODO change into general mat1,mat2,mat3 etc */
 static InstrPtr
-mat_join(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int n)
+mat_batcalc(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int n, int o)
 {
        int tpe = getArgType(mb,p,0);
        int k;
@@ -174,8 +195,12 @@
        for(k=1; k<mat[m]->argc; k++) {
                InstrPtr q = copyInstruction(p);
                getArg(q,0) = newTmpVariable(mb, tpe);
-               getArg(q,1) = getArg(mat[m],k);
-               getArg(q,2) = getArg(mat[n],k);
+               if (m >= 0)
+                       getArg(q,1) = getArg(mat[m],k);
+               if (n >= 0)
+                       getArg(q,2) = getArg(mat[n],k);
+               if (o >= 0)
+                       getArg(q,3) = getArg(mat[o],k);
                pushInstruction(mb,q);
 
                /* add result to mat */
@@ -187,22 +212,21 @@
 }
 
 static InstrPtr
-mat_group_aggr(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int n, int o)
+mat_join(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int n)
 {
-       int tp = getArgType(mb,p,0);
+       int tpe = getArgType(mb,p,0);
        int k;
        InstrPtr r = newInstruction(mb, ASSIGNsymbol);
 
        setModuleId(r,matRef);
        setFunctionId(r,newRef);
        getArg(r,0) = getArg(p,0);
-
+       
        for(k=1; k<mat[m]->argc; k++) {
                InstrPtr q = copyInstruction(p);
-               getArg(q,0) = newTmpVariable(mb, tp);
+               getArg(q,0) = newTmpVariable(mb, tpe);
                getArg(q,1) = getArg(mat[m],k);
                getArg(q,2) = getArg(mat[n],k);
-               getArg(q,3) = getArg(mat[o],k);
                pushInstruction(mb,q);
 
                /* add result to mat */
@@ -213,6 +237,158 @@
        return r;
 }
 
+static int
+resultof(MalBlkPtr mb, int var, int topstmt)
+{
+       int i;
+
+       while(--topstmt > 0) {
+               InstrPtr p = mb->stmt[topstmt];
+               for(i=0;i<p->retc; i++)
+                       if (p->argv[i] == var)
+                               return topstmt;
+       }
+       return 0;
+}
+
+/* later we should set the Lifespan parts of a variable in the mat_group
+   function
+ */
+
+static int
+group_chain_list_length(MalBlkPtr mb, int var, int topstmt)
+{
+       int cnt = 0;
+       while(var) {
+               int s = resultof(mb, var, topstmt);
+               InstrPtr p = mb->stmt[s];
+
+               var = 0;
+               if (s == 0)
+                       return 0;
+               if (getModuleId(p) == groupRef && getFunctionId(p) == deriveRef)
+                       var = getArg(p, 3);
+               cnt++;
+       }
+       return cnt;
+}
+
+static void
+group_attrs(int *attrs, MalBlkPtr mb, int var, int ext )
+{
+       int cnt = 0;
+       while(var) {
+               int s = resultof(mb, var, mb->stop);
+               InstrPtr q, p = mb->stmt[s];
+               int attr = 0;
+
+               assert(s!=0);
+               var = 0;
+               if (getModuleId(p) == groupRef && getFunctionId(p) == 
deriveRef){
+                       var = getArg(p, 3);
+                       attr = getArg(p, 4);
+               } else if (getModuleId(p) == groupRef && getFunctionId(p) == 
newRef){
+                       attr = getArg(p, 2);
+               } else {
+                       assert(0);
+               }
+               /* ext.join(attr); */
+               q = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(q, algebraRef);
+               setFunctionId(q, joinRef);
+               getArg(q, 0) = newTmpVariable(mb, getVarType(mb,attr));
+               q = pushArgument(mb, q, ext);
+               q = pushArgument(mb, q, attr);
+               pushInstruction(mb, q);
+               attrs[cnt] = getDestVar(q);
+               cnt++;
+       }
+}
+
+static char *
+aggr_phase2(char *aggr)
+{
+       if (aggr == countRef /* || aggr == count_no_nilRef */)
+               return sumRef;
+       /* min/max/sum are fine */
+       return aggr;
+}
+
+static void
+mat_group_aggr(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int g, int ext)
+{
+       int *attrs, tp = getArgType(mb,p,0), i, k, cnt;
+       char *aggr2 = aggr_phase2(getFunctionId(p));
+       InstrPtr ai1 = newInstruction(mb, ASSIGNsymbol), cur = NULL;
+       InstrPtr ai2 = newInstruction(mb, ASSIGNsymbol);
+
+       setModuleId(ai1,matRef);
+       setFunctionId(ai1,packRef);
+       getArg(ai1,0) = newTmpVariable(mb, tp);
+
+       setModuleId(ai2, aggrRef);
+       setFunctionId(ai2, aggr2);
+       getArg(ai2,0) = getArg(p,0);
+
+       for(k=1; k<mat[m]->argc; k++) {
+               InstrPtr q = copyInstruction(p);
+               getArg(q,0) = newTmpVariable(mb, tp);
+               getArg(q,1) = getArg(mat[m],k);
+               getArg(q,2) = getArg(mat[g],k);
+               getArg(q,3) = getArg(mat[ext],k);
+               pushInstruction(mb,q);
+
+               /* add result to mat */
+               ai1 = pushArgument(mb,ai1,getArg(q,0));
+       }
+       freeInstruction(p);
+       pushInstruction(mb, ai1);
+
+       /* The extend/histogram in a mat of groupings should be 
+          rewritten into a single extend. This is simply done by joining the
+          the group attributes with the per part extend, which are merged
+          into a new bats. Also the aggregates are merged.
+          These should then be grouped and aggregated again (allthough 
+          possibly with a different aggregation function).
+
+          TODO optimization: we should reuse groupings among multiple aggrs.
+        */
+       cnt = group_chain_list_length(mb, getArg(mat[g], 1), mb->stop);
+       attrs = alloca(cnt * sizeof(int) * mat[ext]->argc);
+       for(k=1; k<mat[ext]->argc; k++) 
+               group_attrs(attrs+k*cnt, mb, getArg(mat[g], k), 
getArg(mat[ext], k));
+       for(i=cnt-1; i>=0; i--) {
+               /* pack, group (or derive) */
+               InstrPtr pck = newInstruction(mb, ASSIGNsymbol);
+               InstrPtr grp = newInstruction(mb, ASSIGNsymbol);
+
+               setModuleId(grp,groupRef);
+               setFunctionId(grp, newRef);
+               
+               setModuleId(pck,matRef);
+               setFunctionId(pck,packRef);
+               getArg(pck,0) = newTmpVariable(mb, getVarType(mb, 
attrs[cnt+i]));
+               for(k=1; k<mat[m]->argc; k++) 
+                       pck = pushArgument(mb, pck, attrs[k*cnt+i]);
+               pushInstruction(mb, pck);
+
+               getArg(grp,0) = newTmpVariable(mb, 
newBatType(TYPE_oid,TYPE_int));
+               grp = pushReturn(mb, grp, newTmpVariable(mb, 
newBatType(TYPE_oid,TYPE_oid)));
+               if (cur) {
+                       setFunctionId(grp, deriveRef);
+                       grp = pushArgument(mb, grp, getArg(cur, 0));
+                       grp = pushArgument(mb, grp, getArg(cur, 1));
+               }
+               grp = pushArgument(mb, grp, getArg(pck, 0));
+               pushInstruction(mb, grp);
+               cur = grp;
+       }
+       ai2 = pushArgument(mb, ai2, getArg(ai1, 0));
+       ai2 = pushArgument(mb, ai2, getArg(cur, 1));
+       ai2 = pushArgument(mb, ai2, getArg(cur, 0));
+       pushInstruction(mb, ai2);
+}
+
 static InstrPtr
 mat_group(InstrPtr *ext, MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m, int 
n, int o)
 {
@@ -235,9 +411,9 @@
                getArg(q,0) = newTmpVariable(mb, tp0);
                getArg(q,1) = newTmpVariable(mb, tp1);
                getArg(q,2) = getArg(mat[m],k);
-               if (n)
+               if (n >= 0)
                        getArg(q,3) = getArg(mat[n],k);
-               if (o)
+               if (o >= 0)
                        getArg(q,4) = getArg(mat[o],k);
                pushInstruction(mb,q);
 
@@ -344,7 +520,7 @@
                r = newInstruction(mb, ASSIGNsymbol);
                getArg(r,0)= getArg(mat[m],0);
                getArg(r,1)= getArg(mat[m],1);
-       } else{
+       } else {
                if (r == NULL){
                        r = newInstruction(mb, ASSIGNsymbol);
                        setModuleId(r,matRef);
@@ -420,7 +596,7 @@
        memset((char*) mvar, 0, mb->vtop * sizeof(int));
 
        for( i=0; i<oldtop; i++){
-               int n = 0, o = 0;
+               int n = -1, o = -1;
                p = old[i];
                if (getModuleId(p)== matRef && 
                   (getFunctionId(p)==newRef || getFunctionId(p)==packRef)) {
@@ -450,23 +626,46 @@
 #endif
                        continue;
                }
+@-
+Sometimes mats are reused (ie input mats are used as the output). Such
+statements can allways be done by iterating over the parts.
+@c
+               if (match > 0 && (m=isMATalias(getArg(p,0), mvar, mtop)) >= 0) {
+                       int x = -1;
+                       if (match > 1) {
+                               n = isMATalias(getArg(p,1), mvar, mtop);
+                               o = isMATalias(getArg(p,2), mvar, mtop);
+                               x = isMATalias(getArg(p,3), mvar, mtop);
+                       }
+                       mat_reuse(mb, p, mat, m, n, o, x); 
+                       actions++;
+                       continue;
+               }
 @- 
-First we handle horizontal aligned mats. This information is passed using
+Here we handle horizontal aligned mats. This information is passed using
 mat.hrange(b,x,y). So if this is available, we can simplify batcalc operations
 and for fetch joins we can use this information to do per part joins only.
 @c
                if (match == 2) {
-                       if (getModuleId(p) == algebraRef &&
-                           getFunctionId(p) == joinRef &&
-                               (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0 &&
-                               (n=isMATalias(getArg(p,2),mvar,mtop)) >= 0 &&
-                               mat[m]->argc == mat[n]->argc ) {
+                       if (getModuleId(p) == algebraRef && 
+                            getFunctionId(p) == joinRef &&
+                            (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0 &&
+                            (n=isMATalias(getArg(p,2),mvar,mtop)) >= 0 &&
+                             mat[m]->argc == mat[n]->argc ) {
                                /* todo match the hranges */
                                mtop= mat_add(mat, mvar, mtop, mat_join(mb, p, 
mat, m, n));
                                actions++;
                                continue;
                        }       
                }
+               if (match > 1 && getModuleId(p) == batcalcRef) { 
+                       m = isMATalias(getArg(p,1),mvar,mtop);
+                       n = isMATalias(getArg(p,2),mvar,mtop);
+                       o = isMATalias(getArg(p,3),mvar,mtop);
+                       mtop = mat_add(mat, mvar, mtop, mat_batcalc(mb, p, mat, 
m, n, o));
+                       actions++;
+                       continue;
+               }
 @- 
 Now we handle group, derive and aggregation statements. 
 @c
@@ -486,16 +685,14 @@
                        continue;
                }
                if (match == 3 && getModuleId(p) == aggrRef && p->argc == 4 &&
-                  (getFunctionId(p)== countRef ||
-                   getFunctionId(p)== minRef ||
-                   getFunctionId(p)== maxRef ||
-                   getFunctionId(p)== sumRef) &&
+                  (getFunctionId(p) == countRef ||
+                   getFunctionId(p) == minRef ||
+                   getFunctionId(p) == maxRef ||
+                   getFunctionId(p) == sumRef) &&
                   ((m=isMATalias(getArg(p,1),mvar,mtop)) >= 0) &&
                   ((n=isMATalias(getArg(p,2),mvar,mtop)) >= 0) &&
                   ((o=isMATalias(getArg(p,3),mvar,mtop)) >= 0)) {
-                       InstrPtr aggr = mat_group_aggr(mb, p, mat, m, n, o);
-
-                       mtop = mat_add(mat, mvar, mtop, aggr);
+                       mat_group_aggr(mb, p, mat, m, n, o);
                        actions++;
                        continue;
                }

Index: opt_support.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_support.mx,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -d -r1.49 -r1.50
--- opt_support.mx      6 Nov 2007 18:08:46 -0000       1.49
+++ opt_support.mx      14 Nov 2007 10:18:27 -0000      1.50
@@ -1176,21 +1176,22 @@
 and should be conservative.
 @c
 int isFragmentGroup(InstrPtr p){
-       return
+       return          (getModuleId(p)== batcalcRef) ||
                        (getModuleId(p)== constraintsRef && 
                                getFunctionId(p)== getName("emptySet",8)) ||
                        (getModuleId(p)== algebraRef && (
                                getFunctionId(p)== selectRef ||
-                               getFunctionId(p)==uselectRef ||
-                               getFunctionId(p) == markTRef ||
-                               getFunctionId(p)==likeselectRef ||
+                               getFunctionId(p)== selectNotNilRef ||
+                               getFunctionId(p)== uselectRef ||
+                               getFunctionId(p)== likeselectRef ||
                                getFunctionId(p)== joinRef ||
                                getFunctionId(p)== semijoinRef ||
-                               getFunctionId(p)== kdifferenceRef
+                               getFunctionId(p)== kdifferenceRef ||
+                               getFunctionId(p)== reuseRef
                        )       )  ||
                        (getModuleId(p)== batRef && (
-                               getFunctionId(p)==reverseRef ||
-                               getFunctionId(p)==mirrorRef ||
+                               getFunctionId(p)== reverseRef ||
+                               getFunctionId(p)== mirrorRef ||
                                getFunctionId(p)== setAccessRef ||
                                getFunctionId(p)== setWriteModeRef 
                        ) );


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to