Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5560/optimizer

Modified Files:
        opt_joinpath.mx 
Log Message:
The joinpath optimizer now als implements a heurist for the
SQL frontend  to re-use common join paths.


Index: opt_joinpath.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_joinpath.mx,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -d -r1.17 -r1.18
--- opt_joinpath.mx     25 Jan 2008 14:19:29 -0000      1.17
+++ opt_joinpath.mx     27 Jan 2008 08:21:29 -0000      1.18
@@ -19,6 +19,7 @@
 The routine @sc{optimizer.joinPath()}
 walks through the program looking for join operations
 and cascades them into multiple join paths.
+To illustrate, consider
 @verbatim
        a:= bat.new(:oid,:oid);
        b:= bat.new(:oid,:oid);
@@ -28,9 +29,10 @@
        j3:= algebra.join(b,b);
        j4:= algebra.join(b,j3);
 @end verbatim
-The result includes the expanded join expressions.
-The deadcode optimizer should take care of superflous
-paths.
+The optimizer will first replace all arguments
+by their join sequence. The underlying instructions
+are left behind for the deadcode optimizer to be
+removed.
 @verbatim
        a:= bat.new(:oid,:oid);
        j1:= algebra.join(a,b);
@@ -38,6 +40,36 @@
        j3:= algebra.join(b,b);
        j4:= algebra.joinPath(b,b,b);
 @end verbatim
+In principle, the joinpaths may contain common
+subpaths, whose materialization would improve
+performance.
+The SQL front-end produces often produces
+snippets of the following structure
[EMAIL PROTECTED]
+    t1:= algebra.join(b,c);
+    z1:= algebra.join(a,t1);
+       ...
+    t2:= algebra.join(b,d);
+    z2:= algebra.join(a,t2);
[EMAIL PROTECTED] verbatim
+The joinpath would merge them into
[EMAIL PROTECTED]
+    z1:= algebra.joinPath(a,b,c);
+       ...
+    z2:= algebra.joinPath(a,b,d);
[EMAIL PROTECTED] verbatim
+which are handle by a heuristic looking
+at the first two argments and re-uses
+a materialized join.
[EMAIL PROTECTED]
+    _13:= algebra.join(a,b);
+    z1:= algebra.joinPath(_13,c);
+       ...
+    z2:= algebra.joinPath(_13,d);
[EMAIL PROTECTED] verbatim
+An alternative is to make recognition
+of the common re-useable paths an
+integral part of the joinPath body.
 @{
 @mal
 pattern optimizer.joinPath():str
@@ -56,7 +88,7 @@
 #include "opt_support.h"
 #include "mal_interpreter.h"
 
-/* #define DEBUG_OPT_JOINPATH */
+/* #define DEBUG_OPT_JOINPATH  */
 @-
 @c
 #include "mal_config.h"
@@ -69,6 +101,8 @@
        int *pc, *used;
        str joinPathRef = putName("joinPath",8);
        InstrPtr q,r;
+       InstrPtr *old;
+       int limit,slimit;
 
        (void) stk;
        if (varGetProp(mb, getArg(mb->stmt[0], 0), inlineProp) != NULL)
@@ -81,30 +115,31 @@
           we are only expanding when a variable is used once. */
        used= (int*) alloca(sizeof(int)* mb->vtop); 
        memset((char*) used, 0, sizeof(int)* mb->vtop);
-       for (i = 1; i < mb->stop; i++){
-               p= getInstrPtr(mb,i);
-               for(j=p->retc; j< p->argc; j++)
-                       used[getArg(p,j)]++;
-               for(j=0; j< p->retc; j++)
-                       pc[getArg(p,j)]= i;
-       }
 
-       for (i = 1; i < mb->stop; i++){
-               p= getInstrPtr(mb,i);
+       old= mb->stmt;
+       limit= mb->stop;
+       slimit= mb->ssize;
+       newMalBlkStmt(mb,mb->ssize);
+
+       for (i = 0; i<limit; i++){
+               p= old[i];
                if( getModuleId(p)== algebraRef && getFunctionId(p)== joinRef ){
 @-
-Try to expand each of its argument list.
+Try to expand its argument list
 @c
                        q= copyInstruction(p);
                        q->argc=1;
-                       for(j=p->retc; j<p->argc; j++)
-                       if( pc[getArg(p,j)] ){
+                       for(j=p->retc; j<p->argc; j++){
                                r= getInstrPtr(mb,pc[getArg(p,j)]);
+                               assert(r);
 #ifdef DEBUG_OPT_JOINPATH 
-                               stream_printf(GDKout,"expand with \n");
+                               stream_printf(GDKout,"expand list \n");
+                               printInstruction(GDKout,mb, p, LIST_MAL_ALL);
+                               printInstruction(GDKout,mb, q, LIST_MAL_ALL);
+                               if(r)
                                printInstruction(GDKout,mb, r, LIST_MAL_ALL);
 #endif
-                               if(  getModuleId(r)== algebraRef && 
+                               if( r &&  getModuleId(r)== algebraRef && 
                                        ( getFunctionId(r)== joinRef  || 
getFunctionId(r)== joinPathRef) ){
                                        for(k= r->retc; k<r->argc; k++)
                                                q = 
pushArgument(mb,q,getArg(r,k));
@@ -117,33 +152,61 @@
 #endif
                        if(q->argc<= p->argc){
                                /* no change */
-                       nochange:
                                freeInstruction(q);
-                       } else {
+                               goto wrapup;
+                       }
 @-
 Final type check.
 @c
-                               for(j=1; j<q->argc-1; j++)
-                               if( getTailType(getArgType(mb,q,j)) != 
getHeadType(getArgType(mb,q,j+1)) &&
-                               !( getTailType(getArgType(mb,q,j))== TYPE_oid  
&&
-                                       getHeadType(getArgType(mb,q,j))== 
TYPE_void) &&
-                               !( getTailType(getArgType(mb,q,j))== TYPE_void 
&&
-                                       getHeadType(getArgType(mb,q,j))== 
TYPE_oid)
-                                )
-                                       goto nochange;
+                       for(j=1; j<q->argc-1; j++)
+                       if( getTailType(getArgType(mb,q,j)) != 
getHeadType(getArgType(mb,q,j+1)) &&
+                       !( getTailType(getArgType(mb,q,j))== TYPE_oid  &&
+                               getHeadType(getArgType(mb,q,j))== TYPE_void) &&
+                       !( getTailType(getArgType(mb,q,j))== TYPE_void &&
+                               getHeadType(getArgType(mb,q,j))== TYPE_oid)
+                        ){
+                               freeInstruction(q);
+                               goto wrapup;
+                       }
 @-
 Post-optimization. After the join path has been constructed
-it could locate common subpaths with previous ones and factor
-out that processing step. [TODO]
+it should factor out common subpaths. The heuristic is to
+remove the first pair if its variables are used often.
+In principle, any re-used pair should be localized recursively.
+The heuristics is sufficient for the code produced by SQL frontend
 @c
-                               setFunctionId(q,joinPathRef);
-                               getInstrPtr(mb,i)=q;
-                               freeInstruction(p);
-                               p=q;
-                               actions++;
+                       if ( used[getArg(q,1)]>=2 && used[getArg(q,2)]>=2){
+                               r= copyInstruction(q);
+                               r->argc=3;
+                               getArg(r,0)= newTmpVariable(mb,TYPE_any);
+                               getArg(q,1)= getArg(r,0);
+                               delArgument(q,2);
+                               pushInstruction(mb,r);
+                               for(j=0; j< r->retc; j++)
+                                       pc[getArg(r,j)]= mb->stop-1;
+#ifdef DEBUG_OPT_JOINPATH 
+                               stream_printf(GDKout,"heuristic joinPath 
rule\n");
+                               printInstruction(GDKout,mb, r, LIST_MAL_ALL);
+                               printInstruction(GDKout,mb, q, LIST_MAL_ALL);
+#endif
                        }
-               }
+
+                       if ( q->argc > 3)
+                               setFunctionId(q,joinPathRef);
+                       p= q;
+                       actions++;
+               } 
+       wrapup:
+               pushInstruction(mb,p);
+               for(j=0; j< p->retc; j++)
+                       pc[getArg(p,j)]= mb->stop-1;
+               for(j=p->retc; j< p->argc; j++)
+                       used[getArg(p,j)]++;
        }
+       for(; i<slimit; i++)
+       if(old[i])
+               freeInstruction(old[i]);
+       GDKfree(old);
        return actions;
 }
 @include optimizerWrapper.mx


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to