Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26665/src/optimizer

Modified Files:
      Tag: GDK-2
        opt_garbageCollector.mx opt_mergetable.mx opt_support.mx 
Log Message:
propagated changes of Friday Aug 17 2007 - Tuesday Aug 21 2007
from the development trunk to the GDK-2 branch


Index: opt_garbageCollector.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_garbageCollector.mx,v
retrieving revision 1.25.2.1
retrieving revision 1.25.2.2
diff -u -d -r1.25.2.1 -r1.25.2.2
--- opt_garbageCollector.mx     17 Aug 2007 15:38:02 -0000      1.25.2.1
+++ opt_garbageCollector.mx     21 Aug 2007 13:24:30 -0000      1.25.2.2
@@ -94,6 +94,7 @@
 #include "mal_interpreter.h"   /* for showErrors() */
 #include "mal_builder.h"
 #include "opt_prelude.h"
+#include "mal_properties.h"
 
 @-
 There are two basic ways to release a BAT. The cheapest one is
@@ -147,6 +148,22 @@
        actions++;
 }
 @}
[EMAIL PROTECTED]
+Garbage collection is not always required, even unwanted.
+For example, the BATs used to assemble the pieces of
+an XML document should be released after it has been
+consumed in e.g. xml.str().
+
+Keeping variables around beyond their end-of-life-span
+can be marked with the proper 'keep'.
[EMAIL PROTECTED]
+static int
+OPTkeepVariable(MalBlkPtr mb, InstrPtr p, int idx){
+       static char *keep=0;
+       if( keep == 0)
+               keep= putName("keep",4);
+       return isPropertyDefined( getProps(mb, getArg(p,idx)),keep);
+}
 @c
 static int
 OPTgarbageCollectorImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -181,7 +198,7 @@
                                        if (getArg(p, j) == getArg(p, k))
                                                done++;
 
-                               if (done == 0 ){
+                               if (done == 0 && !OPTkeepVariable(mb,p,j)){
 #ifdef DEBUG_OPT_GARBAGE
                                printf("remove the variable %s at %d\n", 
getArgName(mb,p,j),i);
 #endif

Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.12.2.1
retrieving revision 1.12.2.2
diff -u -d -r1.12.2.1 -r1.12.2.2
--- opt_mergetable.mx   17 Aug 2007 15:38:09 -0000      1.12.2.1
+++ opt_mergetable.mx   21 Aug 2007 13:24:30 -0000      1.12.2.2
@@ -33,7 +33,7 @@
 level operations is aware of their existence.
 
 In the first approach of the MAT optimizer we assume that the 
-last BAT in the MAT sequence is used as an accumulator.
+first BAT in the MAT sequence is used as an accumulator.
 Furthermore, no semantic knowledge is used to reduce the
 possible superflous (semi)joins. Instead, we limit expansion
 to a single argument. 
@@ -63,7 +63,7 @@
        _34 := algebra.select(m1,1,3);
        _35 := algebra.select(m2,1,3);
     s := mat.new(_33,_34,_35);
-    i := 0;
+    i := 0:int;
     _36 := aggr.count(_33);
     i := calc.+(i,_36);
     _37 := aggr.count(_34);
@@ -92,15 +92,27 @@
     j := mat.new(_39,_40);
 @end example
 
-The drawback of this scheme is the explosion in MAL statements.
-The system could also use iterators to avoid it.
+The drawback of the scheme is the potential explosion in MAL statements.
+A challenge of the optimizer is to find the minimum by inspection of
+the properties of the MAT elements. For example, it might attempt
+to partially pack elements before proceding. This would be a runtime
+scheduling decision.
+
+Alternative, the system could use MAT iterators to avoid it.
+At the cost of more complex program analysis afterwards.
 
 @example
-barrier m:= mat.newIterator(m0,m1,m2);
-       io.print(m);
-       redo m:= mat.newIterator(m0,m1,m2);
-exit m;
+       ji:= bat.new(:oid,:int);
+barrier b:= mat.newIterator(m0,m1,m2);
+barrier c:= mat.newIterator(c0,c1);
+       ji := algebra.join(b,c);
+       bat.insert(j,ji);
+       redo c:= mat.newIterator(c0,c1);
+       redo b:= mat.newIterator(m0,m1,m2);
+exit c;
+exit b;
 @end example
+
 @{
 Also consider the MAT as a variable property. This is needed
 to pass a MAT as an argument to functions. If the function does
@@ -188,42 +200,15 @@
        }
        return cnt;
 }
-
-static int
-OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){
-       InstrPtr *old=0, q,r, mat[256];
-       int oldtop,fm,i,k,m,mtop=0, mvar[256],tpe;
-       int size,match,actions=0;
-#ifdef DEBUG_OPT_MERGETABLE
-       stream_printf(GDKout,"Start MAT optimizer\n");
-       printFunction(GDKout, mb, 0);
-#endif
-
-       old = mb->stmt;
-       oldtop= mb->stop;
-       size = (mb->stop *1.2 < mb->ssize)? mb->ssize: (int) (mb->stop *1.2);
-       mb->stmt = (InstrPtr *) GDKzalloc(size  * sizeof(InstrPtr));
-       mb->ssize = size ;
-       mb->stop = 0;
-
-
-       for( i=0; i<oldtop; i++){
-               p= old[i];
-               if( (getModuleId(p)== matRef && getFunctionId(p)==newRef) ||
-                       (getModuleId(p)== matRef && getFunctionId(p)==packRef) 
){
-                       mat[mtop]= p;
-                       mvar[mtop] = getArg(p,0);
-                       mtop++;
-                       pushInstruction(mb,p);
-                       continue;
-               }
 @-
 A sql.bind() operation should be trapped to inspect the partition catalog
 for definitions. For now we simulate a partitioned table, comprised
 of the original base and an empty one.
 @c
-               if( (getModuleId(p)== sqlRef && getFunctionId(p)==bindRef)){
-                       InstrPtr p2;
+static InstrPtr
+MATpartitionTableDummy(MalBlkPtr mb, InstrPtr p)
+{
+                       InstrPtr q,r,p2;
                        int v= getArg(p,0);
 
                        freezeVarType(mb,getArg(p,0));
@@ -243,14 +228,60 @@
                        getArg(p,0)= getArg(q,0);
                        getArg(p2,1)= getArg(p,0);
                        getArg(q,0)= v;
-                       q= pushArgument(mb,q,getArg(p,0));
-                       q= pushArgument(mb,q,getArg(p2,0));
+                       q= pushArgument(mb, q, getArg(p,0));
+                       q= pushArgument(mb, q, getArg(p2,0));
+                       
+                       /* the oid ranges should be sequentially aligned */
+                       r = newStmt(mb,batRef, putName("setBase",7));
+                       r= pushArgument(mb, r, getArg(p,0));
+                       r= pushArgument(mb, r, getArg(p2,0));
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"new block\n");
                        printInstruction(GDKout,mb,p,0);
                        printInstruction(GDKout,mb,p2,0);
                        printInstruction(GDKout,mb,q,0);
+                       printInstruction(GDKout,mb,r,0);
+#endif
+               return q;
+}
+
+static int
+OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){
+       InstrPtr *old=0, q,r, *mat;
+       int oldtop,fm,i,k,m,mtop=0, *mvar,tpe;
+       int size,match,actions=0;
+
+#ifdef DEBUG_OPT_MERGETABLE
+       stream_printf(GDKout,"Start MAT optimizer\n");
+       printFunction(GDKout, mb, 0);
 #endif
+
+       old = mb->stmt;
+       oldtop= mb->stop;
+       size = (mb->stop *1.2 < mb->ssize)? mb->ssize: (int) (mb->stop *1.2);
+       mb->stmt = (InstrPtr *) GDKzalloc(size  * sizeof(InstrPtr));
+       mb->ssize = size ;
+       mb->stop = 0;
+
+       /* the number of MATs is limited to the variable stack*/
+       mat= (InstrPtr*) alloca(mb->vtop * sizeof(InstrPtr));
+       memset((char*) mat, 0, mb->vtop * sizeof(InstrPtr));
+       mvar= (int*) alloca(mb->vtop * sizeof(int));
+       memset((char*) mvar, 0, mb->vtop * sizeof(InstrPtr));
+
+       for( i=0; i<oldtop; i++){
+               p= old[i];
+               if( (getModuleId(p)== matRef && getFunctionId(p)==newRef) ||
+                       (getModuleId(p)== matRef && getFunctionId(p)==packRef) 
){
+                       mat[mtop]= p;
+                       mvar[mtop] = getArg(p,0);
+                       mtop++;
+                       pushInstruction(mb,p);
+                       continue;
+               }
+               if( (getModuleId(p)== sqlRef && getFunctionId(p)==bindRef)){
+                       int v= getArg(p,0);
+                       q= MATpartitionTableDummy(mb,p);
                        mat[mtop]= q;
                        mvar[mtop] = v;
                        mtop++;
@@ -274,13 +305,14 @@
 Pack MAT arguments, except one, to limit plan explosion.
 The preferred partitioned one is the first argment as it
 often reflects a base table.
+Look at the depth of the MAT definition to limit the explosion.
 @c
                for( fm= p->argc-1; fm>p->retc ; fm--)
                if(     (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
                        break;
 
                for( k=fm-1; match > 1 && k >= p->retc ; k--)
-               if(     (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
+               if(     (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0 ){
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"Pack k=%d\n",k);
                        printInstruction(GDKout,mb,p,0);
@@ -290,9 +322,66 @@
                        match--;
                }
 @-
-Look at the depth of the MAT definition to limit the explosion.
-               
-Left with an instruction with one MAT we can replace it.
+From here we have to differentiate between the operations.
+This simply means that any SQL bat is extended with an always
+empty second component. If all work, we switch to the last.
+Their semantics should be respected while producing code.
[EMAIL PROTECTED]
+The insertions are sent to the first component of the MAT.
+Selection of the proper component based on e.g. range descriptors is for
+later.
+
+CAVEAT. The MATs that represent partitions in SQL can not
+be packed to facilitate updates. We assume the first
+partition is dominant. 
[EMAIL PROTECTED]
+               if(     getModuleId(p)== batRef && 
+                               ( getFunctionId(p)== insertRef  ||
+                                 getFunctionId(p)== appendRef  
+                               ) &&
+                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+                       getArg(p,1) = getArg(mat[m],mat[m]->retc);
+                       pushInstruction(mb,p);
+                       continue;
+               } 
[EMAIL PROTECTED]
+The kdifference operator is a good example where the
+action does not produce multiple results, but requires
+an accumulation implementation.
[EMAIL PROTECTED]
+A similar situation occurs in delete operations 
+using partitioned input.
[EMAIL PROTECTED]
+               if(((getModuleId(p)== algebraRef &&
+                       getFunctionId(p) == kdifferenceRef) ||
+                       (getModuleId(p)== batRef &&
+                       getFunctionId(p) == deleteRef) ) &&
+                       fm == 2 &&
+                       (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
+#ifdef DEBUG_OPT_MERGETABLE
+                       stream_printf(GDKout,"kdifference resolution 
fm=%d\n",fm);
+                       printInstruction(GDKout,mb,p,0);
+#endif
+
+                       tpe= getVarType(mb,getArg(p,0));
+                       for(k=1; k< mat[m]->argc; k++){
+                               q= copyInstruction(p);
+                               if( k == mat[m]->argc-1)
+                                       getArg(q,0)= getArg(p,0);
+                               else{
+                                       getArg(q,0) = newTmpVariable(mb, tpe);
+                                       getArg(p,1)= getArg(q,0);
+                               }
+                               getArg(q,fm) = getArg(mat[m],k);
+                               pushInstruction(mb,q);
+#ifdef DEBUG_OPT_MERGETABLE
+                               printInstruction(GDKout,mb,q,0);
+#endif
+                       }
+                       freeInstruction(p);
+                       continue;
+               }
[EMAIL PROTECTED]
 Not all instructions can be replaced by the sequence. We have to
 group them and check for them individually.
 @c
@@ -321,12 +410,13 @@
                                if( r)
                                        r= pushArgument(mb,r,getArg(q,0));
                        }
+                       freeInstruction(p);
                        actions++;
                        if(r){
                                mat[mtop]= r;
                                mvar[mtop] = getArg(r,0);
                                mtop++;
-                               pushInstruction(mb,r);
+                               /* pushInstruction(mb,r);*/
                        }
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"Fragment result \n");
@@ -335,28 +425,27 @@
                        continue;
                } 
 @-
-The insertions are sent to the last component of the MAT.
-Selection of the proper componetn based on range descriptors will follow.
[EMAIL PROTECTED]
-               if(     getModuleId(p)== batRef && getFunctionId(p)== insertRef 
&&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
-                       getArg(p,1) = getArg(mat[m],mat[m]->argc-1);
-                       pushInstruction(mb,p);
-                       continue;
-               } 
[EMAIL PROTECTED]
+Aggregate handling is a prime target for optimization.
+The simple cases are dealt with first.
 Handle the rewrite v:=aggr.count(b) and sum()
+And the min/max is as easy
 @c
-               if( ((getModuleId(p)==aggrRef && getFunctionId(p)== countRef) 
|| 
-                       (getModuleId(p)==aggrRef && getFunctionId(p)==sumRef && 
p->argc==2)) &&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+               if( getModuleId(p)==aggrRef && 
+                       ( getFunctionId(p)== countRef || 
+                       ( getFunctionId(p)== minRef && p->argc==2) ||
+                       ( getFunctionId(p)== maxRef && p->argc==2) ||
+                       ( getFunctionId(p)== sumRef && p->argc==2)) &&
+                       (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"count/sum \n");
                        printInstruction(GDKout,mb,p,0);
 #endif
                        r = newInstruction(mb,ASSIGNsymbol);
                        getArg(r,0)= getArg(p,0);
-                       r= pushInt(mb,r,0);
+                       if( getFunctionId(p)== minRef || 
getFunctionId(p)==maxRef)
+                               r= pushNil(mb,r,getVarType(mb,getArg(p,0)));
+                       else
+                               r= pushZero(mb,r,getVarType(mb,getArg(p,0)));
                        pushInstruction(mb,r);
                        for(k=1; k< mat[m]->argc; k++){
                                int v= newTmpVariable(mb,TYPE_int);
@@ -369,7 +458,10 @@
 
                                q= newInstruction(mb,ASSIGNsymbol);
                                setModuleId(q,calcRef);
-                               setFunctionId(q,plusRef);
+                               if( getFunctionId(p)== minRef || 
getFunctionId(p)==maxRef)
+                                       setFunctionId(q,getFunctionId(p));
+                               else
+                                       setFunctionId(q,plusRef);
                                getArg(q,0)= getArg(r,0);
                                q= pushArgument(mb,q,getArg(r,0));
                                q= pushArgument(mb,q,v);
@@ -381,39 +473,8 @@
                        continue;
                } 
 @-
-And the min/max is as easy
[EMAIL PROTECTED]
-               if( ((getModuleId(p)==aggrRef && getFunctionId(p)== maxRef) || 
-                       (getModuleId(p)==aggrRef && getFunctionId(p)==minRef )) 
&&
-                       p->argc==3 &&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
-                       r = newInstruction(mb,ASSIGNsymbol);
-                       getArg(r,0)= getArg(p,0);
-                       r= pushNil(mb,r,getArgType(mb,p,0));
-                       pushInstruction(mb,r);
-                       for(k=1; k< mat[m]->argc; k++){
-                               int v= newTmpVariable(mb,TYPE_int);
-                               q= newInstruction(mb,ASSIGNsymbol);
-                               setModuleId(q,aggrRef);
-                               setFunctionId(q,getFunctionId(p));
-                               getArg(q,0)= v;
-                               q= pushArgument(mb,q,getArg(mat[m],k));
-                               pushInstruction(mb,q);
-
-                               q= newInstruction(mb,ASSIGNsymbol);
-                               setModuleId(q,calcRef);
-                               setFunctionId(q,plusRef);
-                               getArg(q,0)= getArg(r,0);
-                               q= pushArgument(mb,q,getArg(r,0));
-                               q= pushArgument(mb,q,v);
-                               pushInstruction(mb,q);
-                       }
-                       continue;
-               } 
[EMAIL PROTECTED]
-All other instructions should be checked for a MAT dependency.
-It require the MAT to be materialized. We drop the MAT
-afterwards for further consideration.
+All other instructions should be checked for remaining MAT dependencies.
+It require the MAT to be materialized. 
 @c
                for( k= p->retc; k<p->argc; k++)
                if(     (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
@@ -426,7 +487,7 @@
                        break;
                }
 #ifdef DEBUG_OPT_MERGETABLE
-                       stream_printf(GDKout,"last catc\n");
+                       stream_printf(GDKout,"last option\n");
                        printInstruction(GDKout,mb,p,0);
 #endif
                pushInstruction(mb,p);

Index: opt_support.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_support.mx,v
retrieving revision 1.46.2.1
retrieving revision 1.46.2.2
diff -u -d -r1.46.2.1 -r1.46.2.2
--- opt_support.mx      17 Aug 2007 15:38:24 -0000      1.46.2.1
+++ opt_support.mx      21 Aug 2007 13:24:30 -0000      1.46.2.2
@@ -632,6 +632,17 @@
                        printFunction(GDKout, mb, LIST_MAL_ALL);
                }
        }
+       /* code to collect all last versions to study code coverage  in SQL
+       {stream *fd;
+       char nme[25];
+       snprintf(nme,25,"/tmp/mal_%d",getpid());
+       fd= open_wastream(nme);
+       if( fd == NULL)
+               printf("Error in %s\n",nme);
+       printFunction(fd,mb,LIST_MAL_ALL);
+       stream_close(fd);
+       }
+       */
        return mb->errors;
 }
 @-
@@ -1162,7 +1173,6 @@
                                getFunctionId(p)==uselectRef ||
                                getFunctionId(p)==likeselectRef ||
                                getFunctionId(p)==markTRef ||
-                               getFunctionId(p)==kdifferenceRef ||
                                getFunctionId(p)== joinRef ||
                                getFunctionId(p)== semijoinRef
                        )       )  ||
@@ -1170,9 +1180,7 @@
                                getFunctionId(p)==reverseRef ||
                                getFunctionId(p)==mirrorRef ||
                                getFunctionId(p)== setAccessRef ||
-                               getFunctionId(p)== setWriteModeRef ||
-                               getFunctionId(p)== appendRef  ||
-                               getFunctionId(p)== deleteRef
+                               getFunctionId(p)== setWriteModeRef 
                        ) );
 }
 @}


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to