Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv16455

Modified Files:
        opt_mergetable.mx 
Log Message:
Proper handling of kdifference, updates and aggregates brings
the number of SQL tests for further study down to 38.



Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- opt_mergetable.mx   17 Aug 2007 13:56:55 -0000      1.13
+++ opt_mergetable.mx   19 Aug 2007 07:38:02 -0000      1.14
@@ -33,7 +33,7 @@
 level operations is aware of their existence.
 
 In the first approach of the MAT optimizer we assume that the 
-last BAT in the MAT sequence is used as an accumulator.
+first BAT in the MAT sequence is used as an accumulator.
 Furthermore, no semantic knowledge is used to reduce the
 possible superflous (semi)joins. Instead, we limit expansion
 to a single argument. 
@@ -63,7 +63,7 @@
        _34 := algebra.select(m1,1,3);
        _35 := algebra.select(m2,1,3);
     s := mat.new(_33,_34,_35);
-    i := 0;
+    i := 0:int;
     _36 := aggr.count(_33);
     i := calc.+(i,_36);
     _37 := aggr.count(_34);
@@ -92,14 +92,25 @@
     j := mat.new(_39,_40);
 @end example
 
-The drawback of this scheme is the explosion in MAL statements.
-The system could also use iterators to avoid it.
+The drawback of the scheme is the potential explosion in MAL statements.
+A challenge of the optimizer is to find the minimum by inspection of
+the properties of the MAT elements. For example, it might attempt
+to partially pack elements before proceding. This would be a runtime
+scheduling decision.
+
+Alternative, the system could use MAT iterators to avoid it.
+At the cost of more complex program analysis.
 
 @example
-barrier m:= mat.newIterator(m0,m1,m2);
-       io.print(m);
-       redo m:= mat.newIterator(m0,m1,m2);
-exit m;
+       ji:= bat.new(:oid,:int);
+barrier b:= mat.newIterator(m0,m1,m2);
+barrier c:= mat.newIterator(c0,c1);
+       ji := algebra.join(b,c);
+       bat.insert(j,ji);
+       redo c:= mat.newIterator(c0,c1);
+       redo b:= mat.newIterator(m0,m1,m2);
+exit c;
+exit b;
 @end example
 @{
 Also consider the MAT as a variable property. This is needed
@@ -243,13 +254,19 @@
                        getArg(p,0)= getArg(q,0);
                        getArg(p2,1)= getArg(p,0);
                        getArg(q,0)= v;
-                       q= pushArgument(mb,q,getArg(p,0));
-                       q= pushArgument(mb,q,getArg(p2,0));
+                       q= pushArgument(mb, q, getArg(p,0));
+                       q= pushArgument(mb, q, getArg(p2,0));
+                       
+                       /* the oid ranges should be sequentially aligned */
+                       r = newStmt(mb,batRef, putName("setBase",7));
+                       r= pushArgument(mb, r, getArg(p,0));
+                       r= pushArgument(mb, r, getArg(p2,0));
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"new block\n");
                        printInstruction(GDKout,mb,p,0);
                        printInstruction(GDKout,mb,p2,0);
                        printInstruction(GDKout,mb,q,0);
+                       printInstruction(GDKout,mb,r,0);
 #endif
                        mat[mtop]= q;
                        mvar[mtop] = v;
@@ -274,6 +291,7 @@
 Pack MAT arguments, except one, to limit plan explosion.
 The preferred partitioned one is the first argment as it
 often reflects a base table.
+Look at the depth of the MAT definition to limit the explosion.
 @c
                for( fm= p->argc-1; fm>p->retc ; fm--)
                if(     (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
@@ -290,9 +308,55 @@
                        match--;
                }
 @-
-Look at the depth of the MAT definition to limit the explosion.
-               
-Left with an instruction with one MAT we can replace it.
+From here we have to differentiate between the operations.
+This simply means that any SQL bat is extended with an always
+empty second component. If all work, we switch to the last.
+Their semantics should be respected while producing code.
[EMAIL PROTECTED]
+The insertions are sent to the first component of the MAT.
+Selection of the proper component based on e.g. range descriptors is for
+later.
[EMAIL PROTECTED]
+               if(     getModuleId(p)== batRef && 
+                               ( getFunctionId(p)== insertRef  ||
+                                 getFunctionId(p)== appendRef  
+                               ) &&
+                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+                       getArg(p,1) = getArg(mat[m],mat[m]->retc);
+                       pushInstruction(mb,p);
+                       continue;
+               } 
[EMAIL PROTECTED]
+The kdifference operator is a good example where the
+action does not produce multiple results, but requires
+an accumulation implementation.
[EMAIL PROTECTED]
+A similar situation occurs in delete operations 
+using partitioned input.
[EMAIL PROTECTED]
+               if(((getModuleId(p)== algebraRef &&
+                       getFunctionId(p) == kdifferenceRef) ||
+                       (getModuleId(p)== batRef &&
+                       getFunctionId(p) == deleteRef) ) &&
+                       fm == 2 &&
+                       (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
+
+                       tpe= getVarType(mb,getArg(p,0));
+                       for(k=1; k< mat[m]->argc; k++){
+                               q= copyInstruction(p);
+                               if( k < mat[m]->argc-1)
+                                       getArg(q,0)= getArg(p,0);
+                               else{
+                                       getArg(q,0) = newTmpVariable(mb, tpe);
+                                       getArg(p,1)= getArg(q,0);
+                               }
+                               getArg(q,fm) = getArg(mat[m],k);
+                               pushInstruction(mb,q);
+                       }
+                       freeInstruction(p);
+                       continue;
+               }
[EMAIL PROTECTED]
 Not all instructions can be replaced by the sequence. We have to
 group them and check for them individually.
 @c
@@ -321,12 +385,13 @@
                                if( r)
                                        r= pushArgument(mb,r,getArg(q,0));
                        }
+                       freeInstruction(p);
                        actions++;
                        if(r){
                                mat[mtop]= r;
                                mvar[mtop] = getArg(r,0);
                                mtop++;
-                               pushInstruction(mb,r);
+                               /* pushInstruction(mb,r);*/
                        }
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"Fragment result \n");
@@ -335,28 +400,27 @@
                        continue;
                } 
 @-
-The insertions are sent to the last component of the MAT.
-Selection of the proper componetn based on range descriptors will follow.
[EMAIL PROTECTED]
-               if(     getModuleId(p)== batRef && getFunctionId(p)== insertRef 
&&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
-                       getArg(p,1) = getArg(mat[m],mat[m]->argc-1);
-                       pushInstruction(mb,p);
-                       continue;
-               } 
[EMAIL PROTECTED]
+Aggregate handling is a prime target for optimization.
+The simple cases are dealt with first.
 Handle the rewrite v:=aggr.count(b) and sum()
+And the min/max is as easy
 @c
-               if( ((getModuleId(p)==aggrRef && getFunctionId(p)== countRef) 
|| 
-                       (getModuleId(p)==aggrRef && getFunctionId(p)==sumRef && 
p->argc==2)) &&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+               if( getModuleId(p)==aggrRef && 
+                       ( getFunctionId(p)== countRef || 
+                       ( getFunctionId(p)== minRef && p->argc==2) ||
+                       ( getFunctionId(p)== maxRef && p->argc==2) ||
+                       ( getFunctionId(p)== sumRef && p->argc==2)) &&
+                       (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
 #ifdef DEBUG_OPT_MERGETABLE
                        stream_printf(GDKout,"count/sum \n");
                        printInstruction(GDKout,mb,p,0);
 #endif
                        r = newInstruction(mb,ASSIGNsymbol);
                        getArg(r,0)= getArg(p,0);
-                       r= pushInt(mb,r,0);
+                       if( getFunctionId(p)== minRef || 
getFunctionId(p)==maxRef)
+                               r= pushNil(mb,r,getVarType(mb,getArg(p,0)));
+                       else
+                               r= pushZero(mb,r,getVarType(mb,getArg(p,0)));
                        pushInstruction(mb,r);
                        for(k=1; k< mat[m]->argc; k++){
                                int v= newTmpVariable(mb,TYPE_int);
@@ -369,7 +433,10 @@
 
                                q= newInstruction(mb,ASSIGNsymbol);
                                setModuleId(q,calcRef);
-                               setFunctionId(q,plusRef);
+                               if( getFunctionId(p)== minRef || 
getFunctionId(p)==maxRef)
+                                       setFunctionId(q,getFunctionId(p));
+                               else
+                                       setFunctionId(q,plusRef);
                                getArg(q,0)= getArg(r,0);
                                q= pushArgument(mb,q,getArg(r,0));
                                q= pushArgument(mb,q,v);
@@ -381,39 +448,8 @@
                        continue;
                } 
 @-
-And the min/max is as easy
[EMAIL PROTECTED]
-               if( ((getModuleId(p)==aggrRef && getFunctionId(p)== maxRef) || 
-                       (getModuleId(p)==aggrRef && getFunctionId(p)==minRef )) 
&&
-                       p->argc==3 &&
-                       (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
-                       r = newInstruction(mb,ASSIGNsymbol);
-                       getArg(r,0)= getArg(p,0);
-                       r= pushNil(mb,r,getArgType(mb,p,0));
-                       pushInstruction(mb,r);
-                       for(k=1; k< mat[m]->argc; k++){
-                               int v= newTmpVariable(mb,TYPE_int);
-                               q= newInstruction(mb,ASSIGNsymbol);
-                               setModuleId(q,aggrRef);
-                               setFunctionId(q,getFunctionId(p));
-                               getArg(q,0)= v;
-                               q= pushArgument(mb,q,getArg(mat[m],k));
-                               pushInstruction(mb,q);
-
-                               q= newInstruction(mb,ASSIGNsymbol);
-                               setModuleId(q,calcRef);
-                               setFunctionId(q,plusRef);
-                               getArg(q,0)= getArg(r,0);
-                               q= pushArgument(mb,q,getArg(r,0));
-                               q= pushArgument(mb,q,v);
-                               pushInstruction(mb,q);
-                       }
-                       continue;
-               } 
[EMAIL PROTECTED]
-All other instructions should be checked for a MAT dependency.
-It require the MAT to be materialized. We drop the MAT
-afterwards for further consideration.
+All other instructions should be checked for remaining MAT dependencies.
+It require the MAT to be materialized. 
 @c
                for( k= p->retc; k<p->argc; k++)
                if(     (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
@@ -426,7 +462,7 @@
                        break;
                }
 #ifdef DEBUG_OPT_MERGETABLE
-                       stream_printf(GDKout,"last catc\n");
+                       stream_printf(GDKout,"last option\n");
                        printInstruction(GDKout,mb,p,0);
 #endif
                pushInstruction(mb,p);


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to