Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv16455
Modified Files:
opt_mergetable.mx
Log Message:
Proper handling of kdifference, updates and aggregates brings
the number of SQL tests for further study down to 38.
Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- opt_mergetable.mx 17 Aug 2007 13:56:55 -0000 1.13
+++ opt_mergetable.mx 19 Aug 2007 07:38:02 -0000 1.14
@@ -33,7 +33,7 @@
level operations is aware of their existence.
In the first approach of the MAT optimizer we assume that the
-last BAT in the MAT sequence is used as an accumulator.
+first BAT in the MAT sequence is used as an accumulator.
Furthermore, no semantic knowledge is used to reduce the
possible superflous (semi)joins. Instead, we limit expansion
to a single argument.
@@ -63,7 +63,7 @@
_34 := algebra.select(m1,1,3);
_35 := algebra.select(m2,1,3);
s := mat.new(_33,_34,_35);
- i := 0;
+ i := 0:int;
_36 := aggr.count(_33);
i := calc.+(i,_36);
_37 := aggr.count(_34);
@@ -92,14 +92,25 @@
j := mat.new(_39,_40);
@end example
-The drawback of this scheme is the explosion in MAL statements.
-The system could also use iterators to avoid it.
+The drawback of the scheme is the potential explosion in MAL statements.
+A challenge of the optimizer is to find the minimum by inspection of
+the properties of the MAT elements. For example, it might attempt
+to partially pack elements before proceding. This would be a runtime
+scheduling decision.
+
+Alternative, the system could use MAT iterators to avoid it.
+At the cost of more complex program analysis.
@example
-barrier m:= mat.newIterator(m0,m1,m2);
- io.print(m);
- redo m:= mat.newIterator(m0,m1,m2);
-exit m;
+ ji:= bat.new(:oid,:int);
+barrier b:= mat.newIterator(m0,m1,m2);
+barrier c:= mat.newIterator(c0,c1);
+ ji := algebra.join(b,c);
+ bat.insert(j,ji);
+ redo c:= mat.newIterator(c0,c1);
+ redo b:= mat.newIterator(m0,m1,m2);
+exit c;
+exit b;
@end example
@{
Also consider the MAT as a variable property. This is needed
@@ -243,13 +254,19 @@
getArg(p,0)= getArg(q,0);
getArg(p2,1)= getArg(p,0);
getArg(q,0)= v;
- q= pushArgument(mb,q,getArg(p,0));
- q= pushArgument(mb,q,getArg(p2,0));
+ q= pushArgument(mb, q, getArg(p,0));
+ q= pushArgument(mb, q, getArg(p2,0));
+
+ /* the oid ranges should be sequentially aligned */
+ r = newStmt(mb,batRef, putName("setBase",7));
+ r= pushArgument(mb, r, getArg(p,0));
+ r= pushArgument(mb, r, getArg(p2,0));
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"new block\n");
printInstruction(GDKout,mb,p,0);
printInstruction(GDKout,mb,p2,0);
printInstruction(GDKout,mb,q,0);
+ printInstruction(GDKout,mb,r,0);
#endif
mat[mtop]= q;
mvar[mtop] = v;
@@ -274,6 +291,7 @@
Pack MAT arguments, except one, to limit plan explosion.
The preferred partitioned one is the first argment as it
often reflects a base table.
+Look at the depth of the MAT definition to limit the explosion.
@c
for( fm= p->argc-1; fm>p->retc ; fm--)
if( (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
@@ -290,9 +308,55 @@
match--;
}
@-
-Look at the depth of the MAT definition to limit the explosion.
-
-Left with an instruction with one MAT we can replace it.
+From here we have to differentiate between the operations.
+This simply means that any SQL bat is extended with an always
+empty second component. If all work, we switch to the last.
+Their semantics should be respected while producing code.
[EMAIL PROTECTED]
+The insertions are sent to the first component of the MAT.
+Selection of the proper component based on e.g. range descriptors is for
+later.
[EMAIL PROTECTED]
+ if( getModuleId(p)== batRef &&
+ ( getFunctionId(p)== insertRef ||
+ getFunctionId(p)== appendRef
+ ) &&
+ (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+ getArg(p,1) = getArg(mat[m],mat[m]->retc);
+ pushInstruction(mb,p);
+ continue;
+ }
[EMAIL PROTECTED]
+The kdifference operator is a good example where the
+action does not produce multiple results, but requires
+an accumulation implementation.
[EMAIL PROTECTED]
+A similar situation occurs in delete operations
+using partitioned input.
[EMAIL PROTECTED]
+ if(((getModuleId(p)== algebraRef &&
+ getFunctionId(p) == kdifferenceRef) ||
+ (getModuleId(p)== batRef &&
+ getFunctionId(p) == deleteRef) ) &&
+ fm == 2 &&
+ (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
+
+ tpe= getVarType(mb,getArg(p,0));
+ for(k=1; k< mat[m]->argc; k++){
+ q= copyInstruction(p);
+ if( k < mat[m]->argc-1)
+ getArg(q,0)= getArg(p,0);
+ else{
+ getArg(q,0) = newTmpVariable(mb, tpe);
+ getArg(p,1)= getArg(q,0);
+ }
+ getArg(q,fm) = getArg(mat[m],k);
+ pushInstruction(mb,q);
+ }
+ freeInstruction(p);
+ continue;
+ }
[EMAIL PROTECTED]
Not all instructions can be replaced by the sequence. We have to
group them and check for them individually.
@c
@@ -321,12 +385,13 @@
if( r)
r= pushArgument(mb,r,getArg(q,0));
}
+ freeInstruction(p);
actions++;
if(r){
mat[mtop]= r;
mvar[mtop] = getArg(r,0);
mtop++;
- pushInstruction(mb,r);
+ /* pushInstruction(mb,r);*/
}
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"Fragment result \n");
@@ -335,28 +400,27 @@
continue;
}
@-
-The insertions are sent to the last component of the MAT.
-Selection of the proper componetn based on range descriptors will follow.
[EMAIL PROTECTED]
- if( getModuleId(p)== batRef && getFunctionId(p)== insertRef
&&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
- getArg(p,1) = getArg(mat[m],mat[m]->argc-1);
- pushInstruction(mb,p);
- continue;
- }
[EMAIL PROTECTED]
+Aggregate handling is a prime target for optimization.
+The simple cases are dealt with first.
Handle the rewrite v:=aggr.count(b) and sum()
+And the min/max is as easy
@c
- if( ((getModuleId(p)==aggrRef && getFunctionId(p)== countRef)
||
- (getModuleId(p)==aggrRef && getFunctionId(p)==sumRef &&
p->argc==2)) &&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+ if( getModuleId(p)==aggrRef &&
+ ( getFunctionId(p)== countRef ||
+ ( getFunctionId(p)== minRef && p->argc==2) ||
+ ( getFunctionId(p)== maxRef && p->argc==2) ||
+ ( getFunctionId(p)== sumRef && p->argc==2)) &&
+ (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"count/sum \n");
printInstruction(GDKout,mb,p,0);
#endif
r = newInstruction(mb,ASSIGNsymbol);
getArg(r,0)= getArg(p,0);
- r= pushInt(mb,r,0);
+ if( getFunctionId(p)== minRef ||
getFunctionId(p)==maxRef)
+ r= pushNil(mb,r,getVarType(mb,getArg(p,0)));
+ else
+ r= pushZero(mb,r,getVarType(mb,getArg(p,0)));
pushInstruction(mb,r);
for(k=1; k< mat[m]->argc; k++){
int v= newTmpVariable(mb,TYPE_int);
@@ -369,7 +433,10 @@
q= newInstruction(mb,ASSIGNsymbol);
setModuleId(q,calcRef);
- setFunctionId(q,plusRef);
+ if( getFunctionId(p)== minRef ||
getFunctionId(p)==maxRef)
+ setFunctionId(q,getFunctionId(p));
+ else
+ setFunctionId(q,plusRef);
getArg(q,0)= getArg(r,0);
q= pushArgument(mb,q,getArg(r,0));
q= pushArgument(mb,q,v);
@@ -381,39 +448,8 @@
continue;
}
@-
-And the min/max is as easy
[EMAIL PROTECTED]
- if( ((getModuleId(p)==aggrRef && getFunctionId(p)== maxRef) ||
- (getModuleId(p)==aggrRef && getFunctionId(p)==minRef ))
&&
- p->argc==3 &&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
- r = newInstruction(mb,ASSIGNsymbol);
- getArg(r,0)= getArg(p,0);
- r= pushNil(mb,r,getArgType(mb,p,0));
- pushInstruction(mb,r);
- for(k=1; k< mat[m]->argc; k++){
- int v= newTmpVariable(mb,TYPE_int);
- q= newInstruction(mb,ASSIGNsymbol);
- setModuleId(q,aggrRef);
- setFunctionId(q,getFunctionId(p));
- getArg(q,0)= v;
- q= pushArgument(mb,q,getArg(mat[m],k));
- pushInstruction(mb,q);
-
- q= newInstruction(mb,ASSIGNsymbol);
- setModuleId(q,calcRef);
- setFunctionId(q,plusRef);
- getArg(q,0)= getArg(r,0);
- q= pushArgument(mb,q,getArg(r,0));
- q= pushArgument(mb,q,v);
- pushInstruction(mb,q);
- }
- continue;
- }
[EMAIL PROTECTED]
-All other instructions should be checked for a MAT dependency.
-It require the MAT to be materialized. We drop the MAT
-afterwards for further consideration.
+All other instructions should be checked for remaining MAT dependencies.
+It require the MAT to be materialized.
@c
for( k= p->retc; k<p->argc; k++)
if( (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
@@ -426,7 +462,7 @@
break;
}
#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"last catc\n");
+ stream_printf(GDKout,"last option\n");
printInstruction(GDKout,mb,p,0);
#endif
pushInstruction(mb,p);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins