Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26665/src/optimizer
Modified Files:
Tag: GDK-2
opt_garbageCollector.mx opt_mergetable.mx opt_support.mx
Log Message:
propagated changes of Friday Aug 17 2007 - Tuesday Aug 21 2007
from the development trunk to the GDK-2 branch
Index: opt_garbageCollector.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_garbageCollector.mx,v
retrieving revision 1.25.2.1
retrieving revision 1.25.2.2
diff -u -d -r1.25.2.1 -r1.25.2.2
--- opt_garbageCollector.mx 17 Aug 2007 15:38:02 -0000 1.25.2.1
+++ opt_garbageCollector.mx 21 Aug 2007 13:24:30 -0000 1.25.2.2
@@ -94,6 +94,7 @@
#include "mal_interpreter.h" /* for showErrors() */
#include "mal_builder.h"
#include "opt_prelude.h"
+#include "mal_properties.h"
@-
There are two basic ways to release a BAT. The cheapest one is
@@ -147,6 +148,22 @@
actions++;
}
@}
[EMAIL PROTECTED]
+Garbage collection is not always required, even unwanted.
+For example, the BATs used to assemble the pieces of
+an XML document should be released after it has been
+consumed in e.g. xml.str().
+
+Keeping variables around beyond their end-of-life-span
+can be marked with the proper 'keep'.
[EMAIL PROTECTED]
+static int
+OPTkeepVariable(MalBlkPtr mb, InstrPtr p, int idx){
+ static char *keep=0;
+ if( keep == 0)
+ keep= putName("keep",4);
+ return isPropertyDefined( getProps(mb, getArg(p,idx)),keep);
+}
@c
static int
OPTgarbageCollectorImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -181,7 +198,7 @@
if (getArg(p, j) == getArg(p, k))
done++;
- if (done == 0 ){
+ if (done == 0 && !OPTkeepVariable(mb,p,j)){
#ifdef DEBUG_OPT_GARBAGE
printf("remove the variable %s at %d\n",
getArgName(mb,p,j),i);
#endif
Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.12.2.1
retrieving revision 1.12.2.2
diff -u -d -r1.12.2.1 -r1.12.2.2
--- opt_mergetable.mx 17 Aug 2007 15:38:09 -0000 1.12.2.1
+++ opt_mergetable.mx 21 Aug 2007 13:24:30 -0000 1.12.2.2
@@ -33,7 +33,7 @@
level operations is aware of their existence.
In the first approach of the MAT optimizer we assume that the
-last BAT in the MAT sequence is used as an accumulator.
+first BAT in the MAT sequence is used as an accumulator.
Furthermore, no semantic knowledge is used to reduce the
possible superflous (semi)joins. Instead, we limit expansion
to a single argument.
@@ -63,7 +63,7 @@
_34 := algebra.select(m1,1,3);
_35 := algebra.select(m2,1,3);
s := mat.new(_33,_34,_35);
- i := 0;
+ i := 0:int;
_36 := aggr.count(_33);
i := calc.+(i,_36);
_37 := aggr.count(_34);
@@ -92,15 +92,27 @@
j := mat.new(_39,_40);
@end example
-The drawback of this scheme is the explosion in MAL statements.
-The system could also use iterators to avoid it.
+The drawback of the scheme is the potential explosion in MAL statements.
+A challenge of the optimizer is to find the minimum by inspection of
+the properties of the MAT elements. For example, it might attempt
+to partially pack elements before proceding. This would be a runtime
+scheduling decision.
+
+Alternative, the system could use MAT iterators to avoid it.
+At the cost of more complex program analysis afterwards.
@example
-barrier m:= mat.newIterator(m0,m1,m2);
- io.print(m);
- redo m:= mat.newIterator(m0,m1,m2);
-exit m;
+ ji:= bat.new(:oid,:int);
+barrier b:= mat.newIterator(m0,m1,m2);
+barrier c:= mat.newIterator(c0,c1);
+ ji := algebra.join(b,c);
+ bat.insert(j,ji);
+ redo c:= mat.newIterator(c0,c1);
+ redo b:= mat.newIterator(m0,m1,m2);
+exit c;
+exit b;
@end example
+
@{
Also consider the MAT as a variable property. This is needed
to pass a MAT as an argument to functions. If the function does
@@ -188,42 +200,15 @@
}
return cnt;
}
-
-static int
-OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){
- InstrPtr *old=0, q,r, mat[256];
- int oldtop,fm,i,k,m,mtop=0, mvar[256],tpe;
- int size,match,actions=0;
-#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"Start MAT optimizer\n");
- printFunction(GDKout, mb, 0);
-#endif
-
- old = mb->stmt;
- oldtop= mb->stop;
- size = (mb->stop *1.2 < mb->ssize)? mb->ssize: (int) (mb->stop *1.2);
- mb->stmt = (InstrPtr *) GDKzalloc(size * sizeof(InstrPtr));
- mb->ssize = size ;
- mb->stop = 0;
-
-
- for( i=0; i<oldtop; i++){
- p= old[i];
- if( (getModuleId(p)== matRef && getFunctionId(p)==newRef) ||
- (getModuleId(p)== matRef && getFunctionId(p)==packRef)
){
- mat[mtop]= p;
- mvar[mtop] = getArg(p,0);
- mtop++;
- pushInstruction(mb,p);
- continue;
- }
@-
A sql.bind() operation should be trapped to inspect the partition catalog
for definitions. For now we simulate a partitioned table, comprised
of the original base and an empty one.
@c
- if( (getModuleId(p)== sqlRef && getFunctionId(p)==bindRef)){
- InstrPtr p2;
+static InstrPtr
+MATpartitionTableDummy(MalBlkPtr mb, InstrPtr p)
+{
+ InstrPtr q,r,p2;
int v= getArg(p,0);
freezeVarType(mb,getArg(p,0));
@@ -243,14 +228,60 @@
getArg(p,0)= getArg(q,0);
getArg(p2,1)= getArg(p,0);
getArg(q,0)= v;
- q= pushArgument(mb,q,getArg(p,0));
- q= pushArgument(mb,q,getArg(p2,0));
+ q= pushArgument(mb, q, getArg(p,0));
+ q= pushArgument(mb, q, getArg(p2,0));
+
+ /* the oid ranges should be sequentially aligned */
+ r = newStmt(mb,batRef, putName("setBase",7));
+ r= pushArgument(mb, r, getArg(p,0));
+ r= pushArgument(mb, r, getArg(p2,0));
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"new block\n");
printInstruction(GDKout,mb,p,0);
printInstruction(GDKout,mb,p2,0);
printInstruction(GDKout,mb,q,0);
+ printInstruction(GDKout,mb,r,0);
+#endif
+ return q;
+}
+
+static int
+OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){
+ InstrPtr *old=0, q,r, *mat;
+ int oldtop,fm,i,k,m,mtop=0, *mvar,tpe;
+ int size,match,actions=0;
+
+#ifdef DEBUG_OPT_MERGETABLE
+ stream_printf(GDKout,"Start MAT optimizer\n");
+ printFunction(GDKout, mb, 0);
#endif
+
+ old = mb->stmt;
+ oldtop= mb->stop;
+ size = (mb->stop *1.2 < mb->ssize)? mb->ssize: (int) (mb->stop *1.2);
+ mb->stmt = (InstrPtr *) GDKzalloc(size * sizeof(InstrPtr));
+ mb->ssize = size ;
+ mb->stop = 0;
+
+ /* the number of MATs is limited to the variable stack*/
+ mat= (InstrPtr*) alloca(mb->vtop * sizeof(InstrPtr));
+ memset((char*) mat, 0, mb->vtop * sizeof(InstrPtr));
+ mvar= (int*) alloca(mb->vtop * sizeof(int));
+ memset((char*) mvar, 0, mb->vtop * sizeof(InstrPtr));
+
+ for( i=0; i<oldtop; i++){
+ p= old[i];
+ if( (getModuleId(p)== matRef && getFunctionId(p)==newRef) ||
+ (getModuleId(p)== matRef && getFunctionId(p)==packRef)
){
+ mat[mtop]= p;
+ mvar[mtop] = getArg(p,0);
+ mtop++;
+ pushInstruction(mb,p);
+ continue;
+ }
+ if( (getModuleId(p)== sqlRef && getFunctionId(p)==bindRef)){
+ int v= getArg(p,0);
+ q= MATpartitionTableDummy(mb,p);
mat[mtop]= q;
mvar[mtop] = v;
mtop++;
@@ -274,13 +305,14 @@
Pack MAT arguments, except one, to limit plan explosion.
The preferred partitioned one is the first argment as it
often reflects a base table.
+Look at the depth of the MAT definition to limit the explosion.
@c
for( fm= p->argc-1; fm>p->retc ; fm--)
if( (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
break;
for( k=fm-1; match > 1 && k >= p->retc ; k--)
- if( (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
+ if( (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0 ){
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"Pack k=%d\n",k);
printInstruction(GDKout,mb,p,0);
@@ -290,9 +322,66 @@
match--;
}
@-
-Look at the depth of the MAT definition to limit the explosion.
-
-Left with an instruction with one MAT we can replace it.
+From here we have to differentiate between the operations.
+This simply means that any SQL bat is extended with an always
+empty second component. If all work, we switch to the last.
+Their semantics should be respected while producing code.
[EMAIL PROTECTED]
+The insertions are sent to the first component of the MAT.
+Selection of the proper component based on e.g. range descriptors is for
+later.
+
+CAVEAT. The MATs that represent partitions in SQL can not
+be packed to facilitate updates. We assume the first
+partition is dominant.
[EMAIL PROTECTED]
+ if( getModuleId(p)== batRef &&
+ ( getFunctionId(p)== insertRef ||
+ getFunctionId(p)== appendRef
+ ) &&
+ (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+ getArg(p,1) = getArg(mat[m],mat[m]->retc);
+ pushInstruction(mb,p);
+ continue;
+ }
[EMAIL PROTECTED]
+The kdifference operator is a good example where the
+action does not produce multiple results, but requires
+an accumulation implementation.
[EMAIL PROTECTED]
+A similar situation occurs in delete operations
+using partitioned input.
[EMAIL PROTECTED]
+ if(((getModuleId(p)== algebraRef &&
+ getFunctionId(p) == kdifferenceRef) ||
+ (getModuleId(p)== batRef &&
+ getFunctionId(p) == deleteRef) ) &&
+ fm == 2 &&
+ (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
+#ifdef DEBUG_OPT_MERGETABLE
+ stream_printf(GDKout,"kdifference resolution
fm=%d\n",fm);
+ printInstruction(GDKout,mb,p,0);
+#endif
+
+ tpe= getVarType(mb,getArg(p,0));
+ for(k=1; k< mat[m]->argc; k++){
+ q= copyInstruction(p);
+ if( k == mat[m]->argc-1)
+ getArg(q,0)= getArg(p,0);
+ else{
+ getArg(q,0) = newTmpVariable(mb, tpe);
+ getArg(p,1)= getArg(q,0);
+ }
+ getArg(q,fm) = getArg(mat[m],k);
+ pushInstruction(mb,q);
+#ifdef DEBUG_OPT_MERGETABLE
+ printInstruction(GDKout,mb,q,0);
+#endif
+ }
+ freeInstruction(p);
+ continue;
+ }
[EMAIL PROTECTED]
Not all instructions can be replaced by the sequence. We have to
group them and check for them individually.
@c
@@ -321,12 +410,13 @@
if( r)
r= pushArgument(mb,r,getArg(q,0));
}
+ freeInstruction(p);
actions++;
if(r){
mat[mtop]= r;
mvar[mtop] = getArg(r,0);
mtop++;
- pushInstruction(mb,r);
+ /* pushInstruction(mb,r);*/
}
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"Fragment result \n");
@@ -335,28 +425,27 @@
continue;
}
@-
-The insertions are sent to the last component of the MAT.
-Selection of the proper componetn based on range descriptors will follow.
[EMAIL PROTECTED]
- if( getModuleId(p)== batRef && getFunctionId(p)== insertRef
&&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
- getArg(p,1) = getArg(mat[m],mat[m]->argc-1);
- pushInstruction(mb,p);
- continue;
- }
[EMAIL PROTECTED]
+Aggregate handling is a prime target for optimization.
+The simple cases are dealt with first.
Handle the rewrite v:=aggr.count(b) and sum()
+And the min/max is as easy
@c
- if( ((getModuleId(p)==aggrRef && getFunctionId(p)== countRef)
||
- (getModuleId(p)==aggrRef && getFunctionId(p)==sumRef &&
p->argc==2)) &&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
+ if( getModuleId(p)==aggrRef &&
+ ( getFunctionId(p)== countRef ||
+ ( getFunctionId(p)== minRef && p->argc==2) ||
+ ( getFunctionId(p)== maxRef && p->argc==2) ||
+ ( getFunctionId(p)== sumRef && p->argc==2)) &&
+ (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"count/sum \n");
printInstruction(GDKout,mb,p,0);
#endif
r = newInstruction(mb,ASSIGNsymbol);
getArg(r,0)= getArg(p,0);
- r= pushInt(mb,r,0);
+ if( getFunctionId(p)== minRef ||
getFunctionId(p)==maxRef)
+ r= pushNil(mb,r,getVarType(mb,getArg(p,0)));
+ else
+ r= pushZero(mb,r,getVarType(mb,getArg(p,0)));
pushInstruction(mb,r);
for(k=1; k< mat[m]->argc; k++){
int v= newTmpVariable(mb,TYPE_int);
@@ -369,7 +458,10 @@
q= newInstruction(mb,ASSIGNsymbol);
setModuleId(q,calcRef);
- setFunctionId(q,plusRef);
+ if( getFunctionId(p)== minRef ||
getFunctionId(p)==maxRef)
+ setFunctionId(q,getFunctionId(p));
+ else
+ setFunctionId(q,plusRef);
getArg(q,0)= getArg(r,0);
q= pushArgument(mb,q,getArg(r,0));
q= pushArgument(mb,q,v);
@@ -381,39 +473,8 @@
continue;
}
@-
-And the min/max is as easy
[EMAIL PROTECTED]
- if( ((getModuleId(p)==aggrRef && getFunctionId(p)== maxRef) ||
- (getModuleId(p)==aggrRef && getFunctionId(p)==minRef ))
&&
- p->argc==3 &&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
- r = newInstruction(mb,ASSIGNsymbol);
- getArg(r,0)= getArg(p,0);
- r= pushNil(mb,r,getArgType(mb,p,0));
- pushInstruction(mb,r);
- for(k=1; k< mat[m]->argc; k++){
- int v= newTmpVariable(mb,TYPE_int);
- q= newInstruction(mb,ASSIGNsymbol);
- setModuleId(q,aggrRef);
- setFunctionId(q,getFunctionId(p));
- getArg(q,0)= v;
- q= pushArgument(mb,q,getArg(mat[m],k));
- pushInstruction(mb,q);
-
- q= newInstruction(mb,ASSIGNsymbol);
- setModuleId(q,calcRef);
- setFunctionId(q,plusRef);
- getArg(q,0)= getArg(r,0);
- q= pushArgument(mb,q,getArg(r,0));
- q= pushArgument(mb,q,v);
- pushInstruction(mb,q);
- }
- continue;
- }
[EMAIL PROTECTED]
-All other instructions should be checked for a MAT dependency.
-It require the MAT to be materialized. We drop the MAT
-afterwards for further consideration.
+All other instructions should be checked for remaining MAT dependencies.
+It require the MAT to be materialized.
@c
for( k= p->retc; k<p->argc; k++)
if( (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0){
@@ -426,7 +487,7 @@
break;
}
#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"last catc\n");
+ stream_printf(GDKout,"last option\n");
printInstruction(GDKout,mb,p,0);
#endif
pushInstruction(mb,p);
Index: opt_support.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_support.mx,v
retrieving revision 1.46.2.1
retrieving revision 1.46.2.2
diff -u -d -r1.46.2.1 -r1.46.2.2
--- opt_support.mx 17 Aug 2007 15:38:24 -0000 1.46.2.1
+++ opt_support.mx 21 Aug 2007 13:24:30 -0000 1.46.2.2
@@ -632,6 +632,17 @@
printFunction(GDKout, mb, LIST_MAL_ALL);
}
}
+ /* code to collect all last versions to study code coverage in SQL
+ {stream *fd;
+ char nme[25];
+ snprintf(nme,25,"/tmp/mal_%d",getpid());
+ fd= open_wastream(nme);
+ if( fd == NULL)
+ printf("Error in %s\n",nme);
+ printFunction(fd,mb,LIST_MAL_ALL);
+ stream_close(fd);
+ }
+ */
return mb->errors;
}
@-
@@ -1162,7 +1173,6 @@
getFunctionId(p)==uselectRef ||
getFunctionId(p)==likeselectRef ||
getFunctionId(p)==markTRef ||
- getFunctionId(p)==kdifferenceRef ||
getFunctionId(p)== joinRef ||
getFunctionId(p)== semijoinRef
) ) ||
@@ -1170,9 +1180,7 @@
getFunctionId(p)==reverseRef ||
getFunctionId(p)==mirrorRef ||
getFunctionId(p)== setAccessRef ||
- getFunctionId(p)== setWriteModeRef ||
- getFunctionId(p)== appendRef ||
- getFunctionId(p)== deleteRef
+ getFunctionId(p)== setWriteModeRef
) );
}
@}
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins