Changeset: 1e547f85bbdf for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1e547f85bbdf
Modified Files:
monetdb5/optimizer/Tests/dataflow5.stable.out
monetdb5/optimizer/opt_dataflow.c
Branch: default
Log Message:
Rewritting the dataflow optimizer
A major code cleanup, avoiding duplicate code and better structuring.
No functional changes, except for slightly better control over de
dataflow blocks in face of multiple assigments.
diffs (truncated from 449 to 300 lines):
diff --git a/monetdb5/optimizer/Tests/dataflow5.stable.out
b/monetdb5/optimizer/Tests/dataflow5.stable.out
--- a/monetdb5/optimizer/Tests/dataflow5.stable.out
+++ b/monetdb5/optimizer/Tests/dataflow5.stable.out
@@ -37,12 +37,14 @@ function user.tst():void;
bat.insert(b,1@0,2);
bat.insert(b,2@0,3);
io.print(b);
+end tst;
function user.tst():void;
b := bat.new(:oid,:int);
bat.insert(b,0@0,1);
bat.insert(b,1@0,2);
bat.insert(b,2@0,3);
io.print(b);
+end tst;
# 16:07:31 >
# 16:07:31 > "Done."
diff --git a/monetdb5/optimizer/opt_dataflow.c
b/monetdb5/optimizer/opt_dataflow.c
--- a/monetdb5/optimizer/opt_dataflow.c
+++ b/monetdb5/optimizer/opt_dataflow.c
@@ -33,10 +33,13 @@
* a dataflow block and who are used multiple times. They should be
* garbage collected outside the parallel block.
*/
+
+#define isSimple(p) (getModuleId(p) == calcRef || getModuleId(p) == mtimeRef
|| getModuleId(p) == strRef || getModuleId(p)== mmathRef || \
+ p->token == ENDsymbol || getFunctionId(p) == multiplexRef ||
blockCntrl(p) || blockStart(p) || blockExit(p))
static int
simpleFlow(InstrPtr *old, int start, int last)
{
- int i, j, simple = TRUE;
+ int i, j, k, simple = TRUE;
InstrPtr p = NULL, q;
/* skip simple first */
@@ -52,8 +55,9 @@ simpleFlow(InstrPtr *old, int start, int
if( !simple) {
simple = FALSE;
for( j= q->retc; j < q->argc; j++)
- if( getArg(p,0) == getArg(q,j))
- simple= TRUE;
+ for( k =0; k < p->retc; k++)
+ if( getArg(p,k) == getArg(q,j))
+ simple= TRUE;
if( !simple)
return 0;
}
@@ -118,16 +122,25 @@ void removeDataflow(MalBlkPtr mb)
GDKfree(delete);
}
+// take care of side-effects in updates
+static void setAssigned(InstrPtr p, int k, int *assigned){
+ if ( isUpdateInstruction(p) || hasSideEffects(p,TRUE))
+ assigned[getArg(p,p->retc)] ++;
+ assigned[getArg(p,k)]++;
+}
+
static int
-dflowAssignTest(Lifespan span, InstrPtr p, int i)
+dflowAssignConflict(InstrPtr p, int *assigned)
{
int j;
- /* flow blocks should be closed (and not opened) when we reach a point
- where a variable is assigned that is not the last
+ /* flow blocks should be closed when we reach a point
+ where a variable is assigned more then once
*/
for(j=0; j<p->retc; j++)
- if (getLastUpdate(span, getArg(p,j)) != i)
+ if ( assigned[getArg(p,j)] )
return 1;
+ if ( isUpdateInstruction(p) && assigned[getArg(p,p->retc)] )
+ return 1;
return 0;
}
@@ -140,25 +153,24 @@ dflowAssignTest(Lifespan span, InstrPtr
/* a limited set of MAL instructions may appear in the dataflow block*/
static int
-dflowInstruction(InstrPtr p) {
+dflowConflict(InstrPtr p) {
+ if ( p->token == ENDsymbol || getFunctionId(p) == multiplexRef ||
blockCntrl(p) || blockStart(p) || blockExit(p))
+ return TRUE;
switch(p->token){
case ASSIGNsymbol:
case PATcall:
case CMDcall:
case FACcall:
case FCNcall:
- return ! ( hasSideEffects(p,FALSE) || isUnsafeFunction(p)
|| blockCntrl(p) );
+ return ( hasSideEffects(p,FALSE) || isUnsafeFunction(p)
);
}
- return FALSE;
+ return TRUE;
}
static int
-dflowGarbagesink(MalBlkPtr mb, InstrPtr *old, int start, int last, int var,
InstrPtr *sink, int top){
+dflowGarbagesink(MalBlkPtr mb, int var, InstrPtr *sink, int top){
InstrPtr r;
- (void) start;
- (void) last;
- (void) old;
r = newInstruction(NULL,ASSIGNsymbol);
getModuleId(r) = languageRef;
getFunctionId(r) = passRef;
@@ -168,219 +180,165 @@ dflowGarbagesink(MalBlkPtr mb, InstrPtr
return top;
}
-static void
-DFLOWinitvars(MalBlkPtr mb, InstrPtr p,int start, int last, Lifespan span,
char *init)
-{
- int k;
- for( k=0; k<p->retc; k++)
- if( getBeginLifespan(span,getArg(p,k)) >= start &&
getEndLifespan(span,getArg(p,k)) >= last && init[getArg(p,k)]==0){
- InstrPtr r= newAssignment(mb);
- getArg(r,0)= getArg(p,k);
- pushNil(mb,r,getArgType(mb,p,k));
- init[getArg(p,k)]=1;
- }
-}
-
-#define copyblock() \
- for( j=start ; j<i; j++) if (old[j]) pushInstruction(mb,old[j]);\
- for( j=0; j<top; j++) pushInstruction(mb,sink[j]);
-
-#define exitblock() \
- if (!sf && entries>1){ \
- q= newAssignment(mb); \
- q->barrier= EXITsymbol; \
- getArg(q,0) = flowblock; \
- }\
- memset((char*) usage, 0, vlimit);\
- entries = flowblock = 0;\
- actions++;
-
/* dataflow blocks are transparent, because they are always
executed, either sequentially or in parallell */
-#define startblock()\
- q= newFcnCall(mb,languageRef,dataflowRef);\
- q->barrier= BARRIERsymbol;\
- getArg(q,0)= flowblock;\
- varSetProperty(mb, getArg(q,0), "transparent",0,0);
int
OPTdataflowImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
p)
{
- int i,j,k, var, cnt, start=1,entries=0, actions=0;
- int flowblock= 0, dumbcopy=0;
- InstrPtr *sink, *old, q;
+ int i,j,k, start=1, conflict, actions=0, simple = TRUE;
+ int flowblock= 0;
+ InstrPtr *sink = NULL, *old = NULL, q;
int limit, slimit, vlimit, top = 0;
- Lifespan span;
- char *init;
- int *usage;
+ char *init = NULL;
+ int *used = NULL, *assigned = NULL, *eolife = NULL;
/* don't use dataflow on single processor systems */
if (GDKnr_threads <= 1)
return 0;
- (void) cntxt;
(void) stk;
/* inlined functions will get their dataflow control later */
if ( varGetProp(mb, getArg(getInstrPtr(mb,0),0),inlineProp)!= NULL)
return 0;
- span= setLifespan(mb);
- if( span == NULL)
- return 0;
- init= (char*) GDKzalloc(mb->vtop);
- if ( init == NULL){
- GDKfree(span);
- return 0;
- }
- usage= (int*) GDKzalloc(vlimit = mb->vtop * sizeof(int));
- if ( usage == NULL){
- GDKfree(span);
- GDKfree(init);
- return 0;
- }
- sink= (InstrPtr*) GDKzalloc(mb->vsize * sizeof(InstrPtr));
- if ( sink == NULL){
- GDKfree(span);
- GDKfree(init);
- GDKfree(usage);
- return 0;
+ OPTDEBUGdataflow{
+ mnstr_printf(cntxt->fdout,"#dataflow input\n");
+ printFunction(cntxt->fdout, mb, 0, LIST_MAL_STMT);
}
+ vlimit = mb->vsize;
+ eolife= (int*) GDKzalloc(vlimit * sizeof(int));
+ init= (char*) GDKzalloc(mb->vtop);
+ used= (int*) GDKzalloc(vlimit * sizeof(int));
+ sink= (InstrPtr*) GDKzalloc(vlimit * sizeof(InstrPtr));
+ assigned= (int*) GDKzalloc(vlimit * sizeof(int));
+ if (eolife == NULL || init == NULL || used == NULL || sink == NULL ||
assigned == NULL)
+ goto wrapup;
+
limit= mb->stop;
slimit= mb->ssize;
old = mb->stmt;
- if ( newMalBlkStmt(mb, mb->ssize+mb->vtop) <0 ){
- GDKfree(span);
- GDKfree(init);
- GDKfree(usage);
- GDKfree(sink);
- return 0;
+ // collect end of variable lives
+ for (i = 0; i<limit; i++) {
+ p = old[i];
+ assert( p);
+ for (j = 0; j < p->argc; j++)
+ eolife[getArg(p,j)]= i;
}
+
+ if ( newMalBlkStmt(mb, mb->ssize+mb->vtop) <0 )
+ goto wrapup;
+
pushInstruction(mb,old[0]);
- //removeDataflow(mb); To be done explicit by optimizers
-
/* inject new dataflow barriers */
for (i = 1; i<limit; i++) {
p = old[i];
+ assert(p);
+ conflict = 0;
- if (p == NULL)
- continue;
-
- if (p->token == ENDsymbol)
- break;
- if (!dflowInstruction(p) || (!dumbcopy && blockExit(p)) ||
dflowAssignTest(span,p,i) ){
- /* close old flow block */
- if (flowblock){
- int sf = simpleFlow(old,start,i);
- top = 0;
- if (!sf && entries > 1){
- for( j=start ; j<i; j++)
- if (old[j]) {
- DFLOWinitvars(mb, old[j],
start, i, span, init);
- /* collect variables garbage
collected within the block */
- for( k=old[j]->retc;
k<old[j]->argc; k++)
- if(
getEndLifespan(span, var = getArg(old[j],k)) == j && usage[var]>=1 &&
isaBatType(getVarType(mb,var)))
- top =
dflowGarbagesink(mb,old, start, i, getArg(old[j],k), sink,top);
- else
- if(
getEndLifespan(span,getArg(old[j],k)) < i && isaBatType(getVarType(mb,var)))
-
usage[getArg(old[j],k)]++;
- assert(top <mb->vsize);
+ if ( dflowConflict(p) || (conflict =
dflowAssignConflict(p,assigned)) ) {
+ /* close previous flow block */
+ if ( !(simple = simpleFlow(old,start,i))){
+ for( j=start ; j<i; j++){
+ q = old[j];
+ // initialize variables used beyond the
dataflow block
+ for( k=0; k<p->retc; k++)
+ if( eolife[getArg(q,k)] >= i &&
init[getArg(q,k)]==0){
+ InstrPtr r= newAssignment(mb);
+ getArg(r,0)= getArg(q,k);
+
pushNil(mb,r,getArgType(mb,q,k));
+ init[getArg(r,0)]=1;
}
- startblock();
+ // collect BAT variables garbage
collected within the block
+ for( k=q->retc; k<q->argc; k++)
+ if (
isaBatType(getVarType(mb,getArg(q,k))) ){
+ if( eolife[getArg(q,k)] == j &&
used[getArg(q,k)]>=1 )
+ top =
dflowGarbagesink(mb, getArg(q,k), sink, top);
+ else
+ if( eolife[getArg(q,k)] < i )
+ used[getArg(q,k)]++;
+ }
}
- copyblock();
- exitblock();
+ flowblock = newTmpVariable(mb,TYPE_bit);
+ q= newFcnCall(mb,languageRef,dataflowRef);\
+ q->barrier= BARRIERsymbol;\
+ getArg(q,0)= flowblock;\
+ varSetProperty(mb, getArg(q,0),
"transparent",0,0);
}
- pushInstruction(mb,p);
- continue;
+ //copyblock
+ for( j=start ; j<i; j++)
+ pushInstruction(mb,old[j]);
+ // force the pending final garbage statements
+ for( j=0; j<top; j++)
+ pushInstruction(mb,sink[j]);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list