Changeset: d2bb1c337a68 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d2bb1c337a68
Modified Files:
        gdk/gdk_select.c
        monetdb5/mal/mal_factory.c
        monetdb5/modules/mal/mat.c
        monetdb5/optimizer/opt_dataflow.c
        monetdb5/optimizer/opt_pipes.c
Branch: default
Log Message:

Merge from Feb2013 branch.


diffs (truncated from 396 to 300 lines):

diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1143,6 +1143,9 @@ BATsubselect(BAT *b, BAT *s, const void 
                                /* linear extrapolation plus 10% margin */
                                estimate = (BUN) ((dbl) slct_cnt / (dbl) 
smpl_cnt 
                                                  * (dbl) BATcount(b) * 1.1);
+                       } else if (smpl_cnt > 0 && slct_cnt == 0) {
+                               /* estimate low enough to trigger hash select */
+                               estimate = (BATcount(b)/100) -1;
                        }
                }
                hash = hash && estimate < BATcount(b) / 100;
diff --git a/monetdb5/mal/mal_factory.c b/monetdb5/mal/mal_factory.c
--- a/monetdb5/mal/mal_factory.c
+++ b/monetdb5/mal/mal_factory.c
@@ -345,7 +345,8 @@ runFactory(Client cntxt, MalBlkPtr mb, M
        pl->inuse = 1;
        /* inherit debugging */
        cmd = stk->cmd;
-       assert(pl->stk != NULL); /* we should have a stack to run against */
+       if ( pl->stk == NULL)
+                       throw(MAL, "factory.new", "internal error, stack frame 
missing");
 
        /* copy the calling arguments onto the stack
           of the factory */
diff --git a/monetdb5/modules/mal/mat.c b/monetdb5/modules/mal/mat.c
--- a/monetdb5/modules/mal/mat.c
+++ b/monetdb5/modules/mal/mat.c
@@ -141,7 +141,9 @@ str
 MATpackIncrement(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
 {
        int *ret = (int*) getArgReference(stk,p,0);
+       int     pieces;
        BAT *b, *bb, *bn;
+       size_t newsize;
 
        (void) cntxt;
        b = BATdescriptor( stk->stk[getArg(p,1)].val.ival);
@@ -150,9 +152,16 @@ MATpackIncrement(Client cntxt, MalBlkPtr
 
        if ( getArgType(mb,p,2) == TYPE_int){
                /* first step */
-               bn = BATnew(TYPE_void, b->ttype, BATcount(b) * 
stk->stk[getArg(p,2)].val.ival);
+               pieces = stk->stk[getArg(p,2)].val.ival;
+               bn = BATnew(TYPE_void, b->ttype, BATcount(b) * pieces);
                if (bn == NULL)
                        throw(MAL, "mat.pack", MAL_MALLOC_FAIL);
+               /* allocate enough space for the strings */
+               if ( b->T->vheap && bn->T->vheap ){
+                       newsize =  b->T->vheap->size * pieces;
+                       if (HEAPextend(bn->T->vheap, newsize) < 0) 
+                               throw(MAL, "mat.pack", MAL_MALLOC_FAIL);
+               }
                BATsettrivprop(bn);
                BATseqbase(bn, b->H->seq);
                BATseqbase(BATmirror(bn), b->T->seq);
diff --git a/monetdb5/optimizer/opt_dataflow.c 
b/monetdb5/optimizer/opt_dataflow.c
--- a/monetdb5/optimizer/opt_dataflow.c
+++ b/monetdb5/optimizer/opt_dataflow.c
@@ -40,7 +40,8 @@ simpleFlow(InstrPtr *old, int start, int
        InstrPtr p = NULL, q;
 
        /* skip simple first */
-       for( ; simple && start < last; start++)  {
+       for( ; simple && start < last; start++)  
+       if ( old[start] ) {
                p= old[start];
                simple = getModuleId(p) == calcRef || getModuleId(p) == 
mtimeRef || getModuleId(p) == strRef || getModuleId(p)== mmathRef;
        }
@@ -55,8 +56,8 @@ simpleFlow(InstrPtr *old, int start, int
                                        simple= TRUE;
                        if( !simple)
                                return 0;
-                       p = q;
                }
+               p = q;
        }
        return 1;
 }
@@ -96,27 +97,12 @@ dflowAssignTest(Lifespan span, InstrPtr 
        return 0;
 }
 
-static int
-dflowUpdateTest(Lifespan span, InstrPtr p, int i)
-{
-       /* Updates are permitted if it is a unique update on 
-        * a BAT created in the context of this block
-        * As far as we know, no SQL nor MAL test re-uses the
-        * target BAT to insert again and subsequently calls dataflow.
-        * In MAL scripts, they still can occur.
-       */
-       (void) span;
-       (void) i;
-       if ( getModuleId(p) == batRef  &&
-          (getFunctionId(p) == insertRef ||
-               getFunctionId(p) == inplaceRef ||
-               getFunctionId(p) == appendRef ||
-               getFunctionId(p) == updateRef ||
-               getFunctionId(p) == replaceRef ||
-               getFunctionId(p) == deleteRef ) )
-                       return FALSE;/* always */
-       return FALSE;
-}
+/* Updates are permitted if it is a unique update on 
+ * a BAT created in the context of this block
+ * As far as we know, no SQL nor MAL test re-uses the
+ * target BAT to insert again and subsequently calls dataflow.
+ * In MAL scripts, they still can occur.
+*/
 
 /* a limited set of MAL instructions may appear in the dataflow block*/
 static int
@@ -134,42 +120,62 @@ dflowInstruction(InstrPtr p) {
 
 static int
 dflowGarbagesink(MalBlkPtr mb, InstrPtr *old, int start, int last, int var, 
InstrPtr *sink, int top){
-       InstrPtr p, q, r;
-       int j,k;
+       InstrPtr r;
        
-       q= newInstruction(NULL,ASSIGNsymbol); 
-       getModuleId(q) = languageRef;
-       getFunctionId(q) = sinkRef;
-       getArg(q,0)= newTmpVariable(mb,TYPE_void);
-       q= pushArgument(mb, q, var);
-       for ( j= start; j< last; j++){
-               assert(top <mb->vsize);
-               p = old[j];
-               if ( p )
-               for (k = p->retc; k< p->argc; k++)
-                       if ( getArg(p,k)== var) {
-                               r = newInstruction(NULL,ASSIGNsymbol);
-                               getModuleId(r) = languageRef;
-                               getFunctionId(r) = passRef;
-                               getArg(r,0) = 
newTmpVariable(mb,getArgType(mb,p,0));
-                               r= pushArgument(mb,r, getArg(p,0));
-                               sink[top++] = r;
-                               q= pushArgument(mb,q, getArg(r,0));
-                               break;
-               }
-       }
-       assert(top <mb->vsize);
-       sink[top++] = q;
+       (void) start;
+       (void) last;
+       (void) old;
+       r = newInstruction(NULL,ASSIGNsymbol);
+       getModuleId(r) = languageRef;
+       getFunctionId(r) = passRef;
+       getArg(r,0) = newTmpVariable(mb,getVarType(mb,var));
+       r= pushArgument(mb,r, var);
+       sink[top++] = r;
        return top;
 }
 
+static void
+DFLOWinitvars(MalBlkPtr mb, InstrPtr p,int start, int last, Lifespan span, 
char *init)
+{
+       int k;
+       for( k=0; k<p->retc; k++)
+       if( getBeginLifespan(span,getArg(p,k)) >= start && 
getEndLifespan(span,getArg(p,k)) >= last && init[getArg(p,k)]==0){
+               InstrPtr r= newAssignment(mb);
+               getArg(r,0)= getArg(p,k);
+               pushNil(mb,r,getArgType(mb,p,k));
+               init[getArg(p,k)]=1;
+       }
+}
+
+#define copyblock() \
+       for( j=start ; j<i; j++) if (old[j]) pushInstruction(mb,old[j]);\
+       for( j=0; j<top; j++) pushInstruction(mb,sink[j]);
+
+#define exitblock() \
+       if (!sf && entries>1){ \
+               q= newAssignment(mb); \
+               q->barrier= EXITsymbol; \
+               getArg(q,0) = flowblock; \
+       }\
+       memset((char*) usage, 0, vlimit);\
+       entries = flowblock = 0;\
+       actions++;
+
+/* dataflow blocks are transparent, because they are always
+   executed, either sequentially or in parallell */
+#define startblock()\
+       q= newFcnCall(mb,languageRef,dataflowRef);\
+       q->barrier= BARRIERsymbol;\
+       getArg(q,0)= flowblock;\
+       varSetProperty(mb, getArg(q,0), "transparent",0,0);
+
 int
 OPTdataflowImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
p)
 {
        int i,j,k, var, cnt, start=1,entries=0, actions=0;
        int flowblock= 0, dumbcopy=0;
        InstrPtr *sink, *old, q;
-       int limit, slimit, top = 0;
+       int limit, slimit, vlimit, top = 0;
        Lifespan span;
        char *init;
        int *usage;
@@ -191,7 +197,7 @@ OPTdataflowImplementation(Client cntxt, 
                GDKfree(span);
                return 0;
        }
-       usage= (int*) GDKzalloc(mb->vtop * sizeof(int));
+       usage= (int*) GDKzalloc(vlimit = mb->vtop * sizeof(int));
        if ( usage == NULL){
                GDKfree(span);
                GDKfree(init);
@@ -228,7 +234,7 @@ OPTdataflowImplementation(Client cntxt, 
 
                if (p->token == ENDsymbol)
                        break;
-               if (!dflowInstruction(p) || (!dumbcopy && blockExit(p)) || 
dflowAssignTest(span,p,i) || dflowUpdateTest(span,p,i)){
+               if (!dflowInstruction(p) || (!dumbcopy && blockExit(p)) || 
dflowAssignTest(span,p,i) ){
                        /* close old flow block */
                        if (flowblock){
                                int sf = simpleFlow(old,start,i);
@@ -236,43 +242,20 @@ OPTdataflowImplementation(Client cntxt, 
                                if (!sf && entries > 1){
                                        for( j=start ; j<i; j++)
                                        if (old[j]) {
-                                               for( k=0; k<old[j]->retc; k++)
-                                               if( 
getBeginLifespan(span,getArg(old[j],k)) >= start && 
getEndLifespan(span,getArg(old[j],k)) >= i && init[getArg(old[j],k)]==0){
-                                                       InstrPtr r= 
newAssignment(mb);
-                                                       getArg(r,0)= 
getArg(old[j],k);
-                                                       
pushNil(mb,r,getArgType(mb,old[j],k));
-                                                       
init[getArg(old[j],k)]=1;
-                                               }
+                                               DFLOWinitvars(mb, old[j], 
start, i, span, init);
                                                /* collect variables garbage 
collected within the block */
                                                for( k=old[j]->retc; 
k<old[j]->argc; k++)
-                                                       if( 
getEndLifespan(span, var = getArg(old[j],k)) == j && usage[var]==1 && 
!isVarConstant(mb, var) )
+                                                       if( 
getEndLifespan(span, var = getArg(old[j],k)) == j && usage[var]>=1 && 
isaBatType(getVarType(mb,var)))
                                                                top = 
dflowGarbagesink(mb,old, start, i, getArg(old[j],k), sink,top);
                                                        else
-                                                       if( 
getEndLifespan(span,getArg(old[j],k)) < i && !isVarConstant(mb, var) )
+                                                       if( 
getEndLifespan(span,getArg(old[j],k)) < i && isaBatType(getVarType(mb,var)))
                                                                
usage[getArg(old[j],k)]++;
                                                assert(top <mb->vsize);
                                        }
-                                       q= 
newFcnCall(mb,languageRef,dataflowRef);
-                                       q->barrier= BARRIERsymbol;
-                                       getArg(q,0)= flowblock;
-                                       /* dataflow blocks are transparent, 
because they are always
-                                          executed, either sequentially or in 
parallell */
-                                       varSetProperty(mb, getArg(q,0), 
"transparent",0,0);
+                                       startblock();
                                }
-                               for( j=start ; j<i; j++)
-                                       if (old[j])
-                                               pushInstruction(mb,old[j]);
-                               for( j=0; j<top; j++)
-                                               pushInstruction(mb,sink[j]);
-                               if (!sf && entries>1){
-                                       q= newAssignment(mb);
-                                       q->barrier= EXITsymbol;
-                                       getArg(q,0) = flowblock;
-                               }
-                               /* inject the optional garbage sink statement */
-                               entries = 0;
-                               flowblock = 0;
-                               actions++;
+                               copyblock();
+                               exitblock();
                        }
                        pushInstruction(mb,p);
                        continue;
@@ -288,43 +271,20 @@ OPTdataflowImplementation(Client cntxt, 
                                        if (!sf && entries > 1){
                                                for( j=start ; j<i; j++)
                                                if (old[j]) {
-                                                       for( k=0; 
k<old[j]->retc; k++)
-                                                       if( 
getBeginLifespan(span,getArg(old[j],k)) >= start && 
getEndLifespan(span,getArg(old[j],k)) >= i && init[getArg(old[j],k)]==0){
-                                                               InstrPtr r= 
newAssignment(mb);
-                                                               getArg(r,0)= 
getArg(old[j],k);
-                                                               
pushNil(mb,r,getArgType(mb,old[j],k));
-                                                               
init[getArg(old[j],k)]=1;
-                                                       }
+                                                       DFLOWinitvars(mb, 
old[j], start, i, span, init);
                                                        /* collect variables 
garbagecollected in the block */
                                                        for( k=old[j]->retc; 
k<old[j]->argc; k++)
-                                                       if( 
getEndLifespan(span, var = getArg(old[j],k)) == j && usage[var]==1 && 
!isVarConstant(mb, var) )
-                                                               top = 
dflowGarbagesink(mb,old, start, i, getArg(old[j],k), sink,top);
-                                                       else
-                                                       if( 
getEndLifespan(span,getArg(old[j],k)) < i && !isVarConstant(mb, var) )
-                                                               
usage[getArg(old[j],k)]++;
+                                                               if( 
getEndLifespan(span, var = getArg(old[j],k)) == j && usage[var]>=1 && 
isaBatType(getVarType(mb,var)))
+                                                                       top = 
dflowGarbagesink(mb,old, start, i, getArg(old[j],k), sink,top);
+                                                               else
+                                                               if( 
getEndLifespan(span,getArg(old[j],k)) < i && isaBatType(getVarType(mb,var)))
+                                                                       
usage[getArg(old[j],k)]++;
                                                }
-                                               q= 
newFcnCall(mb,languageRef,dataflowRef);
-                                               q->barrier= BARRIERsymbol;
-                                               getArg(q,0)= flowblock;
-                                               /* dataflow blocks are 
transparent, because they are always
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to