Changeset: 876696d85bde for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=876696d85bde
Removed Files:
monetdb5/modules/atoms/mcurl.h
Modified Files:
clients/Tests/exports.stable.out
monetdb5/mal/mal_recycle.c
monetdb5/mal/mal_recycle.h
monetdb5/modules/atoms/Makefile.ag
monetdb5/modules/atoms/mcurl.c
Branch: default
Log Message:
merge with local clone
diffs (truncated from 386 to 300 lines):
diff --git a/monetdb5/mal/mal_recycle.c b/monetdb5/mal/mal_recycle.c
--- a/monetdb5/mal/mal_recycle.c
+++ b/monetdb5/mal/mal_recycle.c
@@ -91,19 +91,37 @@ static lng recycleSearchTime =0; /* cach
* The profiler record is re-used to store recycler information.
* The clk is used by the LRU scheme, counter is the number of
* times this pattern was used, ticks is the clock ticks
- * used to produce the result. rbytes+wbytes depict the storage
- * size of operands and result arguments.
+ * used to produce the result. wbytes is the best approximation
+ * of the amount of storage needed for the intermediate.
+ * The read cost depends on too many factors.
*
- * The cost function is a weighted balance between cpu and
- * storage cost. Often there is a direct relationship,
+ * For each intermediate we have to assume that at some point
+ * it is written to disk. This is the most expensive cost involved.
+ * For it means we also have to read it back in again.
+ * This leads to a cost estimate C =2 * writecost(Bytes)
+ * If this number is smaller the the inital cpu cost it is
+ * worth considering to keep the result. Otherwise we simply
+ * recalculate it.
+ * Once stored, an intermediate becomes more valuable as it is
+ * reused more often.
+ *
+ * If we don't write the intermediate, then its actual read time
+ * is the normalised cost.
*/
+#define MB (1024*1024)
+#define USECperMB (75/1000000.0) /* 75MB per second, should be determined once
*/
+#define IOcost(B) (B/(MB) * USECperMB)
+#define recycleProfit2(X) (recycleBlk->profiler[X].wbytes?
IOcost(recycleBlk->profiler[X].wbytes): recycleBlk->profiler[X].ticks)
+
#define recycleCost(X) (recycleBlk->profiler[X].wbytes)
-#define recycleW(X) ((recycleBlk->profiler[X].trace &&
(recycleBlk->profiler[X].calls >1 )) ? \
- (recycleBlk->profiler[X].calls
-1) : 0.1 )
+#define recycleW(X) ((recycleBlk->profiler[X].calls >1 ) ?
recycleBlk->profiler[X].calls : 0.1 )
#define recycleLife(X) ((GDKusec() - recycleBlk->profiler[X].rbytes)/ 1000.0)
-#define recycleProfit(X) (recycleCost(X) * recycleW(X) / recycleLife(X))
+#define recycleProfit(X) recycleCost(X) * recycleW(X)
+/*
+ * The new cost function is focussed on minimizing the IO overhead
+ */
static str bindRef = 0, bind_idxRef = 0, sqlRef = 0;
static str subselectRef = 0, thetasubselectRef = 0, likesubselectRef = 0;
@@ -183,6 +201,7 @@ RECYCLEgarbagecollect(MalBlkPtr mb, Inst
}
/* leaves : array of indices of leaf instructions */
+/* choose victum based on the predicted benefit from keeping it around */
static
int chooseVictims(Client cntxt, int *leaves, int ltop, lng wr)
@@ -196,12 +215,18 @@ int chooseVictims(Client cntxt, int *lea
wben = (dbl *)GDKzalloc(sizeof(dbl)*ltop);
for (l = 0; l < ltop; l++){
sz = recycleBlk->profiler[leaves[l]].wbytes;
- ben = recycleProfit(leaves[l]);
+ ben = recycleProfit2(leaves[l]);
wben[l] = sz? ben / sz : -1;
+#ifdef _DEBUG_CACHE_
+ mnstr_printf(cntxt->fdout,"#leaf[%d], wr "LLFMT" size "LLFMT"
benefit %6.2f (%6.2f) wben %6.2f\n" ,l, wr, sz, (float)ben, recycleProfit2(l),
(float)wben[l]);
+#endif
totmem += sz;
}
if (totmem <= wr) { /* all leaves need to be dropped */
GDKfree(wben);
+#ifdef _DEBUG_CACHE_
+ mnstr_printf(cntxt->fdout,"#drop all leaves\n");
+#endif
return ltop;
}
@@ -230,14 +255,14 @@ int chooseVictims(Client cntxt, int *lea
tmp = wben[mpos];
wben[mpos] = wben[newtop-1];
wben[newtop-1] = tmp;
- tot_ben += recycleProfit(tmpl);
+ tot_ben += recycleProfit2(tmpl);
}
else break;
}
/* compare benefits of knap-sack content and the critical item */
ci_ben = 0; /* find the critical item */
for (l = 0; l < ltop; l++) {
- ben = recycleProfit(leaves[l]);
+ ben = recycleProfit2(leaves[l]);
if (recycleBlk->profiler[leaves[l]].wbytes <= targmem &&
ben > ci_ben){
ci = l;
@@ -260,7 +285,7 @@ int chooseVictims(Client cntxt, int *lea
}
-static void RECYCLEcleanCache(Client cntxt, lng clk){
+static void RECYCLEcleanCache(Client cntxt){
int j,i,l,ltop,v,vtop;
InstrPtr p;
InstrPtr *old, *newstmt;
@@ -268,11 +293,15 @@ static void RECYCLEcleanCache(Client cnt
int k, *leaves, *vm;
int limit, idx;
size_t mem;
- lng oldclk,wr;
+ lng wr = recyclerMemoryUsed - MEMORY_THRESHOLD * monet_memory;
dbl minben, ben;
bte *used;
assert(recycleBlk);
+#ifdef _DEBUG_RESET_
+ mnstr_printf(cntxt->fdout,"#CACHE BEFORE CLEANUP\n");
+ RECYCLEdumpInternal(cntxt->fdout);
+#endif
newpass:
if (recycleBlk->stop == 0)
return;
@@ -288,7 +317,7 @@ newpass:
if (used[getArg(p,j)]<2) used[getArg(p,j)]++;
}
- /* find the leaves */
+ /* find the leafs */
lmask = (bit*)GDKzalloc(recycleBlk->stop);
ltop = 0;
for (i = 0; i < recycleBlk->stop; i++){
@@ -311,39 +340,18 @@ newpass:
}
GDKfree(lmask);
- /* find the oldest leave */
- oldclk = recycleBlk->profiler[leaves[0]].clk;
- idx = 0;
- for (l = 1; l < ltop; l++){
- k = leaves[l];
- if( recycleBlk->profiler[k].clk < oldclk){
- oldclk = recycleBlk->profiler[k].clk;
- idx = l;
- }
- }
-
- /* protect leaves from current the query invocation */
- if ( oldclk < clk) {
- l = 0;
- for (j = 0; j < ltop; j++)
- if (recycleBlk->profiler[leaves[j]].clk < clk)
- leaves[l++] = leaves[j];
- ltop = l;
- }
-
-
#ifdef _DEBUG_CACHE_
- wr = recyclerMemoryUsed - (lng) (MEMORY_THRESHOLD * monet_memory);
- mnstr_printf(cntxt->fdout,"#RECYCLEcleanCache: usedmem="LLFMT" target
memory freed "LLFMT"\n", recyclerMemoryUsed, wr);
- mnstr_printf(cntxt->fdout,"#Candidates for
eviction\n#LRU\t\tTicks\tLife\tSZ\tCnt\tWgt\tBen\tProf)\n");
- for (l = 0; l < ltop; l++)
- mnstr_printf(cntxt->fdout,"#%3d "LLFMT"\t"LLFMT"\t"LLFMT"\t
%5.2f\t "LLFMT"\t%3d\t%5.1f\n",
- leaves[l],clk,recycleBlk->profiler[leaves[l]].clk,
- recycleBlk->profiler[leaves[l]].ticks,
- recycleLife(leaves[l]),
- recycleBlk->profiler[leaves[l]].wbytes,
- recycleBlk->profiler[leaves[l]].calls,
- recycleW(leaves[l]));
+ mnstr_printf(cntxt->fdout,"#RECYCLEcleanCache: usedmem="LLFMT"\n",
recyclerMemoryUsed);
+ mnstr_printf(cntxt->fdout,"#Candidates for
eviction\n#LRU\tclk\t\tticks\t\twbytes\tCalls\tProf\tProf2\n");
+ for (l = 0; l < ltop; l++)
+ mnstr_printf(cntxt->fdout,"#%3d\t"LLFMT"\t"LLFMT"\t\t
"LLFMT"\t%3d\t%5.1f\t%5.1f\n",
+ leaves[l],
+ recycleBlk->profiler[leaves[l]].clk,
+ recycleBlk->profiler[leaves[l]].ticks,
+ recycleBlk->profiler[leaves[l]].wbytes,
+ recycleBlk->profiler[leaves[l]].calls,
+ recycleProfit(leaves[l]),
+ recycleProfit2(leaves[l]));
#endif
/* find entries to evict */
@@ -352,10 +360,10 @@ newpass:
vtop = 0;
if (!mem){ /* evict 1 entry */
- minben = recycleProfit(leaves[0]);
+ minben = recycleProfit2(leaves[0]);
idx = 0;
for (l = 1; l < ltop; l++){
- ben = recycleProfit(leaves[l]);
+ ben = recycleProfit2(leaves[l]);
if( ben < minben) {
minben = ben;
idx = l;
@@ -363,8 +371,8 @@ newpass:
}
vm[vtop++] = leaves[idx];
} else { /* evict several to get enough memory */
- wr = recyclerMemoryUsed - (lng) (MEMORY_THRESHOLD *
monet_memory);
- k = 0; /* exclude binds that don't free memory */
+ wr = recyclerMemoryUsed - MEMORY_THRESHOLD * monet_memory;
+ k = 0;
for (l = 0; l < ltop; l++) {
// also discard leaves that are more expensive to find
then compute
if( recycleBlk->profiler[leaves[l]].ticks <
recycleSearchTime)
@@ -437,7 +445,7 @@ newpass:
#ifdef _DEBUG_CACHE_
static void
-RECYCLEsync(InstrPtr p)
+RECYCLEsync(Client cntxt, InstrPtr p)
{
int i, j, k;
InstrPtr q;
@@ -454,6 +462,11 @@ RECYCLEsync(InstrPtr p)
&getVarConstant(recycleBlk,getArg(q,j))))
break;
if (j == p->argc) {
+#ifdef _DEBUG_RECYCLE_
+ mnstr_printf(cntxt->fdout,"#RECYCLE sync [%3d] ",i);
+#else
+ (void) cntxt;
+#endif
for(k=0; k< p->retc; k++){
pa = &getVarConstant(recycleBlk,getArg(p,k));
qa = &getVarConstant(recycleBlk,getArg(q,k));
@@ -496,9 +509,22 @@ RECYCLEkeep(Client cntxt, MalBlkPtr mb,
j= getArg(p,i);
v = &s->stk[j];
VALcopy(&cst,v);
- c = fndConstant(recycleBlk, &cst, recycleBlk->vtop);
- if (c<0)
- c = defConstant(recycleBlk, v->vtype, &cst);
+
+ if ( i >= p->retc){
+ c = fndConstant(recycleBlk, &cst, recycleBlk->vtop);
+ if (c<0)
+ c = defConstant(recycleBlk, v->vtype, &cst);
+ } else {
+ c = newTmpVariable(recycleBlk, v->vtype);
+ setVarConstant(recycleBlk, c);
+ setVarFixed(recycleBlk, c);
+ if (v->vtype >= 0 && ATOMextern(v->vtype))
+ setVarCleanup(recycleBlk, c);
+ else
+ clrVarCleanup(recycleBlk, c);
+ v = &getVarConstant(recycleBlk, c);
+ *v = cst;
+ }
if (v->vtype == TYPE_bat)
BBPincref( *(const int*)VALptr(v), TRUE);
setVarUsed(recycleBlk,c);
@@ -506,7 +532,7 @@ RECYCLEkeep(Client cntxt, MalBlkPtr mb,
}
#ifdef _DEBUG_RECYCLE_
mnstr_printf(cntxt->fdout,"#RECYCLE [%3d] ",recycleBlk->stop);
- printInstruction( cntxt->fdout,recycleBlk, 0, q,LIST_MAL_DEBUG);
+ printInstruction( cntxt->fdout,recycleBlk, 0, q, LIST_MAL_DEBUG);
#else
(void) cntxt;
#endif
@@ -521,7 +547,7 @@ RECYCLEkeep(Client cntxt, MalBlkPtr mb,
recyclerMemoryUsed += wr;
#ifdef _DEBUG_CACHE_
- RECYCLEsync(q);
+ if(0)RECYCLEsync(cntxt,q);
#endif
}
@@ -901,7 +927,7 @@ RECYCLEentry(Client cntxt, MalBlkPtr mb,
if ( i>=0 ){
MT_lock_set(&recycleLock, "recycle");
mnstr_printf(cntxt->fdout,"#REUSED [%3d] ",i);
- printInstruction(cntxt->fdout,mb,0,p, LIST_MAL_STMT);
+
printInstruction(cntxt->fdout,recycleBlk,0,getInstrPtr(recycleBlk,i),
LIST_MAL_DEBUG);
MT_lock_unset(&recycleLock, "recycle");
}
#endif
@@ -922,7 +948,7 @@ RECYCLEexitImpl(Client cntxt, MalBlkPtr
MT_lock_set(&recycleLock, "recycle");
if ( (GDKmem_cursize() > MEMORY_THRESHOLD * monet_memory &&
recyclerMemoryUsed > MEMORY_THRESHOLD * monet_memory) || recycleBlk->stop ==
recycleCacheLimit)
- RECYCLEcleanCache(cntxt, mb->starttime);
+ RECYCLEcleanCache(cntxt);
if ( RECYCLEinterest(p)){
/* infinite case, admit all new instructions */
@@ -953,12 +979,14 @@ RECYCLEdrop(Client cntxt){
if( recycleBlk == NULL)
return ;
+ MT_lock_set(&recycleLock, "recycle");
used = (bte*)GDKzalloc(recycleBlk->vtop);
- MT_lock_set(&recycleLock, "recycle");
#ifdef _DEBUG_RECYCLE_
- mnstr_printf(cntxt->fdout,"#RECYCLE drop\n");
- printFunction(cntxt->fdout, recycleBlk,0,0);
- printStack(cntxt->fdout,mb,0);
+ if( cntxt) {
+ mnstr_printf(cntxt->fdout,"#RECYCLE drop\n");
+ printFunction(cntxt->fdout, recycleBlk,0,0);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list