Update of /cvsroot/monetdb/pathfinder/compiler/algebra
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv15416/compiler/algebra
Modified Files:
Tag: Aug2009_NFI
algebra.c algebra_cse.c builtins.c intro_borders.c logical.c
physical.c planner.c
Log Message:
optimizations for NFI XIRAF use case -- thanks a great bunch Jan R.!!
- ds_link (already in Stable) optimized for 1-node case
- indices now contain all data (but still not used automatically, nor based on
Lefteris' new indexing schemes)
most prominently though is: subexpression result caching
- caching hints in pragmas
- query enclosed in (# pf:session id:msec ) { query } or (# pf:session-use
id:msec ) { query }
+ queries in the same session use the same working set (documents opened only
once)
+ same working set allows to cache results
+ pf:session-use only uses cache, cannot add to it
- but, multiple pf:session-use can run concurrently; whereas pf:session is
exclusive
- inside a query, an arbitrary number of expressions can be marked up for
caching/reuse
+ (# pf:cache id ) { subexpr }
+ subexpr may not be enclosed by a for-loop
U algebra.c
Index: algebra.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algebra.c,v
retrieving revision 1.108
retrieving revision 1.108.6.1
diff -u -d -r1.108 -r1.108.6.1
--- algebra.c 12 Jun 2009 13:06:04 -0000 1.108
+++ algebra.c 28 Sep 2009 23:20:25 -0000 1.108.6.1
@@ -1164,6 +1164,7 @@
case alg_fun_call_xrpc: return "XRPC";
case alg_fun_call_xrpc_helpers: return "XRPC helper";
case alg_fun_call_tijah: return "Tijah";
+ case alg_fun_call_cache: return "Query Cache";
}
return NULL;
}
U physical.c
Index: physical.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/physical.c,v
retrieving revision 1.94
retrieving revision 1.94.6.1
diff -u -d -r1.94 -r1.94.6.1
--- physical.c 12 Jun 2009 13:06:10 -0000 1.94
+++ physical.c 28 Sep 2009 23:20:25 -0000 1.94.6.1
@@ -3360,6 +3360,50 @@
}
/**
+ * Constructor for a query cache
+ */
+PFpa_op_t *
+PFpa_cache (const PFpa_op_t *side_effects, const PFpa_op_t *n,
+ char *id, PFalg_col_t item)
+{
+ PFpa_op_t *ret = wire2 (pa_cache, side_effects, n);
+
+ assert (n);
+
+ /* allocate memory for the result schema */
+ ret->schema.count = n->schema.count;
+ ret->schema.items
+ = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
+
+ /* copy schema from n */
+ for (unsigned int i = 0; i < n->schema.count; i++) {
+ ret->schema.items[i] = n->schema.items[i];
+ }
+
+ ret->sem.cache.id = id;
+ ret->sem.cache.item = item;
+
+ /* costs */
+ ret->cost = DEFAULT_COST + side_effects->cost + n->cost;
+
+ return ret;
+}
+
+/**
+ * Border for cache operator describing which operators
+ * lie in-/outside.
+ */
+PFpa_op_t *
+PFpa_cache_border (const PFpa_op_t *n)
+{
+ /* the operator behaves like a recursion border */
+ PFpa_op_t *ret = PFpa_rec_border (n);
+ ret->kind = pa_cache_border;
+
+ return ret;
+}
+
+/**
* Constructor for a debug operator
*/
PFpa_op_t *
@@ -3843,6 +3887,16 @@
PFord_set_add (ret->orderings, sortby (schema.items[0].name,
schema.items[1].name));
+ /* Caching information is returned in iter|pos order */
+ if (kind == alg_fun_call_cache) {
+ /* if the loop relation is constant everything is sorted by pos */
+ if (PFprop_const (loop->prop, iter))
+ PFord_set_add (ret->orderings, sortby (schema.items[1].name));
+ /* the result is ordered by iter|pos */
+ PFord_set_add (ret->orderings, sortby (schema.items[0].name,
+ schema.items[1].name));
+ }
+
/* costs */
ret->cost = loop->cost + param_list->cost + DEFAULT_COST;
U planner.c
Index: planner.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/planner.c,v
retrieving revision 1.86.2.1
retrieving revision 1.86.2.1.2.1
diff -u -d -r1.86.2.1 -r1.86.2.1.2.1
--- planner.c 28 Sep 2009 13:42:26 -0000 1.86.2.1
+++ planner.c 28 Sep 2009 23:20:25 -0000 1.86.2.1.2.1
@@ -100,6 +100,7 @@
#include <assert.h>
#include <stdio.h>
+#include <string.h>
#include "planner.h"
@@ -1064,6 +1065,15 @@
order_col = op->sem.type.col;
op = L(op);
+ if (op->kind == la_project) {
+ for (unsigned int i = 0; i < op->sem.proj.count; i++) {
+ /* check for a non-problematic projection */
+ if (op->sem.proj.items[i].new != op->sem.proj.items[i].new)
+ return ret;
+ }
+ op = L(op);
+ }
+
/* and finally check for the row numbering operator
that provides the position information */
if (op->kind != la_rownum ||
@@ -2579,6 +2589,57 @@
}
/**
+ * Check if the position and item column of a given cached query
+ * are identical. In this case we are sure that item order is
+ * maintained as the cached result is always sorted by pos.
+ */
+static bool
+cache_item_ordered (const PFla_op_t *n, char *id)
+{
+ bool item_ordered = false;
+
+ if (n->kind == la_cache &&
+ !strcmp (id, n->sem.cache.id))
+ return n->sem.cache.pos == n->sem.cache.item;
+
+ for (unsigned int i = 0; i < PFLA_OP_MAXCHILD; i++)
+ if (n->child[i])
+ item_ordered |= cache_item_ordered (n->child[i], id);
+
+ return item_ordered;
+}
+
+/**
+ * 'cache' operator in the logical algebra just get a 1:1 mapping
+ * into the physical cache operator.
+ */
+static PFplanlist_t *
+plan_cache (const PFla_op_t *n)
+{
+ PFplanlist_t *ret = new_planlist ();
+ PFplanlist_t *sorted_n2 = new_planlist ();
+ PFalg_col_t pos = n->sem.cache.pos;
+
+ for (unsigned int i = 0; i < PFarray_last (R(n)->plans); i++)
+ add_plans (sorted_n2,
+ ensure_ordering (
+ *(plan_t **) PFarray_at (R(n)->plans, i),
+ sortby (pos)));
+
+ /* for each plan, generate a cache operator */
+ for (unsigned int i = 0; i < PFarray_last (L(n)->plans); i++)
+ for (unsigned int j = 0; j < PFarray_last (sorted_n2); j++)
+ add_plan (ret,
+ cache (
+ *(plan_t **) PFarray_at (L(n)->plans, i),
+ *(plan_t **) PFarray_at (sorted_n2, j),
+ n->sem.cache.id,
+ n->sem.cache.item));
+
+ return ret;
+}
+
+/**
* Constructor for a debug operator
*/
static PFplanlist_t *
@@ -2673,7 +2734,7 @@
* into the physical fun_call operator.
*/
static PFplanlist_t *
-plan_fun_call (const PFla_op_t *n)
+plan_fun_call (const PFla_op_t *n, const PFla_op_t *root)
{
unsigned int i;
PFplanlist_t *ret = new_planlist ();
@@ -2701,7 +2762,8 @@
return ret;
else {
PFplanlist_t *single_plan = new_planlist ();
- plan_t *cheapest_plan = NULL;
+ plan_t *cheapest_plan = NULL,
+ *sorted_plan;
/* find the cheapest plan */
for (unsigned int i = 0; i < PFarray_last (ret); i++)
@@ -2711,6 +2773,35 @@
cheapest_plan = *(plan_t **) PFarray_at (ret, i);
add_plan (single_plan, cheapest_plan);
+
+ if (n->sem.fun_call.kind == alg_fun_call_cache) {
+ /* lookup the cache id */
+ char *id;
+ assert (R(n)->kind == la_fun_param &&
+ RR(n)->kind == la_nil &&
+ R(n)->schema.count == 3 &&
+ PFprop_const (R(n)->prop, R(n)->schema.items[2].name));
+ id = (PFprop_const_val (R(n)->prop,
+ R(n)->schema.items[2].name)).val.str;
+
+ /* Add more ordering information if the input/output is guaranteed
+ to be sorted on the item column. */
+ if (cache_item_ordered (root, id)) {
+ PFord_set_add (cheapest_plan->orderings,
+ sortby (n->schema.items[2].name));
+ }
+ /* Add heuristic that says that cached node sequences are
+ often sorted by their preorder. This is exploited
+ by explicitly assigning the sort operation no cost. */
+ else {
+ sorted_plan = std_sort (cheapest_plan,
+ sortby(n->schema.items[2].name));
+ /* make the sort cost as inexpensive as possible */
+ sorted_plan->cost = cheapest_plan->cost + 1;
+ add_plan (single_plan, sorted_plan);
+ }
+ }
+
return single_plan;
}
}
@@ -3443,9 +3534,10 @@
* skip planning in that case.
*
* @param n subexpression to translate
+ * @param root the root operator
*/
static void
-plan_subexpression (PFla_op_t *n)
+plan_subexpression (PFla_op_t *n, PFla_op_t *root)
{
PFplanlist_t *plans = NULL;
@@ -3466,7 +3558,7 @@
information is translated after the value part) */
for (unsigned int i = PFLA_OP_MAXCHILD; i > 0; i--)
if (n->child[i - 1])
- plan_subexpression (n->child[i - 1]);
+ plan_subexpression (n->child[i - 1], root);
}
/* Compute possible plans. */
@@ -3672,6 +3764,7 @@
add_plan (plans, nil ());
break;
+ case la_cache: plans = plan_cache (n); break;
case la_trace: plans = plan_trace (n); break;
case la_trace_items: plans = plan_trace_items (n); break;
case la_trace_msg: plans = plan_trace_msg (n); break;
@@ -3693,7 +3786,7 @@
assert (cur->kind == la_rec_param &&
L(cur)->kind == la_rec_arg);
rec_arg = L(cur);
- plan_subexpression (L(rec_arg));
+ plan_subexpression (L(rec_arg), root);
cur = R(cur);
}
@@ -3730,15 +3823,15 @@
assert (cur->kind == la_rec_param &&
L(cur)->kind == la_rec_arg);
rec_arg = L(cur);
- plan_subexpression (R(rec_arg));
+ plan_subexpression (R(rec_arg), root);
cur = R(cur);
}
/* create plans for the side effects */
- plan_subexpression (LL(n));
+ plan_subexpression (LL(n), root);
/* create plans for the result relation */
- plan_subexpression (R(n));
+ plan_subexpression (R(n), root);
/* put together all ingredients to form a physical
representation of the recursion (basically a 1:1
@@ -3776,9 +3869,9 @@
PFalg_fun_call_t old_fun_call_kind = fun_call_kind;
fun_call_kind = n->sem.fun_call.kind;
- plan_subexpression (L(n));
- plan_subexpression (R(n));
- plans = plan_fun_call (n);
+ plan_subexpression (L(n), root);
+ plan_subexpression (R(n), root);
+ plans = plan_fun_call (n, root);
fun_call_kind = old_fun_call_kind;
} break;
@@ -3826,6 +3919,7 @@
case la_frag_union:
case la_empty_frag:
case la_nil:
+ case la_cache:
case la_trace:
case la_trace_msg:
case la_trace_map:
@@ -3924,7 +4018,7 @@
assert (root->kind == la_serialize_seq);
/* compute all interesting plans for root */
- plan_subexpression (root);
+ plan_subexpression (root, root);
/* Now pick the cheapest one. */
assert (root->plans);
U algebra_cse.c
Index: algebra_cse.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algebra_cse.c,v
retrieving revision 1.85
retrieving revision 1.85.6.1
diff -u -d -r1.85 -r1.85.6.1
--- algebra_cse.c 12 Jun 2009 13:06:06 -0000 1.85
+++ algebra_cse.c 28 Sep 2009 23:20:25 -0000 1.85.6.1
@@ -541,6 +541,12 @@
return true;
break;
+ case la_cache:
+ return (a->sem.cache.id == b->sem.cache.id &&
+ a->sem.cache.pos == b->sem.cache.pos &&
+ a->sem.cache.item == b->sem.cache.item);
+ break;
+
case la_trace:
case la_trace_items:
case la_trace_msg:
U intro_borders.c
Index: intro_borders.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/intro_borders.c,v
retrieving revision 1.2
retrieving revision 1.2.6.1
diff -u -d -r1.2 -r1.2.6.1
--- intro_borders.c 26 May 2009 11:46:07 -0000 1.2
+++ intro_borders.c 28 Sep 2009 23:20:25 -0000 1.2.6.1
@@ -269,7 +269,8 @@
pfIN(n) = true;
if (n == dep_op) {
- assert (n->kind == pa_dep_cross);
+ assert (n->kind == pa_dep_cross ||
+ n->kind == pa_cache);
mark_plan (L(n), dep_op);
@@ -284,7 +285,7 @@
* Worker for introduce_dep_borders.
*/
static void
-introduce_dep_borders_worker (PFpa_op_t *n)
+introduce_borders_worker (PFpa_op_t *n, PFpa_op_kind_t kind)
{
/* We mark only the independent operators as SEEN as we stop
as soon as we reach a boundary to an dependent operator. */
@@ -298,17 +299,19 @@
n->child[i]->kind != pa_nil) {
/* Introduce a border if a boundary is reached
and stop the traversal. */
- n->child[i] = PFpa_dep_border (n->child[i]);
+ n->child[i] = (kind == pa_dep_cross)
+ ?PFpa_dep_border (n->child[i])
+ :PFpa_cache_border (n->child[i]);
n->child[i]->prop = L(n->child[i])->prop;
}
else
/* Recursively traverse the plan otherwise. */
- introduce_dep_borders_worker (n->child[i]);
+ introduce_borders_worker (n->child[i], kind);
/* Rewrite dependent cross products that are nested in
the right side of another dependent cross into normal
cross products again. */
- if (n->kind == pa_dep_cross)
+ if (kind == pa_dep_cross && n->kind == pa_dep_cross)
/* we can replace the kind as the two cross
product operators behave exactly the same */
n->kind = pa_cross;
@@ -319,7 +322,7 @@
* introduce dependencies.
*/
static void
-introduce_dep_borders (PFpa_op_t *n, PFpa_op_t *root)
+introduce_borders (PFpa_op_t *n, PFpa_op_t *root)
{
if (SEEN(n))
return;
@@ -327,9 +330,9 @@
SEEN(n) = true;
/* Detect borders for this operator. */
- if (n->kind == pa_dep_cross) {
+ if (n->kind == pa_dep_cross || n->kind == pa_cache) {
/* Traverse the query plan for the left argument */
- introduce_dep_borders (L(n), root);
+ introduce_borders (L(n), root);
/* Mark all operators that are reachable from the
root (except for the right side of n) to detect
@@ -339,18 +342,18 @@
/* Dependent cross products that have no operators
that can be evaluated in dependence are rewritten
into normal cross products again. */
- if (pfIN(R(n)))
+ if (n->kind == pa_dep_cross && pfIN(R(n)))
/* we can replace the kind as the two cross
product operators behave exactly the same */
n->kind = pa_cross;
else
/* Introduce the borders for this operator. */
- introduce_dep_borders_worker (R(n));
+ introduce_borders_worker (R(n), n->kind);
in_reset (root);
}
else
for (unsigned int i = 0; i < PFPA_OP_MAXCHILD && n->child[i]; i++)
- introduce_dep_borders (n->child[i], root);
+ introduce_borders (n->child[i], root);
}
/**
@@ -365,8 +368,9 @@
PFpa_dag_reset (n);
/* Introduce border operators
- for dependency generating operators */
- introduce_dep_borders (n, n);
+ for dependency generating
+ and caching operators */
+ introduce_borders (n, n);
PFpa_dag_reset (n);
return n;
U logical.c
Index: logical.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/logical.c,v
retrieving revision 1.126
retrieving revision 1.126.6.1
diff -u -d -r1.126 -r1.126.6.1
--- logical.c 12 Jun 2009 13:06:09 -0000 1.126
+++ logical.c 28 Sep 2009 23:20:25 -0000 1.126.6.1
@@ -265,8 +265,9 @@
* or below a `rec_fix' operator if the side effects appear in the recursion
* body.
* The `side_effects' operator contains a (possibly empty) list of operations
- * that may trigger side effects (operators `error' and `trace') in its left
- * child and the fragment or recursion parameters in the right child.
+ * that may trigger side effects (operators `error', `cache' and `trace')
+ * in its left child and the fragment or recursion parameters in the right
+ * child.
*/
PFla_op_t *
PFla_side_effects (const PFla_op_t *side_effects, const PFla_op_t *params)
@@ -275,6 +276,7 @@
assert (side_effects);
assert (side_effects->kind == la_error ||
+ side_effects->kind == la_cache ||
side_effects->kind == la_trace ||
side_effects->kind == la_nil);
assert (params);
@@ -3616,6 +3618,40 @@
/**
+ * Constructor for a caching operator.
+ *
+ * This operator puts a query to the query cache (with the key in @a id).
+ */
+PFla_op_t *
+PFla_cache (const PFla_op_t *n1, const PFla_op_t *n2, char *id,
+ PFalg_col_t pos, PFalg_col_t item)
+{
+ PFla_op_t *ret;
+ unsigned int i;
+
+ assert(n1);
+
+ ret = la_op_wire2 (la_cache, n1, n2);
+
+ /* allocate memory for the result schema; it's the same schema as n's */
+ ret->schema.count = n2->schema.count;
+ ret->schema.items
+ = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
+
+ /* copy schema from argument 'n' */
+ for (i = 0; i < n2->schema.count; i++) {
+ ret->schema.items[i] = n2->schema.items[i];
+ }
+
+ ret->sem.cache.id = id;
+ ret->sem.cache.pos = pos;
+ ret->sem.cache.item = item;
+
+ return ret;
+}
+
+
+/**
* Constructor for a debug operator
*/
PFla_op_t *
@@ -4452,6 +4488,13 @@
case la_nil:
return PFla_nil ();
+ case la_cache:
+ return PFla_cache (left,
+ right,
+ n->sem.cache.id,
+ n->sem.cache.pos,
+ n->sem.cache.item);
+
case la_trace:
return PFla_trace (left, right);
U builtins.c
Index: builtins.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/builtins.c,v
retrieving revision 1.126.2.1
retrieving revision 1.126.2.1.2.1
diff -u -d -r1.126.2.1 -r1.126.2.1.2.1
--- builtins.c 7 Sep 2009 21:30:58 -0000 1.126.2.1
+++ builtins.c 28 Sep 2009 23:20:25 -0000 1.126.2.1.2.1
@@ -5794,6 +5794,62 @@
}
/**
+ * Algebra implementation for pragma pf:cache.
+ */
+struct PFla_pair_t
+PFbui_pf_query_cache (const PFla_op_t *loop,
+ bool ordering,
+ PFla_op_t **side_effects,
+ struct PFla_pair_t *args)
+{
+ (void) ordering;
+
+ /* We do forbidden side effects here --- They however don't harm us. */
+ PFprop_infer_card ((PFla_op_t *) loop);
+
+ /* If we are inside an iteration discard caching. */
+ if (PFprop_card(loop->prop) != 1)
+ return args[1];
+
+ /* Otherwise introduce a side effect for the caching and replace
+ the query by a cache lookup. */
+ else {
+ PFalg_simple_type_t ty = PFprop_type_of (args[1].rel, col_item);
+ char *id;
+ /* extract the id information */
+ PFprop_infer_const (args[0].rel);
+ assert (PFprop_const (args[0].rel->prop, col_item));
+ id = (PFprop_const_val (args[0].rel->prop, col_item)).val.str;
+
+ /* Provide a (possibly) new cache as side effect. */
+ *side_effects = cache (
+ *side_effects,
+ args[1].rel,
+ id,
+ col_pos,
+ col_item);
+
+ /* Use a cache lookup as the replacement. */
+ PFla_op_t *res = fun_call (loop, /* loop relation */
+ fun_param (args[0].rel, /* query cache id */
+ nil(),
+ ipi_schema(aat_str)),
+ ipi_schema(ty), /* i|p|i schema */
+ alg_fun_call_cache, /* function kind */
+ PFqname (PFns_wild, NULL), /* qname */
+ NULL, /* ctx */
+ col_iter, /* iter */
+ alg_occ_unknown); /* occurrence indicator */
+
+ return (struct PFla_pair_t) {
+ .rel = res,
+ .frag = (ty & aat_node)
+ ? PFla_set (frag_extract (res, 2))
+ : PFla_empty_set () };
+ }
+}
+
+/**
* The fs:distinct-doc-order function sorts its input sequence of
* nodes by document order and removes duplicates.
*/
@@ -6460,8 +6516,7 @@
.rel = rank (res,
col_pos,
sortby (col_item)),
- /* FIXME: if uncomment the next line there is a seqmentation fault */
- .frag = PFla_empty_set() }; /* PFla_set (frag_extract (res, 2)) }; */
+ .frag = PFla_set (frag_extract (res, 1)) };
}
/**
@@ -6488,8 +6543,7 @@
.rel = rank (res,
col_pos,
sortby (col_item)),
- /* FIXME: if uncomment the next line, a seqmentation fault appears */
- .frag = PFla_empty_set() }; /* PFla_set (frag_extract (res, 2)) }; */
+ .frag = PFla_set (frag_extract (res, 1)) };
}
/**
------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins