Update of /cvsroot/monetdb/pathfinder/compiler/sql
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv24443/sql
Modified Files:
lalg2sql.brg
Log Message:
-- Introduced serialization variants that take into account a distinct
operator and a rank operator sitting on top of the query plan.
This prevents us from losing the context information before the last
step (which collects all subtree nodes for serialization) and also
avoids repeated sorting.
Index: lalg2sql.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/sql/lalg2sql.brg,v
retrieving revision 1.88
retrieving revision 1.89
diff -u -d -r1.88 -r1.89
--- lalg2sql.brg 20 Nov 2007 16:57:33 -0000 1.88
+++ lalg2sql.brg 26 Nov 2007 10:06:25 -0000 1.89
@@ -175,36 +175,41 @@
%%
Query: serialize_seq (Frag, Rel) = 1 (10);
+Query: serialize_seq (Frag, distinct (Rel)) = 2 (10);
+Query: serialize_seq (Frag, project (rank (Rel))) = 3 (10);
+Query: serialize_seq (Frag,
+ project (
+ rank (distinct (Rel)))) = 4 (10);
Query: serialize_seq (
frag_union (
empty_frag, fragment (twig (Twig))),
- roots_ (twig (Twig))) = 2 (10);
-Query: serialize_seq (Frag, empty_tbl) = 3 (10);
-Rel: lit_tbl = 4 (10);
-Rel: attach (Rel) = 5 (10);
+ roots_ (twig (Twig))) = 5 (10);
+Query: serialize_seq (Frag, empty_tbl) = 6 (10);
+Rel: lit_tbl = 9 (10);
+Rel: attach (Rel) = 10 (10);
-Rel: cross (Rel, Rel) = 6 (10);
-Rel: eqjoin (Rel, Rel) = 7 (10);
-Rel: semijoin (Rel, Rel) = 8 (10);
-Rel: thetajoin (Rel, Rel) = 9 (10);
-Rel: project (Rel) = 10 (10);
-Rel: select_ (Rel) = 11 (10);
-Rel: pos_select (Rel) = 12 (10);
+Rel: cross (Rel, Rel) = 11 (10);
+Rel: eqjoin (Rel, Rel) = 12 (10);
+Rel: semijoin (Rel, Rel) = 13 (10);
+Rel: thetajoin (Rel, Rel) = 14 (10);
+Rel: project (Rel) = 15 (10);
+Rel: select_ (Rel) = 16 (10);
+Rel: pos_select (Rel) = 17 (10);
-Rel: disjunion (Rel, Rel) = 15 (10);
-Rel: intersect (Rel, Rel) = 16 (10);
-Rel: difference (Rel, Rel) = 17 (10);
-Rel: distinct (Rel) = 18 (10);
+Rel: disjunion (Rel, Rel) = 20 (10);
+Rel: intersect (Rel, Rel) = 21 (10);
+Rel: difference (Rel, Rel) = 22 (10);
+Rel: distinct (Rel) = 23 (10);
-Rel: fun_1to1 (Rel) = 20 (10);
-Rel: num_eq (Rel) = 21 (10);
-Rel: num_gt (Rel) = 22 (10);
-Rel: bool_and (Rel) = 23 (10);
-Rel: bool_or (Rel) = 24 (10);
-Rel: bool_not (Rel) = 25 (10);
-Rel: type (Rel) = 26 (10);
-Rel: type_assert (Rel) = 27 (10);
-Rel: cast (Rel) = 28 (10);
+Rel: fun_1to1 (Rel) = 25 (10);
+Rel: num_eq (Rel) = 26 (10);
+Rel: num_gt (Rel) = 27 (10);
+Rel: bool_and (Rel) = 28 (10);
+Rel: bool_or (Rel) = 29 (10);
+Rel: bool_not (Rel) = 30 (10);
+Rel: type (Rel) = 31 (10);
+Rel: type_assert (Rel) = 32 (10);
+Rel: cast (Rel) = 33 (10);
Rel: avg (Rel) = 35 (10);
Rel: max_ (Rel) = 36 (10);
@@ -454,6 +459,7 @@
#define ITER_ column_name (special_col (sql_col_iter))
#define POS_ column_name (special_col (sql_col_pos))
#define MAX_ column_name (special_col (sql_col_max))
+#define DIST_ column_name (special_col (sql_col_dist))
#define PRE(n) ext_column_name (n, special_col (sql_col_pre))
#define SIZE(n) ext_column_name (n, special_col (sql_col_size))
@@ -1600,13 +1606,13 @@
switch (rule) {
/* Rel: cross (Rel, Rel) */
- case 6:
+ case 11:
/* Rel: eqjoin (Rel, Rel) */
- case 7:
+ case 12:
/* Rel: semijoin (Rel, Rel) */
- case 8:
+ case 13:
/* Rel: thetajoin (Rel, Rel) */
- case 9:
+ case 14:
/* Rel: string_join (Rel, Rel) */
case 43:
/* Rel: cond_err (Rel, Rel) */
@@ -1621,9 +1627,45 @@
}
switch (rule) {
+ /* Query: serialize_seq (Frag, project (rank (Rel))) */
+ case 3:
+ /* Query: serialize_seq (Frag,
+ project (
+ rank (distinct (Rel)))) */
+ case 4:
+ {
+ /* The main implementation happens inside the scope
+ of the next case block into which we fall through.
+ Here we prepare the translation by resolving
+ any renaming done by the projection operator. */
+ PFalg_att_t item = p->sem.ser_seq.item;
+ PFalg_att_t pos = p->sem.ser_seq.pos;
+
+ assert (R(p)->schema.count <= 2);
+ if (R(p)->sem.proj.items[0].new == item)
+ item = R(p)->sem.proj.items[0].old;
+ else if (R(p)->sem.proj.items[1].new == item)
+ item = R(p)->sem.proj.items[1].old;
+
+ if (R(p)->sem.proj.items[0].new == pos)
+ pos = R(p)->sem.proj.items[0].old;
+ else if (R(p)->sem.proj.items[1].new == pos)
+ pos = R(p)->sem.proj.items[1].old;
+
+ /* ignore the projection operator */
+ p->sem.ser_seq.item = item;
+ p->sem.ser_seq.pos = pos;
+ } /* fall through */
+
/* Query: serialize_seq (Frag, Rel) */
case 1:
+ /* Query: serialize_seq (Frag, distinct (Rel)) */
+ case 2:
{
+ /* lookup whether we need a SELECT or a SELECT DISTINCT */
+ bool distinct = rule == 2 || rule == 4;
+ /* use the second non-terminal as the query part */
+ PFla_op_t *query = kids[1];
PFalg_att_t item = p->sem.ser_seq.item;
PFalg_att_t pos = p->sem.ser_seq.pos;
PFalg_simple_type_t item_ty = type_of (p, p->sem.ser_seq.item);
@@ -1633,14 +1675,52 @@
*orderby = NULL,
*ser_info,
*finalquery = NULL;
+
+ /* Make sure that we bind the input if we have
+ to perform outer-joins (for mixed results)
+ and our input has more than one input relation
+ or contains a where clause. */
+ if (item_ty & aat_node && item_ty & ~aat_node &&
+ (PFarray_last (FROMLIST(query)) != 1 ||
+ PFarray_last(WHERELIST(query))))
+ bind_operator (query, false);
- assert (PFarray_last (COLMAP(R(p))));
+ assert (PFarray_last (COLMAP(query)));
assert (monomorphic (pos_ty));
- orderby = col_env_lookup (COLMAP(R(p)), pos, pos_ty);
- orderby = IS_LITERAL(orderby)
- ? NULL : sortkey_list (sortkey_item (orderby, true));
+ /* The order criterion consists of the column pos. */
+ if (rule == 1 || rule == 2) {
+ orderby = col_env_lookup (COLMAP(query), pos, pos_ty);
+ orderby = IS_LITERAL(orderby)
+ ? NULL : sortkey_list (sortkey_item (orderby, true));
+ }
+ /* The order is represented by the order list of a rank operator.
+ Transform the list of order columns into a SQL ORDER BY list. */
+ else {
+ PFord_ordering_t sortby;
+ PFalg_att_t ord;
+ bool asc;
+
+ assert (R(p)->kind == la_project &&
+ RL(p)->kind == la_rank &&
+ pos == RL(p)->sem.rank.res);
+
+ sortby = RL(p)->sem.rank.sortby;
+ for (int i = PFord_count (sortby) - 1; i >= 0; i--) {
+ ord = PFord_order_col_at (sortby, i);
+ asc = PFord_order_dir_at (sortby, i) == DIR_ASC;
+ orderby = sortkey_list (
+ sortkey_item (
+ col_env_lookup (
+ COLMAP(query),
+ ord,
+ type_of (query, ord)),
+ asc),
+ orderby);
+
+ }
+ }
/* construct schema information for serializer */
ser_info = ser_info_item (
@@ -1669,8 +1749,8 @@
ser_info)))));
/* the item information */
- for (unsigned int i = 0; i < PFarray_last (COLMAP(R(p))); i++) {
- sql_column_env_t entry = col_env_at (COLMAP(R(p)), i);
+ for (unsigned int i = 0; i < PFarray_last (COLMAP(query)); i++) {
+ sql_column_env_t entry = col_env_at (COLMAP(query), i);
if (((entry.att == item) && !(entry.type & aat_node)) &&
(entry.att != pos)) {
sername = COLUMN_NAME (att_item, entry.type);
@@ -1690,6 +1770,29 @@
ser_map (sername, colname),
ser_info);
}
+ /* If we have to eliminate duplicates then we are not
+ allowed to remove any column from the selection list. */
+ else if (distinct) {
+ colexpr = entry.expression;
+ /* Ensure that boolean columns are replaced. */
+ if (entry.type == aat_bln)
+ colexpr = case_ (when (colexpr, lit_int (1)),
+ else_ (lit_int (0)));
+
+ /* Generate a special column name DIST
+ that can be overloaded ... */
+ sername = DIST_;
+ /* ... and overload it with a unique identifier. */
+ sername->sem.column.name->ty = i;
+
+ /* Rename the columns to avoid name conflicts
+ during serialization. */
+ if (colexpr->kind != sql_column_assign)
+ colexpr = column_assign (colexpr, sername);
+
+ /* Add the column to the select list. */
+ selectlist = select_list (colexpr, selectlist);
+ }
}
/* our result contains only atomic values */
@@ -1700,10 +1803,10 @@
ser_info);
finalquery = PFsql_select (
- false,
+ distinct,
select_list (selectlist),
- transform_frommap (R(p)),
- transform_wheremap (R(p)),
+ transform_frommap (query),
+ transform_wheremap (query),
orderby,
NULL);
}
@@ -1718,14 +1821,14 @@
so we return just the values we get */
if (SER_REPORT(p) == ser_yes) {
PFsql_aident_t twig = col_env_lookup_step (
- COLMAP(R(p)),
+ COLMAP(query),
item,
item_ty);
assert (twig != PF_SQL_ALIAS_UNBOUND);
- if (RANK_MAP(R(p)) &&
- PFarray_last (RANK_MAP(R(p))) >= 1) {
+ if (RANK_MAP(query) &&
+ PFarray_last (RANK_MAP(query)) >= 1) {
PFarray_t *list;
PFsql_t *sort_item;
rank_map_t *rank_map;
@@ -1733,7 +1836,7 @@
orderby = NULL;
rank_map = *(rank_map_t **)
- PFarray_at (RANK_MAP(R(p)), 0);
+ PFarray_at (RANK_MAP(query), 0);
assert (rank_map->name == p->sem.ser_seq.pos);
assert (rank_map->sort_list);
@@ -1747,15 +1850,16 @@
}
}
finalquery = PFsql_select (
- false,
+ distinct,
select_list (
PRE(twig),
SIZE(twig),
KIND(twig),
VALUE(twig),
- NAME(twig)),
- transform_frommap (R(p)),
- transform_wheremap (R(p)),
+ NAME(twig),
+ selectlist),
+ transform_frommap (query),
+ transform_wheremap (query),
orderby,
NULL);
} else {
@@ -1776,23 +1880,24 @@
frags = table_name (newtable);
}
- from_list_copy (FROMLIST(p), FROMLIST(R(p)));
- where_list_copy (WHERELIST(p), WHERELIST(R(p)));
+ from_list_copy (FROMLIST(p), FROMLIST(query));
+ where_list_copy (WHERELIST(p), WHERELIST(query));
/* make sure to get the full table schema
for the context nodes */
- doc1 = col_env_lookup_step (COLMAP(R(p)), item, item_ty);
+ doc1 = col_env_lookup_step (COLMAP(query), item, item_ty);
if (doc1 == PF_SQL_ALIAS_UNBOUND) {
doc1 = new_alias ();
from_list_add (FROMLIST(p), frags, doc1);
where_list_add (WHERELIST(p),
eq (col_env_lookup (
- COLMAP(R(p)), item, item_ty),
+ COLMAP(query), item, item_ty),
PRE(doc1)));
}
- if (col_env_lookup_step_leaf (COLMAP(R(p)), item, item_ty)
- && col_env_lookup_step (COLMAP(R(p)), item, item_ty))
+ /* avoid step if we already have leaf nodes in our hands */
+ if (col_env_lookup_step_leaf (COLMAP(query), item, item_ty)
+ && col_env_lookup_step (COLMAP(query), item, item_ty))
doc2 = doc1;
else {
doc2 = new_alias ();
@@ -1806,13 +1911,14 @@
}
finalquery = PFsql_select (
- false,
+ distinct,
select_list (
PRE(doc2),
SIZE(doc2),
KIND(doc2),
VALUE(doc2),
- NAME(doc2)),
+ NAME(doc2),
+ selectlist),
transform_frommap (p),
transform_wheremap (p),
orderby,
@@ -1844,24 +1950,14 @@
frags = table_name (newtable);
}
- /* if the right child has more than on table
- * or it contains a where-clause bind it */
- if (PFarray_last (FROMLIST(R(p))) != 1 ||
- PFarray_last(WHERELIST(R(p))))
- bind_operator (R(p), false);
-
- item_expr = col_env_lookup (COLMAP(R(p)), item, aat_pre);
- orderby = col_env_lookup (COLMAP(R(p)), pos, pos_ty);
- orderby = IS_LITERAL(orderby)
- ? NULL
- : sortkey_list (sortkey_item (orderby, true));
+ item_expr = col_env_lookup (COLMAP(query), item, aat_pre);
doc1 = new_alias ();
doc2 = new_alias ();
- from = from_list_at (FROMLIST(R(p)), 0);
+ from = from_list_at (FROMLIST(query), 0);
from_bind = alias_bind (from.table, alias (from.alias));
finalquery = PFsql_select (
- false,
+ distinct,
select_list (
selectlist,
PRE(doc2),
@@ -1914,7 +2010,7 @@
* frag_union (empty_frag, Frag),
* roots_ (twig (Twig)))
*/
- case 2:
+ case 5:
{
PFsql_aident_t content = new_alias ();
PFsql_t *sortkey_list = NULL;
@@ -1933,7 +2029,7 @@
" DO NOT EDIT THESE LINES"),
ser_info_item (
ser_type (lit_str("Type"),
- lit_str("ATOMIC_ONLY")),
+ lit_str("NODES_ONLY")),
ser_info_item (
ser_map (PRE_, PRE_),
ser_info_item (
@@ -1999,13 +2095,13 @@
} break;
/* Query: serialize_seq (Frag, empty_tbl) */
- case 3:
+ case 6:
/* FIXME: implementation is missting */
assert (!"missing");
break;
/* Rel: lit_tbl */
- case 4:
+ case 9:
if (p->sem.lit_tbl.count == 1) {
for (unsigned int col = 0; col < p->schema.count; col++)
for (PFalg_simple_type_t t = 1; t; t <<= 1)
@@ -2110,7 +2206,7 @@
break;
/* Rel: attach (Rel) */
- case 5:
+ case 10:
/* copy all existing column, from, and where lists */
copy_cols_from_where (p, L(p));
@@ -2122,11 +2218,11 @@
break;
/* Rel: cross (Rel, Rel) */
- case 6:
+ case 11:
/* Rel: eqjoin (Rel, Rel) */
- case 7:
+ case 12:
/* Rel: thetajoin (Rel, Rel) */
- case 9:
+ case 14:
{
assert (kids[0] && nts[0]);
assert (kids[1] && nts[1]);
@@ -2238,7 +2334,7 @@
} break;
/* Rel: semijoin (Rel, Rel) */
- case 8:
+ case 13:
{
assert (kids[0] && nts[0]);
assert (kids[1] && nts[0]);
@@ -2318,7 +2414,7 @@
} break;
/* Rel: project (Rel) */
- case 10:
+ case 15:
{
sql_column_env_t entry;
@@ -2344,7 +2440,7 @@
} break;
/* Rel: select_ (Rel) */
- case 11:
+ case 16:
{
PFsql_t *sqlnode;
@@ -2369,7 +2465,7 @@
} break;
/* Rel: pos_select (Rel) */
- case 12:
+ case 17:
/* create a special translation for the positional
predicates [1] and [last()] */
if (p->schema.count <= 2 &&
@@ -2547,9 +2643,9 @@
break;
/* Rel: disjunion (Rel, Rel) */
- case 15:
+ case 20:
/* Rel: difference (Rel, Rel) */
- case 17:
+ case 22:
{
/* FIXME: the translation won't work if the same operator
is referenced twice (as the left environment is
@@ -2644,13 +2740,13 @@
} break;
/* Rel: intersect (Rel, Rel) */
- case 16:
+ case 21:
/* FIXME: implementation is missing */
assert (!"missing");
break;
/* Rel: distinct (Rel) */
- case 18:
+ case 23:
/* copy all existing column, from, and where lists */
copy_cols_from_where (p, L(p));
@@ -2660,7 +2756,7 @@
break;
/* Rel: fun_1to1 (Rel) */
- case 20:
+ case 25:
{
unsigned int count = p->sem.fun_1to1.refs.count;
PFalg_att_t att[count];
@@ -2751,9 +2847,9 @@
} break;
/* Rel: num_eq (Rel) */
- case 21:
+ case 26:
/* Rel: num_gt (Rel) */
- case 22:
+ case 27:
{
PFsql_t * (*op) (const PFsql_t *, const PFsql_t *);
@@ -2778,19 +2874,19 @@
} break;
/* Rel: bool_and (Rel) */
- case 23:
+ case 28:
/* FIXME: implementation is missing */
assert (!"missing");
break;
/* Rel: bool_or (Rel) */
- case 24:
+ case 29:
/* FIXME: implementation is missing */
assert (!"missing");
break;
/* Rel: bool_not (Rel) */
- case 25:
+ case 30:
{
PFsql_t *sqlnode;
@@ -2810,19 +2906,19 @@
} break;
/* Rel: type (Rel) */
- case 26:
+ case 31:
/* FIXME: implementation is missing */
assert (!"missing");
break;
/* Rel: type_assert (Rel) */
- case 27:
+ case 32:
/* FIXME: implementation is missing */
assert (!"missing");
break;
/* Rel: cast (Rel) */
- case 28:
+ case 33:
{
PFalg_att_t att = p->sem.type.att;
PFalg_simple_type_t ty = TYPE_MASK (type_of (p, att));
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins