Changeset: c4a78e109706 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c4a78e109706
Modified Files:
sql/server/rel_optimizer.c
Branch: select-window-pushdown
Log Message:
Optimize filter push down for window functions
This change optimizes the push down of filters to the source tables of
aggregation window functions, which can lead to significant improvements in
performance (see issue #7301).
The filters are only pushed if they completely filter out entire groups. If
they only filter out rows of a group, they cannot be safely pushed down.
diffs (142 lines):
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -4457,6 +4457,89 @@ rel_push_groupby_down(visitor *v, sql_re
return rel;
}
+/*
+ * Gets the column expressions of a diff function and adds them to "columns".
+ * The diff function has two possible argument types: either a sql_exp
representing a column
+ * or a sql_exp representing another diff function, therefore this function is
recursive.
+ */
+static void
+get_diff_function_columns(sql_exp *diffExp, list *columns) {
+ list *args = diffExp->l;
+
+ for (node *arg = args->h; arg; arg = arg->next) {
+ sql_exp *exp = arg->data;
+
+ // diff function
+ if (exp->type == e_func) {
+ get_diff_function_columns(exp, columns);
+ }
+ // column
+ else {
+ list_append(columns, exp);
+ }
+ }
+}
+
+/*
+ * Builds a list of aggregation key columns to be used by the select push down
algorithm, namely for
+ * window functions. Returns NULL if the window function does not partition by
any column
+ */
+static list *
+get_aggregation_key_columns(sql_allocator *sa, sql_rel *r) {
+ for (node* n = r->exps->h; n; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (e->type == e_func) {
+ sql_subfunc *f = e->f;
+
+ // aggregation function
+ if (!strcmp(f->func->base.name, "rank")) {
+ list* rankArguments = e->l;
+ // the partition key is the second argument
+ sql_exp *partitionExp =
rankArguments->h->next->data;
+
+ // check if the key contains any columns, i.e.,
is a diff function
+ if (partitionExp->type == e_func) {
+ // get columns to list
+ list *aggColumns = sa_list(sa);
+ get_diff_function_columns(partitionExp,
aggColumns);
+ return aggColumns;
+ }
+ // the function has no aggregation columns
(e_atom of boolean)
+ else {
+ return NULL;
+ }
+
+ }
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Checks if a filter column is also used as an aggregation key, so it can be
later safely pushed down.
+ */
+static int
+filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) {
+ char *tableName = column->l;
+ char *columnName = column->r;
+
+ for (node *n = aggColumns->h; n; n = n->next) {
+ sql_exp *aggCol = n->data;
+ char *aggColTableName = aggCol->l;
+ char *aggColColumnName = aggCol->r;
+
+ if (!strcmp(tableName, aggColTableName) && !strcmp(columnName,
aggColColumnName)) {
+ /* match */
+ return 1;
+ }
+ }
+
+ /* no matches found */
+ return 0;
+}
+
+
/*
* Push select down, pushes the selects through (simple) projections. Also
* it cleans up the projections which become useless.
@@ -4605,6 +4688,48 @@ rel_push_select_down(visitor *v, sql_rel
n = next;
}
}
+
+ /* push filters if they match the aggregation key on a window
function */
+ else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps,
0)) {
+ set_processed(pl);
+ /* list of aggregation key columns */
+ list *aggColumns =
get_aggregation_key_columns(v->sql->sa, r);
+
+ /* aggregation keys found, check if any filter matches
them */
+ if (aggColumns) {
+ for (n = exps->h; n;) {
+ node *next = n->next;
+ sql_exp *e = n->data, *ne = NULL;
+
+ if (e->type == e_cmp) {
+ /* simple comparison filter */
+ if (e->flag == cmp_gt ||
e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
+ || e->flag == cmp_equal
|| e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin) {
+ sql_exp* column = e->l;
+
+ /* check if the
expression matches any aggregation key, meaning we can
+ try to safely push
it down */
+ if
(filter_column_in_aggregation_columns(column, aggColumns)) {
+ ne =
exp_push_down_prj(v->sql, e, r, pl);
+
+ /* can we move
it down */
+ if (ne && ne !=
e && pl->exps) {
+ if
(!is_select(pl->op) || rel_is_ref(pl))
+
r->l = pl = rel_select(v->sql->sa, pl, NULL);
+
rel_select_add_exp(v->sql->sa, pl, ne);
+
list_remove_node(exps, NULL, n);
+
v->changes++;
+ }
+ }
+ }
+ }
+ n = next;
+ }
+
+ /* cleanup list */
+ list_destroy(aggColumns);
+ }
+ }
}
/* try push select under set relation */
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]