Changeset: bcc04d7a8c9e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bcc04d7a8c9e Modified Files: sql/backends/monet5/rel_weld.c Branch: rel-weld Log Message:
antijoin + joins that are actually filters diffs (258 lines): diff --git a/sql/backends/monet5/rel_weld.c b/sql/backends/monet5/rel_weld.c --- a/sql/backends/monet5/rel_weld.c +++ b/sql/backends/monet5/rel_weld.c @@ -1018,9 +1018,9 @@ cleanup: static void join_produce(backend *be, sql_rel *rel, weld_state *wstate) { - char new_builder[STR_BUF_SIZE], struct_mbr[64]; - str col_name; - int len = 0, i, count; + char new_builder[STR_BUF_SIZE], probe_key[STR_BUF_SIZE], struct_mbr[64]; + str col_name, old_builder; + int len = 0, i, count, is_filter = 0, result_var, old_num_loops, old_num_parens; node *en; sql_exp *exp; sql_rel *right = rel->r; @@ -1029,14 +1029,42 @@ join_produce(backend *be, sql_rel *rel, list *left_cmp_cols = sa_list(wstate->sa); produce_func left_produce, right_produce; + if (rel->exps == NULL) { + /* Cross product */ + wstate->error = 1; + goto cleanup; + } + for (en = rel->exps->h; en; en = en->next) { + exp = en->data; + if (get_cmp(exp) == cmp_notequal) { + /* We don't support this yet */ + wstate->error = 1; + goto cleanup; + } else if (get_cmp(exp) != cmp_equal) { + is_filter = 1; + } + } + + right_produce = getproduce_func(rel->r); + left_produce = getproduce_func(rel->l); + if (right_produce == NULL || left_produce == NULL) { + wstate->error = 1; + goto cleanup; + } + if (is_filter) { + right_produce(be, rel->r, wstate); + select_produce(be, rel, wstate); + goto cleanup; + } + /* === Produce === */ - int old_num_parens = wstate->num_parens; - int old_num_loops = wstate->num_loops; - str old_builder = wstate->builder; + old_num_parens = wstate->num_parens; + old_num_loops = wstate->num_loops; + old_builder = wstate->builder; /* Create a new builder */ wstate->num_parens = wstate->num_loops = 0; - int result_var = wstate->next_var++; + result_var = wstate->next_var++; wprintf(wstate, "let v%d = (", result_var); wstate->num_parens++; @@ -1053,7 +1081,11 @@ join_produce(backend *be, sql_rel *rel, } len = 0; - len += sprintf(new_builder + len, "groupmerger["); + if (rel->op == op_semi || rel->op == op_anti) { + len += sprintf(new_builder + len, "dictmerger["); + } else { + len += sprintf(new_builder + len, "groupmerger["); + } if (list_length(rel->exps) > 1) { len += sprintf(new_builder + len, "{"); /* key is a struct */ } @@ -1086,33 +1118,32 @@ join_produce(backend *be, sql_rel *rel, len += sprintf(new_builder + len, "}"); /* key is a struct */ } len += sprintf(new_builder + len, ", "); - if (list_length(right->exps) > 1) { - len += sprintf(new_builder + len, "{"); /* value is a struct */ - } - for (en = right->exps->h; en; en = en->next) { - exp = en->data; - int type = exp_subtype(exp)->type->localtype; - if (type == TYPE_str) { - len += sprintf(new_builder + len, "i64"); - } else { - len += sprintf(new_builder + len, "%s", getWeldType(type)); + if (rel->op == op_semi || rel->op == op_anti) { + /* We only need a dictionary to lookup the key */ + len += sprintf(new_builder + len, "i64, +"); + } else { + if (list_length(right->exps) > 1) { + len += sprintf(new_builder + len, "{"); /* value is a struct */ } - if (en->next != NULL) { - len += sprintf(new_builder + len, ", "); + for (en = right->exps->h; en; en = en->next) { + exp = en->data; + int type = exp_subtype(exp)->type->localtype; + if (type == TYPE_str) { + len += sprintf(new_builder + len, "i64"); + } else { + len += sprintf(new_builder + len, "%s", getWeldType(type)); + } + if (en->next != NULL) { + len += sprintf(new_builder + len, ", "); + } } - } - if (list_length(right->exps) > 1) { - len += sprintf(new_builder + len, "}"); /* value is a struct */ + if (list_length(right->exps) > 1) { + len += sprintf(new_builder + len, "}"); /* value is a struct */ + } } len += sprintf(new_builder + len, "]"); wstate->builder = new_builder; - right_produce = getproduce_func(rel->r); - left_produce = getproduce_func(rel->l); - if (right_produce == NULL || left_produce == NULL) { - wstate->error = 1; - goto cleanup; - } right_produce(be, rel->r, wstate); /* === Consume === */ @@ -1131,22 +1162,26 @@ join_produce(backend *be, sql_rel *rel, wprintf(wstate, "}"); /* key is a struct */ } wprintf(wstate, ", "); - if (list_length(right->exps) > 1) { - wprintf(wstate, "{"); /* value is a struct */ - } - /* Build the value */ - for (en = right->exps->h, count = 0; en; en = en->next, count++) { - exp = en->data; - wprintf(wstate, "%s", (str)list_fetch(right_cols, count)); - if (exp_subtype(exp)->type->localtype == TYPE_str) { - wprintf(wstate, "_stridx"); + if (rel->op == op_semi || rel->op == op_anti) { + wprintf(wstate, "1L"); + } else { + if (list_length(right->exps) > 1) { + wprintf(wstate, "{"); /* value is a struct */ } - if (en->next != NULL) { - wprintf(wstate, ", "); + /* Build the value */ + for (en = right->exps->h, count = 0; en; en = en->next, count++) { + exp = en->data; + wprintf(wstate, "%s", (str)list_fetch(right_cols, count)); + if (exp_subtype(exp)->type->localtype == TYPE_str) { + wprintf(wstate, "_stridx"); + } + if (en->next != NULL) { + wprintf(wstate, ", "); + } } - } - if (list_length(right->exps) > 1) { - wprintf(wstate, "}"); /* value is a struct */ + if (list_length(right->exps) > 1) { + wprintf(wstate, "}"); /* value is a struct */ + } } wprintf(wstate, "})"); @@ -1164,45 +1199,48 @@ join_produce(backend *be, sql_rel *rel, left_produce(be, rel->l, wstate); /* === 2nd Consume === */ - wstate->num_loops++; - wstate->num_parens++; - wprintf(wstate, "for("); - if (rel->op == op_semi) { - wprintf(wstate, "slice("); - } - wprintf(wstate, "lookup(v%d, ", result_var); + len = 0; if (list_length(left_cmp_cols) > 1) { - wprintf(wstate, "{"); /* key is a struct */ + len += sprintf(probe_key + len, "{"); /* key is a struct */ } for (en = left_cmp_cols->h; en; en = en->next) { /* Hashtable key */ - wprintf(wstate, "%s", (str)en->data); + len += sprintf(probe_key + len, "%s", (str)en->data); if (en->next != NULL) { - wprintf(wstate, ", "); + len += sprintf(probe_key + len, ", "); } } if (list_length(left_cmp_cols) > 1) { - wprintf(wstate, "}"); /* key is a struct */ - } - wprintf(wstate, ")"); - if (rel->op == op_semi) { - wprintf(wstate, ", 0L, 1L)"); /* Just the first element of the vector */ + len += sprintf(probe_key + len, "}"); } - wprintf(wstate, ", b%d, |b%d, i_%d, n%d|", wstate->num_loops - 1, wstate->num_loops, - wstate->num_loops, wstate->num_loops); - for (en = right->exps->h, count = 0; en; en = en->next, count++) { - len = sprintf(struct_mbr, "n%d", wstate->num_loops); - if (list_length(right->exps) > 1) { - len += sprintf(struct_mbr + len, ".$%d", count); - } - exp = en->data; - col_name = list_fetch(right_cols, count); - if (exp_subtype(exp)->type->localtype == TYPE_str) { - wprintf(wstate, "let %s = strslice(%s_strcol, %s);", - col_name, col_name, struct_mbr); - wprintf(wstate, "let %s_stridx = %s;", col_name, struct_mbr); - } else { - wprintf(wstate, "let %s = %s;", col_name, struct_mbr); + + wstate->num_parens++; + if (rel->op == op_semi) { + /* Reverse the condition */ + wprintf(wstate, "if(keyexists(v%d, %s) == false, b%d, ", result_var, probe_key, wstate->num_loops); + } else if (rel->op == op_anti) { + /* Reverse the condition */ + wprintf(wstate, "if(keyexists(v%d, %s) == true, b%d, ", result_var, probe_key, wstate->num_loops); + } else { + wprintf(wstate, "if(keyexists(v%d, %s) == false, b%d, ", result_var, probe_key, wstate->num_loops); + wstate->num_parens++; + wstate->num_loops++; + wprintf(wstate, "for(lookup(v%d, %s), b%d, |b%d, i_%d, n%d|", result_var, probe_key, + wstate->num_loops - 1, wstate->num_loops, wstate->num_loops, wstate->num_loops); + for (en = right->exps->h, count = 0; en; en = en->next, count++) { + len = sprintf(struct_mbr, "n%d", wstate->num_loops); + if (list_length(right->exps) > 1) { + len += sprintf(struct_mbr + len, ".$%d", count); + } + exp = en->data; + col_name = list_fetch(right_cols, count); + if (exp_subtype(exp)->type->localtype == TYPE_str) { + wprintf(wstate, "let %s = strslice(%s_strcol, %s);", + col_name, col_name, struct_mbr); + wprintf(wstate, "let %s_stridx = %s;", col_name, struct_mbr); + } else { + wprintf(wstate, "let %s = %s;", col_name, struct_mbr); + } } } cleanup: @@ -1452,6 +1490,7 @@ produce_func getproduce_func(sql_rel *re case op_groupby: return &groupby_produce; case op_join: + case op_anti: case op_semi: return &join_produce; default: _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list