Changeset: 701eb0e24417 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=701eb0e24417
Modified Files:
        gdk/gdk_logger.c
        gdk/gdk_string.c
        sql/server/rel_optimizer.c
        sql/server/rel_rel.c
        sql/server/rel_unnest.c
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-0join-query.test
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-query.test
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-view.test
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-2join-query.test
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-2join-view.test
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-prologue.test
Branch: default
Log Message:

Merged with Oct2020


diffs (truncated from 836 to 300 lines):

diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -176,6 +176,46 @@ strLocate(Heap *h, const char *v)
        return 0;
 }
 
+static inline gdk_return
+checkUTF8(const char *v) /* GDK_SUCCEED if v passes the UTF-8 check below, else GDK_FAIL */
+{
+       if (v[0] != '\200' || v[1] != '\0') {
+               /* check that string is correctly encoded UTF-8; the
+                * one-byte string "\200" is exempt (see test above);
+                * nutf8 = continuation bytes still expected, m = bits
+                * of which the next byte must have at least one set */
+               int nutf8 = 0;
+               int m = 0;
+               for (size_t i = 0; v[i]; i++) {
+                       if (nutf8 > 0) {
+                               if ((v[i] & 0xC0) != 0x80 ||
+                                   (m != 0 && (v[i] & m) == 0))
+                                       goto badutf8; /* not a continuation byte, or overlong encoding */
+                               m = 0; /* the mask only applies to the first continuation byte */
+                               nutf8--;
+                       } else if ((v[i] & 0xE0) == 0xC0) {
+                               nutf8 = 1;
+                               if ((v[i] & 0x1E) == 0)
+                                       goto badutf8; /* lead byte 0xC0/0xC1: overlong 2-byte encoding */
+                       } else if ((v[i] & 0xF0) == 0xE0) {
+                               nutf8 = 2;
+                               if ((v[i] & 0x0F) == 0)
+                                       m = 0x20; /* lead byte 0xE0: next byte must be >= 0xA0 */
+                       } else if ((v[i] & 0xF8) == 0xF0) {
+                               nutf8 = 3;
+                               if ((v[i] & 0x07) == 0)
+                                       m = 0x30; /* lead byte 0xF0: next byte must be >= 0x90 */
+                       } else if ((v[i] & 0x80) != 0) {
+                               goto badutf8; /* stray continuation byte or lead byte >= 0xF8 */
+                       }
+               } /* NOTE(review): nutf8 is not checked after the loop, so a string ending mid-sequence is accepted */
+       }
+       return GDK_SUCCEED;
+
+  badutf8:
+       return GDK_FAIL; /* no error is reported here; the callers shown call GDKerror themselves */
+}
+
 var_t
 strPut(BAT *b, var_t *dst, const void *V)
 {
@@ -221,39 +261,9 @@ strPut(BAT *b, var_t *dst, const void *V
        }
        /* the string was not found in the heap, we need to enter it */
 
-       if (v[0] != '\200' || v[1] != '\0') {
-               /* check that string is correctly encoded UTF-8; there
-                * was no need to do this earlier: if the string was
-                * found above, it must have gone through here in the
-                * past */
-               int nutf8 = 0;
-               int m = 0;
-               for (size_t i = 0; v[i]; i++) {
-                       if (nutf8 > 0) {
-                               if ((v[i] & 0xC0) != 0x80 ||
-                                   (m != 0 && (v[i] & m) == 0)) {
-                                 badutf8:
-                                       GDKerror("incorrectly encoded UTF-8");
-                                       return 0;
-                               }
-                               m = 0;
-                               nutf8--;
-                       } else if ((v[i] & 0xE0) == 0xC0) {
-                               nutf8 = 1;
-                               if ((v[i] & 0x1E) == 0)
-                                       goto badutf8;
-                       } else if ((v[i] & 0xF0) == 0xE0) {
-                               nutf8 = 2;
-                               if ((v[i] & 0x0F) == 0)
-                                       m = 0x20;
-                       } else if ((v[i] & 0xF8) == 0xF0) {
-                               nutf8 = 3;
-                               if ((v[i] & 0x07) == 0)
-                                       m = 0x30;
-                       } else if ((v[i] & 0x80) != 0) {
-                               goto badutf8;
-                       }
-               }
+       if (checkUTF8(v) != GDK_SUCCEED) {
+               GDKerror("incorrectly encoded UTF-8\n");
+               return 0;
        }
 
        pad = GDK_VARALIGN - (h->free & (GDK_VARALIGN - 1));
@@ -789,6 +799,10 @@ strWrite(const char *a, stream *s, size_
 
        (void) cnt;
        assert(cnt == 1);
+       if (checkUTF8(a) != GDK_SUCCEED) {
+               GDKerror("incorrectly encoded UTF-8\n");
+               return GDK_FAIL;
+       }
        if (mnstr_writeInt(s, (int) len) && mnstr_write(s, a, len, 1) == 1)
                return GDK_SUCCEED;
        else
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -1667,12 +1667,12 @@ rel_simplify_project_fk_join(mvc *sql, s
 {
        sql_rel *rl = r->l;
        sql_rel *rr = r->r;
-       sql_exp *je;
+       sql_exp *je, *le, *nje, *re;
        node *n;
        int fk_left = 1;
 
        /* check for foreign key join */
-       if (!r->exps || list_length(r->exps) != 1)
+       if (list_length(r->exps) != 1)
                return r;
        if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)))
                return r;
@@ -1715,11 +1715,24 @@ rel_simplify_project_fk_join(mvc *sql, s
                        return r;
        }
 
+       /* rewrite, ie remove pkey side if possible */
+       le = (sql_exp*)je->l, re = (sql_exp*)je->l;
+
+       /* both have NULL and there are semantics, the join cannot be removed */
+       if (is_semantics(je) && has_nil(le) && has_nil(re))
+               return r;
+
        (*changes)++;
-       /* rewrite, ie remove pkey side */
-       if (fk_left)
-               return r->l;
-       return r->r;
+       /* if the foreign key column doesn't have NULL values, then return it */
+       if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || 
(!fk_left && is_right(r->op)))
+               return fk_left ? r->l : r->r;
+
+       /* remove NULL values, ie generate a select not null */
+       nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, 
atom_general(sql->sa, exp_subtype(le), NULL)), cmp_equal);
+       set_anti(nje);
+       set_has_no_nil(nje);
+       set_semantics(nje);
+       return rel_select(sql->sa, fk_left ? r->l : r->r, nje);
 }
 
 static sql_rel *
@@ -1727,11 +1740,11 @@ rel_simplify_count_fk_join(mvc *sql, sql
 {
        sql_rel *rl = r->l;
        sql_rel *rr = r->r;
-       sql_exp *oce, *je;
+       sql_exp *je, *le, *nje, *re, *oce;
        int fk_left = 1;
 
        /* check for foreign key join */
-       if (!r->exps || list_length(r->exps) != 1)
+       if (list_length(r->exps) != 1)
                return r;
        if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)))
                return r;
@@ -1762,11 +1775,24 @@ rel_simplify_count_fk_join(mvc *sql, sql
                r->r = rr;
        }
 
+       /* rewrite, ie remove pkey side if possible */
+       le = (sql_exp*)je->l, re = (sql_exp*)je->l;
+
+       /* both have NULL and there are semantics, the join cannot be removed */
+       if (is_semantics(je) && has_nil(le) && has_nil(re))
+               return r;
+
        (*changes)++;
-       /* rewrite, ie remove pkey side */
-       if (fk_left)
-               return r->l;
-       return r->r;
+       /* if the foreign key column doesn't have NULL values, then return it */
+       if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || 
(!fk_left && is_right(r->op)))
+               return fk_left ? r->l : r->r;
+
+       /* remove NULL values, ie generate a select not null */
+       nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, 
atom_general(sql->sa, exp_subtype(le), NULL)), cmp_equal);
+       set_anti(nje);
+       set_has_no_nil(nje);
+       set_semantics(nje);
+       return rel_select(sql->sa, fk_left ? r->l : r->r, nje);
 }
 
 /*
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -828,6 +828,7 @@ rel_basetable(mvc *sql, sql_table *t, co
                        sql_idx *i = cn->data;
                        sql_subtype *t = sql_bind_localtype("lng"); /* hash 
"lng" */
                        char *iname = NULL;
+                       int has_nils = 0;
 
                        /* do not include empty indices in the plan */
                        if ((hash_index(i->type) && list_length(i->columns) <= 
1) || !idx_has_column(i->type))
@@ -837,7 +838,13 @@ rel_basetable(mvc *sql, sql_table *t, co
                                t = sql_bind_localtype("oid");
 
                        iname = sa_strconcat( sa, "%", i->base.name);
-                       e = exp_alias(sa, atname, iname, tname, iname, t, 
CARD_MULTI, 0, 1);
+                       for (node *n = i->columns->h ; n && !has_nils; n = 
n->next) { /* check for NULL values */
+                               sql_kc *kc = n->data;
+
+                               if (kc->c->null)
+                                       has_nils = 1;
+                       }
+                       e = exp_alias(sa, atname, iname, tname, iname, t, 
CARD_MULTI, has_nils, 1);
                        /* index names are prefixed, to make them independent */
                        if (hash_index(i->type)) {
                                p = e->p = prop_create(sa, PROP_HASHIDX, e->p);
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -1893,7 +1893,8 @@ exp_reset_card_and_freevar_set_physical_
        }
        if (is_simple_project(rel->op) && need_distinct(rel)) /* Need distinct, 
all expressions should have CARD_AGGR at max */
                e->card = MIN(e->card, CARD_AGGR);
-       rel->card = MAX(e->card, rel->card); /* the relation cardinality may 
get updated too */
+       if (!is_groupby(rel->op) || !list_empty(rel->r)) /* global groupings 
have atomic cardinality */
+               rel->card = MAX(e->card, rel->card); /* the relation 
cardinality may get updated too */
        return e;
 }
 
diff --git a/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-0join-query.test b/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-0join-query.test
--- a/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-0join-query.test
+++ b/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-0join-query.test
@@ -6,7 +6,7 @@ query T nosort
 ----
 project (
 | group by (
-| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" NOT NULL JOINIDX 
"sys"."fk"."fk_fk2_fkey" ] COUNT 
+| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" JOINIDX "sys"."fk"."fk_fk2_fkey" ] 
COUNT 
 | ) [  ] [ "sys"."count"() NOT NULL as "%1"."%1" ]
 ) [ "%1"."%1" NOT NULL ]
 
diff --git a/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-query.test b/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-query.test
--- a/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-query.test
+++ b/sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-plan-1join-query.test
@@ -6,7 +6,7 @@ query T nosort
 ----
 project (
 | group by (
-| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" NOT NULL JOINIDX 
"sys"."fk"."fk_fk2_fkey" ] COUNT 
+| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" JOINIDX "sys"."fk"."fk_fk2_fkey" ] 
COUNT 
 | ) [  ] [ "sys"."count"() NOT NULL as "%1"."%1" ]
 ) [ "%1"."%1" NOT NULL ]
 
@@ -22,9 +22,9 @@ query T nosort
 ----
 project (
 | left outer join (
-| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk1_fkey" NOT 
NULL JOINIDX "sys"."fk"."fk_fk1_fkey" ] COUNT ,
+| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk1_fkey" 
JOINIDX "sys"."fk"."fk_fk1_fkey" ] COUNT ,
 | | table("sys"."pk1") [ "pk1"."v1", "pk1"."%TID%" NOT NULL ] COUNT 
-| ) [ "fk"."%fk_fk1_fkey" NOT NULL = "pk1"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk1_fkey" ]
+| ) [ "fk"."%fk_fk1_fkey" = "pk1"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk1_fkey" ]
 ) [ "fk"."id" NOT NULL HASHCOL , "pk1"."v1" ] [ "fk"."id" ASC NOT NULL HASHCOL 
 ]
 
 query T nosort
@@ -32,9 +32,9 @@ query T nosort
 ----
 project (
 | left outer join (
-| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk2_fkey" NOT 
NULL JOINIDX "sys"."fk"."fk_fk2_fkey" ] COUNT ,
+| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk2_fkey" 
JOINIDX "sys"."fk"."fk_fk2_fkey" ] COUNT ,
 | | table("sys"."pk2") [ "pk2"."v2", "pk2"."%TID%" NOT NULL ] COUNT 
-| ) [ "fk"."%fk_fk2_fkey" NOT NULL = "pk2"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk2_fkey" ]
+| ) [ "fk"."%fk_fk2_fkey" = "pk2"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk2_fkey" ]
 ) [ "fk"."id" NOT NULL HASHCOL , "pk2"."v2" ] [ "fk"."id" ASC NOT NULL HASHCOL 
 ]
 
 query T nosort
@@ -42,7 +42,7 @@ query T nosort
 ----
 project (
 | group by (
-| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" NOT NULL JOINIDX 
"sys"."fk"."fk_fk2_fkey" ] COUNT 
+| | table("sys"."fk") [ "fk"."%fk_fk2_fkey" JOINIDX "sys"."fk"."fk_fk2_fkey" ] 
COUNT 
 | ) [  ] [ "sys"."count"() NOT NULL as "%1"."%1" ]
 ) [ "%1"."%1" NOT NULL ]
 
@@ -59,8 +59,8 @@ query T nosort
 project (
 | right outer join (
 | | table("sys"."pk1") [ "pk1"."v1", "pk1"."%TID%" NOT NULL ] COUNT ,
-| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk1_fkey" NOT 
NULL JOINIDX "sys"."fk"."fk_fk1_fkey" ] COUNT 
-| ) [ "fk"."%fk_fk1_fkey" NOT NULL = "pk1"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk1_fkey" ]
+| | table("sys"."fk") [ "fk"."id" NOT NULL HASHCOL , "fk"."%fk_fk1_fkey" 
JOINIDX "sys"."fk"."fk_fk1_fkey" ] COUNT 
+| ) [ "fk"."%fk_fk1_fkey" = "pk1"."%TID%" NOT NULL JOINIDX 
"sys"."fk"."fk_fk1_fkey" ]
 ) [ "fk"."id" NOT NULL HASHCOL , "pk1"."v1" ] [ "fk"."id" ASC NOT NULL HASHCOL 
 ]
 
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to