Changeset: 988026412964 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/988026412964
Modified Files:
        gdk/gdk_join.c
        sql/test/2024/Tests/distinct_from.test
        sql/test/2024/Tests/groupby_primary_key_project_unique_key.test
Branch: distinct_from
Log Message:

Implemented "nil_matches" for thetajoin when condition is not "=".


diffs (224 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3697,7 +3697,8 @@ joincost(BAT *r, BUN lcount, struct cand
 #define MASK_NE                (MASK_LT | MASK_GT)
 
 static gdk_return
-thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode, BUN estimate, const char *reason, lng t0)
+thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode,
+         BUN estimate, bool nil_matches, const char *reason, lng t0)
 {
        struct canditer lci, rci;
        const char *lvals, *rvals;
@@ -3737,23 +3738,29 @@ thetajoin(BAT **r1p, BAT **r2p, BAT *l, 
 
        if (BATtvoid(l)) {
                if (!BATtdensebi(&li)) {
-                       /* trivial: nils don't match anything */
-                       bat_iterator_end(&li);
-                       bat_iterator_end(&ri);
-                       return nomatch(r1p, r2p, NULL, l, r, &lci,
-                                      0, false, false, __func__, t0);
+                       if (!nil_matches) {
+                               /* trivial: nils don't match anything */
+                               bat_iterator_end(&li);
+                               bat_iterator_end(&ri);
+                               return nomatch(r1p, r2p, NULL, l, r, &lci,
+                                              0, false, false, __func__, t0);
+                       }
+               } else {
+                       loff = (lng) l->tseqbase - (lng) l->hseqbase;
                }
-               loff = (lng) l->tseqbase - (lng) l->hseqbase;
        }
        if (BATtvoid(r)) {
                if (!BATtdensebi(&ri)) {
-                       /* trivial: nils don't match anything */
-                       bat_iterator_end(&li);
-                       bat_iterator_end(&ri);
-                       return nomatch(r1p, r2p, NULL, l, r, &lci,
-                                      0, false, false, __func__, t0);
+                       if (!nil_matches) {
+                               /* trivial: nils don't match anything */
+                               bat_iterator_end(&li);
+                               bat_iterator_end(&ri);
+                               return nomatch(r1p, r2p, NULL, l, r, &lci,
+                                              0, false, false, __func__, t0);
+                       }
+               } else {
+                       roff = (lng) r->tseqbase - (lng) r->hseqbase;
                }
-               roff = (lng) r->tseqbase - (lng) r->hseqbase;
        }
 
        BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
@@ -3782,18 +3789,18 @@ thetajoin(BAT **r1p, BAT **r2p, BAT *l, 
                lo = canditer_next(&lci);
                if (lvals)
                        vl = VALUE(l, lo - l->hseqbase);
-               else
+               else if (!BATtdensebi(&li))
                        lval = (oid) ((lng) lo + loff);
                nr = 0;
-               if (cmp(vl, nil) != 0) {
+               if (nil_matches || cmp(vl, nil) != 0) {
                        canditer_reset(&rci);
                        TIMEOUT_LOOP(rci.ncand, qry_ctx) {
                                ro = canditer_next(&rci);
                                if (rvals)
                                        vr = VALUE(r, ro - r->hseqbase);
-                               else
+                               else if (!BATtdensebi(&ri))
                                        rval = (oid) ((lng) ro + roff);
-                               if (cmp(vr, nil) == 0)
+                               if (!nil_matches && cmp(vr, nil) == 0)
                                        continue;
                                c = cmp(vl, vr);
                                if (!((opcode & MASK_LT && c < 0) ||
@@ -4437,7 +4444,7 @@ BATthetajoin(BAT **r1p, BAT **r2p, BAT *
        if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
                return GDK_FAIL;
 
-       return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate,
+       return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate, nil_matches,
                         __func__, t0);
 }
 
@@ -5063,14 +5070,14 @@ BATrangejoin(BAT **r1p, BAT **r2p, BAT *
                if (!anti)
                        return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
                                       __func__, t0);
-               return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate,
+               return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate, false,
                                 __func__, t0);
        }
        if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) {
                if (!anti)
                        return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
                                       __func__, t0);
-               return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate,
+               return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate, false,
                                 __func__, t0);
        }
 
diff --git a/sql/test/2024/Tests/distinct_from.test b/sql/test/2024/Tests/distinct_from.test
--- a/sql/test/2024/Tests/distinct_from.test
+++ b/sql/test/2024/Tests/distinct_from.test
@@ -1,4 +1,3 @@
-
 query I nosort
 SELECT 10 IS NOT DISTINCT FROM 20
 ----
@@ -9,7 +8,6 @@ SELECT 10 IS DISTINCT FROM 20
 ----
 1
 
-
 query I nosort
 SELECT 10 IS DISTINCT FROM NULL
 ----
@@ -44,56 +42,56 @@ statement ok
 create table foo(s) as values (10), (20), (NULL)
 
 query I nosort
-select s IS NOT DISTINCT FROM 20 FROM foo;
+select s IS NOT DISTINCT FROM 20 FROM foo
 ----
 0
 1
 0
 
 query I nosort
-select s IS DISTINCT FROM 20 FROM foo;
+select s IS DISTINCT FROM 20 FROM foo
 ----
 1
 0
 1
 
 query I nosort
-select s IS NOT DISTINCT FROM NULL FROM foo;
+select s IS NOT DISTINCT FROM NULL FROM foo
 ----
 0
 0
 1
 
 query I nosort
-select s IS DISTINCT FROM NULL FROM foo;
+select s IS DISTINCT FROM NULL FROM foo
 ----
 1
 1
 0
 
 query I nosort
-select 20 IS NOT DISTINCT FROM s FROM foo;
+select 20 IS NOT DISTINCT FROM s FROM foo
 ----
 0
 1
 0
 
 query I nosort
-select 20 IS DISTINCT FROM s FROM foo;
+select 20 IS DISTINCT FROM s FROM foo
 ----
 1
 0
 1
 
 query I nosort
-select NULL IS NOT DISTINCT FROM s FROM foo;
+select NULL IS NOT DISTINCT FROM s FROM foo
 ----
 0
 0
 1
 
 query I nosort
-select NULL IS DISTINCT FROM s FROM foo;
+select NULL IS DISTINCT FROM s FROM foo
 ----
 1
 1
@@ -141,3 +139,22 @@ NULL
 NULL
 0
 1
+
+query II rowsort
+select foo.s, bar.s from foo, bar where foo.s is distinct from bar.s
+----
+10
+20
+10
+30
+10
+NULL
+20
+30
+20
+NULL
+NULL
+20
+NULL
+30
+
diff --git a/sql/test/2024/Tests/groupby_primary_key_project_unique_key.test b/sql/test/2024/Tests/groupby_primary_key_project_unique_key.test
--- a/sql/test/2024/Tests/groupby_primary_key_project_unique_key.test
+++ b/sql/test/2024/Tests/groupby_primary_key_project_unique_key.test
@@ -54,10 +54,11 @@ 600
 statement error
 SELECT product.product_id, sum(product_part.num) as sum_num
 FROM
-        (SELECT * FROM product UNION ALL VALUES (1, 'telephone5', 'telephone5')) AS product 
+        (SELECT * FROM product UNION ALL VALUES (1, 'telephone5', 'telephone5')) AS product
     JOIN
         product_part
     ON product.product_id = product_part.product_id
 GROUP BY product.product_id
 ORDER BY product.product_code, product.product_id
 ----
+
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to