Changeset: 43e157bb2be3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/43e157bb2be3
Modified Files:
        gdk/gdk_join.c
        sql/test/SQLancer/Tests/sqlancer22.test
        sql/test/SQLancer/Tests/sqlancer23.test
        sql/test/subquery/Tests/subquery3.test
        sql/test/subquery/Tests/subquery5.test
Branch: Jun2023
Log Message:

Extend selectjoin implementation to also do semi, min_one, max_one options.


diffs (266 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -301,11 +301,13 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
 static gdk_return
 selectjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
           struct canditer *lci, struct canditer *rci,
-          bool nil_matches, lng t0, bool swapped, const char *reason)
+          bool nil_matches, bool semi, bool max_one, bool min_one,
+          lng t0, bool swapped, const char *reason)
 {
        BATiter li = bat_iterator(l);
        const void *v;
        BAT *bn = NULL;
+       BUN bncount;
 
        assert(lci->ncand > 0);
        assert(lci->ncand == 1 || (li.sorted && li.revsorted));
@@ -334,32 +336,46 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
        if (bn == NULL) {
                return GDK_FAIL;
        }
-       if (BATcount(bn) == 0) {
+       bncount = BATcount(bn);
+       if (bncount == 0) {
                BBPunfix(bn->batCacheid);
+               if (min_one) {
+                       GDKerror("not enough matches");
+                       return GDK_FAIL;
+               }
                return nomatch(r1p, r2p, l, r, lci, false, false,
                               reason, t0);
        }
-       BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT);
+       if (bncount > 1) {
+               if (semi)
+                       bncount = 1;
+               if (max_one) {
+                       BBPunfix(bn->batCacheid);
+                       GDKerror("more than one match");
+                       return GDK_FAIL;
+               }
+       }
+       BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
        if (r1 == NULL) {
                BBPunfix(bn->batCacheid);
                return GDK_FAIL;
        }
        r1->tsorted = true;
        r1->trevsorted = lci->ncand == 1;
-       r1->tseqbase = BATcount(bn) == 1 && lci->tpe == cand_dense ? o : 
oid_nil;
-       r1->tkey = BATcount(bn) == 1;
+       r1->tseqbase = bncount == 1 && lci->tpe == cand_dense ? o : oid_nil;
+       r1->tkey = bncount == 1;
        r1->tnil = false;
        r1->tnonil = true;
        BAT *r2 = NULL;
        if (r2p) {
-               r2 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT);
+               r2 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
                if (r2 == NULL) {
                        BBPunfix(bn->batCacheid);
                        BBPreclaim(r1);
                        return GDK_FAIL;
                }
-               r2->tsorted = lci->ncand == 1 || BATcount(bn) == 1;
-               r2->trevsorted = BATcount(bn) == 1;
+               r2->tsorted = lci->ncand == 1 || bncount == 1;
+               r2->trevsorted = bncount == 1;
                r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase 
: oid_nil;
                r2->tkey = lci->ncand == 1;
                r2->tnil = false;
@@ -369,7 +385,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
                oid *o1p = (oid *) Tloc(r1, 0);
                oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL;
                oid bno = bn->tseqbase;
-               BUN p, q = BATcount(bn);
+               BUN p, q = bncount;
 
                do {
                        GDK_CHECK_TIMEOUT(timeoffset, counter,
@@ -388,7 +404,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
                oid *o1p = (oid *) Tloc(r1, 0);
                oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL;
                const oid *bnp = (const oid *) Tloc(bn, 0);
-               BUN p, q = BATcount(bn);
+               BUN p, q = bncount;
 
                do {
                        GDK_CHECK_TIMEOUT(timeoffset, counter,
@@ -404,10 +420,10 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
                        o = canditer_next(lci);
                } while (!is_oid_nil(o));
        }
-       BATsetcount(r1, lci->ncand * BATcount(bn));
+       BATsetcount(r1, lci->ncand * bncount);
        *r1p = r1;
        if (r2p) {
-               BATsetcount(r2, lci->ncand * BATcount(bn));
+               BATsetcount(r2, lci->ncand * bncount);
                *r2p = r2;
        }
        BBPunfix(bn->batCacheid);
@@ -3865,12 +3881,13 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                goto doreturn;
        }
 
-       if (!nil_on_miss && !semi && !max_one && !min_one && !only_misses && 
!not_in &&
+       if (!nil_on_miss && !only_misses && !not_in &&
            (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) ||
             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
                /* single value to join, use select */
                rc = selectjoin(r1p, r2p, l, r, &lci, &rci,
-                               nil_matches, t0, false, func);
+                               nil_matches, semi, max_one, min_one,
+                               t0, false, func);
                goto doreturn;
        } else if (BATtdense(r) && rci.tpe == cand_dense) {
                /* use special implementation for dense right-hand side */
@@ -4239,12 +4256,14 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
        if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype 
== TYPE_void && is_oid_nil(l->tseqbase))) {
                /* single value to join, use select */
                rc = selectjoin(r1p, r2p, l, r, &lci, &rci,
-                               nil_matches, t0, false, __func__);
+                               nil_matches, false, false, false,
+                               t0, false, __func__);
                goto doreturn;
        } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || 
(r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) {
                /* single value to join, use select */
                rc = selectjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci,
-                               nil_matches, t0, true, __func__);
+                               nil_matches, false, false, false,
+                               t0, true, __func__);
                if (rc == GDK_SUCCEED && r2p == NULL)
                        BBPunfix(r2->batCacheid);
                goto doreturn;
diff --git a/sql/test/SQLancer/Tests/sqlancer22.test 
b/sql/test/SQLancer/Tests/sqlancer22.test
--- a/sql/test/SQLancer/Tests/sqlancer22.test
+++ b/sql/test/SQLancer/Tests/sqlancer22.test
@@ -101,23 +101,23 @@ INSERT INTO mct20 VALUES (INTERVAL '1' D
 statement error GDK reported error: BATsubcross: more than one match
 select 1 from mct20 where ((select true) union all (select true))
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as 
nort0(norc0)
 on true and greatest(((select nort0.norc0) union all (select true)), true)
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as 
nort0(norc0)
 on ((select nort0.norc0) union all (select true))
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as 
nort0(norc0)
 on true where ((select nort0.norc0) union all (select true))
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select rank() over (order by ((select mct20.c0) union all (select interval '1' 
day))) from mct20
 ----
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select sum(((select mct20.c0) union all (select interval '1' day))) over () 
from mct20
 ----
 
@@ -125,7 +125,7 @@ statement error GDK reported error: BATs
 select sum(((select interval '2' day) union all (select interval '1' day))) 
over () from mct20
 ----
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 select max(((select mct20.c0) union all (select interval '1' day))) over () 
from mct20
 ----
 
@@ -519,7 +519,7 @@ statement ok rowcount 4
 INSERT INTO rt3 VALUES (DATE '4153-10-28'),(DATE '2471-08-17'),(DATE 
'2022-02-08'),(DATE '1970-01-20')
 
 query T nosort
-SELECT (SELECT DATE '2011-10-01' FROM rt3 y RIGHT OUTER JOIN (VALUES (DATE 
'2010-10-01'), (z.c0)) AS x(x) ON 
+SELECT (SELECT DATE '2011-10-01' FROM rt3 y RIGHT OUTER JOIN (VALUES (DATE 
'2010-10-01'), (z.c0)) AS x(x) ON
 length('a') > 0 WHERE rt3.c0 = DATE '2012-10-01') FROM rt3, rt3 z
 ----
 NULL
diff --git a/sql/test/SQLancer/Tests/sqlancer23.test 
b/sql/test/SQLancer/Tests/sqlancer23.test
--- a/sql/test/SQLancer/Tests/sqlancer23.test
+++ b/sql/test/SQLancer/Tests/sqlancer23.test
@@ -168,7 +168,7 @@ SELECT 1 FROM t0 GROUP BY t0.c0 HAVING m
 ----
 
 # Postgres doesn't give an error here, but we are confident it must
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT 1 FROM t0 CROSS JOIN LATERAL (SELECT (VALUES (y.y), (y.y)) FROM (SELECT 
1) y(y) WHERE t0.c0 = 2) x(x)
 
 statement ok
@@ -195,10 +195,10 @@ SELECT 1 FROM (SELECT DISTINCT 3) vx(vx)
 statement error 21000!Cardinality violation, scalar value expected
 SELECT 1 FROM (SELECT DISTINCT 3) vx(vx) WHERE 1 <> (SELECT 2 UNION ALL SELECT 
vx.vx)
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT 1 FROM (SELECT 3) vx(vx) WHERE 1 <> (VALUES (2), (vx.vx))
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT 1 FROM (SELECT 3) vx(vx) WHERE 1 <> (SELECT 2 UNION ALL SELECT vx.vx)
 
 statement error 42000!SELECT: aggregate functions not allowed in JOIN 
conditions
diff --git a/sql/test/subquery/Tests/subquery3.test 
b/sql/test/subquery/Tests/subquery3.test
--- a/sql/test/subquery/Tests/subquery3.test
+++ b/sql/test/subquery/Tests/subquery3.test
@@ -555,7 +555,7 @@ SELECT
        (SELECT 1 FROM integers i2 INNER JOIN integers i3 on MAX(i3.i) = 
MIN(i2.i))
 FROM integers i1
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT
     (SELECT SUM(SUM(i1.i) + i2.i) FROM integers i2 GROUP BY i2.i)
 FROM integers i1
@@ -717,7 +717,7 @@ 4
 4
 4
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT
        (SELECT outt FROM evilfunction((SELECT MAX(t1.col1) FROM 
tbl_ProductSales)))
 FROM another_T t1
@@ -741,7 +741,7 @@ SELECT * FROM evilfunction(1)
 1
 1
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT
        (SELECT outt FROM evilfunction((SELECT MIN(col1))))
 FROM another_T
@@ -751,7 +751,7 @@ SELECT
        (SELECT outt FROM evilfunction((SELECT MAX(ColID) FROM 
tbl_ProductSales)))
 FROM another_T
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT
        (SELECT outt FROM evilfunction((SELECT MAX(t1.col1) FROM 
tbl_ProductSales)))
 FROM another_T t1
diff --git a/sql/test/subquery/Tests/subquery5.test 
b/sql/test/subquery/Tests/subquery5.test
--- a/sql/test/subquery/Tests/subquery5.test
+++ b/sql/test/subquery/Tests/subquery5.test
@@ -441,10 +441,10 @@ query I rowsort
 SELECT MAX(i1.i) FROM integers i1 HAVING (VALUES((AVG(i1.i))) EXCEPT 
VALUES(AVG(i1.i))) <> 0
 ----
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT (VALUES(SUM(i1.i)) UNION VALUES(AVG(i1.i))) FROM integers i1
 
-statement error GDK reported error: mergejoin: more than one match
+statement error GDK reported error: selectjoin: more than one match
 SELECT ((SELECT SUM(i1.i)) UNION ALL (SELECT AVG(i1.i))) FROM integers i1
 
 statement error 42000!SELECT: cannot use non GROUP BY column 'i1.i' in query 
results without an aggregate function
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to