Changeset: f0b559953113 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f0b559953113
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        configure.ag
        gdk/ChangeLog.Jun2020
        gdk/gdk.h
        gdk/gdk_firstn.c
        gdk/gdk_join.c
        monetdb5/ChangeLog.Jun2020
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
        monetdb5/modules/kernel/algebra.mal
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_statement.c
        sql/server/rel_distribute.c
        sql/server/rel_exp.c
        sql/server/rel_optimizer.c
        sql/server/rel_partition.c
        sql/server/rel_propagate.c
        sql/server/rel_rel.c
        sql/server/sql_env.c
        sql/server/sql_env.h
        sql/server/sql_partition.c
        sql/storage/bat/bat_logger.c
        sql/test/BugTracker-2009/Tests/copy_multiple_files.SF-2902320.stable.out
        sql/test/BugTracker-2009/Tests/copy_multiple_files.SF-2902320.stable.out.Windows
        sql/test/Dependencies/Tests/Dependencies.stable.out
        sql/test/Dependencies/Tests/Dependencies.stable.out.int128
        sql/test/Tests/truncate-statements-extra.stable.out
        sql/test/emptydb/Tests/check.stable.out
        sql/test/emptydb/Tests/check.stable.out.32bit
        sql/test/emptydb/Tests/check.stable.out.int128
        sql/test/miscellaneous/Tests/simple_selects.sql
        sql/test/miscellaneous/Tests/simple_selects.stable.err
Branch: default
Log Message:

Merged with Jun2020


diffs (truncated from 1940 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -591,7 +591,7 @@ stdout of test 'MAL-signatures` in direc
 [ "algebra",   "ilikejoin",    "command algebra.ilikejoin(l:bat[:str], 
r:bat[:str], esc:str, sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ",      "ILIKEjoin;",   "Join the 
string bat L with the pattern bat R\nwith optional candidate lists SL and SR 
using pattern escape string ESC\nand doing a case insensitive match.\nThe 
result is two aligned bats with oids of matching rows."        ]
 [ "algebra",   "ilikeselect",  "command algebra.ilikeselect(b:bat[:str], 
cand:bat[:oid], pat:str, esc:str, anti:bit):bat[:oid] ",      
"PCRElikeselect1;",     ""      ]
 [ "algebra",   "ilikeselect",  "command algebra.ilikeselect(b:bat[:str], 
cand:bat[:oid], pat:str, anti:bit):bat[:oid] ",       "PCRElikeselect4;",     
""      ]
-[ "algebra",   "intersect",    "command algebra.intersect(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ",        "ALGintersect;",        "Intersection of l 
and r with candidate lists (i.e. half of semi-join)" ]
+[ "algebra",   "intersect",    "command algebra.intersect(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, max_one:bit, 
estimate:lng):bat[:oid] ",   "ALGintersect;",        "Intersection of l and r 
with candidate lists (i.e. half of semi-join)" ]
 [ "algebra",   "join", "command algebra.join(l:bat[:any_1], r:bat[:any_1], 
sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng):bat[:oid] ",     
"ALGjoin1;",    "Join; only produce left output"        ]
 [ "algebra",   "join", "command algebra.join(l:bat[:any_1], r:bat[:any_1], 
sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) (X_0:bat[:oid], 
X_1:bat[:oid]) ",        "ALGjoin;",     "Join"  ]
 [ "algebra",   "leftjoin",     "command algebra.leftjoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ", "ALGleftjoin1;",        "Left join with candidate 
lists; only produce left output"      ]
@@ -620,7 +620,7 @@ stdout of test 'MAL-signatures` in direc
 [ "algebra",   "select",       "command algebra.select(b:bat[:any_1], 
s:bat[:oid], low:any_1, high:any_1, li:bit, hi:bit, anti:bit):bat[:oid] ",      
 "ALGselect2;",  "Select all head values of the first input BAT for which the 
tail value\n\tis in range and for which the head value occurs in the tail of 
the\n\tsecond input BAT.\n\tThe first input is a dense-headed BAT, the second 
input is a\n\tdense-headed BAT with sorted tail, output is a dense-headed 
BAT\n\twith in the tail the head value of the input BAT for which the\n\ttail 
value is between the values low and high (inclusive if li\n\trespectively hi is 
set).  The output BAT is sorted on the tail\n\tvalue.  If low or high is nil, 
the boundary is not considered\n\t(effectively - and + infinity).  If anti is 
set, the result is the\n\tcomplement.  Nil values in the tail are never 
matched, unless\n\tlow=nil, high=nil, li=1, hi=1, anti=0.  All non-nil values 
are\n\treturned if low=nil, high=nil, and li, hi are not both 1, or 
anti=1.\n\tNote that th
 e output is suitable as second input for this\n\tfunction."    ]
 [ "algebra",   "select",       "command algebra.select(b:bat[:any_1], 
s:bat[:oid], low:any_1, high:any_1, li:bit, hi:bit, anti:bit, 
unknown:bit):bat[:oid] ",  "ALGselect2nil;",       "With unknown set, each nil 
!= nil"     ]
 [ "algebra",   "selectNotNil", "command 
algebra.selectNotNil(b:bat[:any_2]):bat[:any_2] ",     "ALGselectNotNil;",     
"Select all not-nil values"     ]
-[ "algebra",   "semijoin",     "command algebra.semijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) 
(X_0:bat[:oid], X_1:bat[:oid]) ",    "ALGsemijoin;", "Semi join with candidate 
lists"        ]
+[ "algebra",   "semijoin",     "command algebra.semijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, max_one:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ",       "ALGsemijoin;", "Semi 
join with candidate lists"        ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:lng, 
y:lng):bat[:any_1] ",      "ALGslice;",    "Return the slice with the BUNs at 
position x till y."  ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:int, 
y:int):bat[:any_1] ",      "ALGslice_int;",        "Return the slice with the 
BUNs at position x till y."  ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:lng, 
y:lng):bat[:any_1] ",      "ALGslice_lng;",        "Return the slice with the 
BUNs at position x till y."  ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -700,7 +700,7 @@ stdout of test 'MAL-signatures` in direc
 [ "algebra",   "ilikejoin",    "command algebra.ilikejoin(l:bat[:str], 
r:bat[:str], esc:str, sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ",      "ILIKEjoin;",   "Join the 
string bat L with the pattern bat R\nwith optional candidate lists SL and SR 
using pattern escape string ESC\nand doing a case insensitive match.\nThe 
result is two aligned bats with oids of matching rows."        ]
 [ "algebra",   "ilikeselect",  "command algebra.ilikeselect(b:bat[:str], 
cand:bat[:oid], pat:str, esc:str, anti:bit):bat[:oid] ",      
"PCRElikeselect1;",     ""      ]
 [ "algebra",   "ilikeselect",  "command algebra.ilikeselect(b:bat[:str], 
cand:bat[:oid], pat:str, anti:bit):bat[:oid] ",       "PCRElikeselect4;",     
""      ]
-[ "algebra",   "intersect",    "command algebra.intersect(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ",        "ALGintersect;",        "Intersection of l 
and r with candidate lists (i.e. half of semi-join)" ]
+[ "algebra",   "intersect",    "command algebra.intersect(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, max_one:bit, 
estimate:lng):bat[:oid] ",   "ALGintersect;",        "Intersection of l and r 
with candidate lists (i.e. half of semi-join)" ]
 [ "algebra",   "join", "command algebra.join(l:bat[:any_1], r:bat[:any_1], 
sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng):bat[:oid] ",     
"ALGjoin1;",    "Join; only produce left output"        ]
 [ "algebra",   "join", "command algebra.join(l:bat[:any_1], r:bat[:any_1], 
sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) (X_0:bat[:oid], 
X_1:bat[:oid]) ",        "ALGjoin;",     "Join"  ]
 [ "algebra",   "leftjoin",     "command algebra.leftjoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ", "ALGleftjoin1;",        "Left join with candidate 
lists; only produce left output"      ]
@@ -729,7 +729,7 @@ stdout of test 'MAL-signatures` in direc
 [ "algebra",   "select",       "command algebra.select(b:bat[:any_1], 
s:bat[:oid], low:any_1, high:any_1, li:bit, hi:bit, anti:bit):bat[:oid] ",      
 "ALGselect2;",  "Select all head values of the first input BAT for which the 
tail value\n\tis in range and for which the head value occurs in the tail of 
the\n\tsecond input BAT.\n\tThe first input is a dense-headed BAT, the second 
input is a\n\tdense-headed BAT with sorted tail, output is a dense-headed 
BAT\n\twith in the tail the head value of the input BAT for which the\n\ttail 
value is between the values low and high (inclusive if li\n\trespectively hi is 
set).  The output BAT is sorted on the tail\n\tvalue.  If low or high is nil, 
the boundary is not considered\n\t(effectively - and + infinity).  If anti is 
set, the result is the\n\tcomplement.  Nil values in the tail are never 
matched, unless\n\tlow=nil, high=nil, li=1, hi=1, anti=0.  All non-nil values 
are\n\treturned if low=nil, high=nil, and li, hi are not both 1, or 
anti=1.\n\tNote that th
 e output is suitable as second input for this\n\tfunction."    ]
 [ "algebra",   "select",       "command algebra.select(b:bat[:any_1], 
s:bat[:oid], low:any_1, high:any_1, li:bit, hi:bit, anti:bit, 
unknown:bit):bat[:oid] ",  "ALGselect2nil;",       "With unknown set, each nil 
!= nil"     ]
 [ "algebra",   "selectNotNil", "command 
algebra.selectNotNil(b:bat[:any_2]):bat[:any_2] ",     "ALGselectNotNil;",     
"Select all not-nil values"     ]
-[ "algebra",   "semijoin",     "command algebra.semijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) 
(X_0:bat[:oid], X_1:bat[:oid]) ",    "ALGsemijoin;", "Semi join with candidate 
lists"        ]
+[ "algebra",   "semijoin",     "command algebra.semijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, max_one:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ",       "ALGsemijoin;", "Semi 
join with candidate lists"        ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:lng, 
y:lng):bat[:any_1] ",      "ALGslice;",    "Return the slice with the BUNs at 
position x till y."  ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:int, 
y:int):bat[:any_1] ",      "ALGslice_int;",        "Return the slice with the 
BUNs at position x till y."  ]
 [ "algebra",   "slice",        "command algebra.slice(b:bat[:any_1], x:lng, 
y:lng):bat[:any_1] ",      "ALGslice_lng;",        "Return the slice with the 
BUNs at position x till y."  ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -148,7 +148,7 @@ BUN BATgrows(BAT *b);
 gdk_return BAThash(BAT *b);
 void BAThseqbase(BAT *b, oid o);
 gdk_return BATimprints(BAT *b);
-BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN 
estimate);
+BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool 
max_one, BUN estimate);
 BAT *BATintersectcand(BAT *a, BAT *b);
 gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
 gdk_return BATkey(BAT *b, bool onoff);
@@ -179,7 +179,7 @@ BAT *BATsample(BAT *b, BUN n);
 BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed);
 gdk_return BATsave(BAT *b) __attribute__((__warn_unused_result__));
 BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool 
hi, bool anti);
-gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
+gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, bool max_one, BUN estimate) 
__attribute__((__warn_unused_result__));
 gdk_return BATsetaccess(BAT *b, restrict_t mode);
 void BATsetcapacity(BAT *b, BUN cnt);
 void BATsetcount(BAT *b, BUN cnt);
@@ -810,7 +810,7 @@ str ALGfetchoid(ptr ret, const bat *bid,
 str ALGfind(oid *ret, const bat *bid, ptr val);
 str ALGfirstn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 str ALGgroupby(bat *res, const bat *gids, const bat *cnts);
-str ALGintersect(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const lng *estimate);
+str ALGintersect(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const bit *max_one, const lng 
*estimate);
 str ALGjoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat *sl, const 
bat *sr, const bit *nil_matches, const lng *estimate);
 str ALGjoin1(bat *r1, const bat *l, const bat *r, const bat *sl, const bat 
*sr, const bit *nil_matches, const lng *estimate);
 str ALGleftjoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat *sl, 
const bat *sr, const bit *nil_matches, const lng *estimate);
@@ -831,7 +831,7 @@ str ALGselect1nil(bat *result, const bat
 str ALGselect2(bat *result, const bat *bid, const bat *sid, const void *low, 
const void *high, const bit *li, const bit *hi, const bit *anti);
 str ALGselect2nil(bat *result, const bat *bid, const bat *sid, const void 
*low, const void *high, const bit *li, const bit *hi, const bit *anti, const 
bit *unknown);
 str ALGselectNotNil(bat *result, const bat *bid);
-str ALGsemijoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat *sl, 
const bat *sr, const bit *nil_matches, const lng *estimate);
+str ALGsemijoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat *sl, 
const bat *sr, const bit *nil_matches, const bit *max_one, const lng *estimate);
 str ALGslice(bat *ret, const bat *bid, const lng *start, const lng *end);
 str ALGslice_int(bat *ret, const bat *bid, const int *start, const int *end);
 str ALGslice_lng(bat *ret, const bat *bid, const lng *start, const lng *end);
diff --git a/configure.ag b/configure.ag
--- a/configure.ag
+++ b/configure.ag
@@ -1561,19 +1561,19 @@ AS_VAR_IF([have_zlib], [yes], [
        AC_DEFINE([HAVE_LIBZ], 1, [Define if you have the z library])])
 AM_CONDITIONAL([HAVE_LIBZ], [test x"$have_zlib" != xno])
 
-org_have_lzma=yes
-have_lzma=$org_have_lzma
+org_have_liblzma=yes
+have_liblzma=$org_have_liblzma
 AC_ARG_WITH([lzma],
        [AS_HELP_STRING([--with-lzma],
                [include lzma support (default=yes)])],
-       [have_lzma=$withval])
-AS_VAR_IF([have_lzma], [no], [], [
-       PKG_CHECK_MODULES([liblzma], [liblzma], [have_lzma=yes], [have_lzma=no; 
why_not_liblzma="(liblzma not found, install xz-libs and xz-devel)"])
-       AS_VAR_IF([have_lzma], [yes], [
+       [have_liblzma=$withval])
+AS_VAR_IF([have_liblzma], [no], [], [
+       PKG_CHECK_MODULES([liblzma], [liblzma], [have_liblzma=yes], 
[have_liblzma=no; why_not_liblzma="(liblzma not found, install xz-libs and 
xz-devel)"])
+       AS_VAR_IF([have_liblzma], [yes], [
                AC_SUBST([PKG_LIBLZMA], [liblzma])])
-       AS_VAR_IF([have_lzma], [yes], [
+       AS_VAR_IF([have_liblzma], [yes], [
                AC_DEFINE([HAVE_LIBLZMA], 1, [Define if you have the lzma 
library])])])
-AM_CONDITIONAL([HAVE_LIBLZMA], [test x"$have_lzma" != xno])
+AM_CONDITIONAL([HAVE_LIBLZMA], [test x"$have_liblzma" != xno])
 
 org_have_valgrind=no
 have_valgrind=$org_have_valgrind
diff --git a/gdk/ChangeLog.Jun2020 b/gdk/ChangeLog.Jun2020
--- a/gdk/ChangeLog.Jun2020
+++ b/gdk/ChangeLog.Jun2020
@@ -1,6 +1,11 @@
 # ChangeLog file for GDK
 # This file is updated with Maddlog
 
+* Tue Apr 28 2020 Sjoerd Mullender <sjo...@acm.org>
+- The functions BATintersect and BATsemijoin have an extra argument,
+  bool max_one, which indicates that there must be no more than one
+  match in the join.
+
 * Mon Apr 20 2020 Sjoerd Mullender <sjo...@acm.org>
 - The "unique" property on BATs was removed.  The property indicated
   that all values in a BAT *had* to be distinct, but this was not
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2056,9 +2056,9 @@ gdk_export gdk_return BATouterjoin(BAT *
        __attribute__((__warn_unused_result__));
 gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int op, bool nil_matches, BUN estimate)
        __attribute__((__warn_unused_result__));
-gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool nil_matches, BUN estimate)
+gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate)
        __attribute__((__warn_unused_result__));
-gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool 
nil_matches, BUN estimate);
+gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool 
nil_matches, bool max_one, BUN estimate);
 gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, 
bool not_in, BUN estimate);
 gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, bool nil_matches, BUN estimate)
        __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -1048,7 +1048,7 @@ BATfirstn_grouped(BAT **topn, BAT **gids
 
                        bn1 = bn;
                        BBPunfix(s->batCacheid);
-                       bn = BATintersect(b, b, su, bn1, true, BUN_NONE);
+                       bn = BATintersect(b, b, su, bn1, true, false, BUN_NONE);
                        BBPunfix(bn1->batCacheid);
                        if (bn == NULL)
                                return GDK_FAIL;
@@ -1125,7 +1125,7 @@ BATfirstn_grouped_with_groups(BAT **topn
                        BBPunfix(bn2->batCacheid);
                        return GDK_FAIL;
                }
-               bn4 = BATintersect(s, bn2, NULL, NULL, false, BUN_NONE);
+               bn4 = BATintersect(s, bn2, NULL, NULL, false, false, BUN_NONE);
                BBPunfix(bn2->batCacheid);
                if (bn4 == NULL) {
                        BBPunfix(bn1->batCacheid);
@@ -1207,7 +1207,7 @@ BATfirstn_grouped_with_groups(BAT **topn
        if (gids) {
                BAT *bn1, *bn2, *bn3, *bn4, *bn5, *bn6, *bn7, *bn8;
 
-               if ((bn1 = BATintersect(s, bn, NULL, NULL, false, BUN_NONE)) == 
NULL) {
+               if ((bn1 = BATintersect(s, bn, NULL, NULL, false, false, 
BUN_NONE)) == NULL) {
                        BBPunfix(bn->batCacheid);
                        return  GDK_FAIL;
                }
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -1644,13 +1644,17 @@ mergejoin_cand(BAT **r1p, BAT **r2p, BAT
  * there is a match of l in r, no matter how many matches there are in
  * r; otherwise all matches are returned.
  *
+ * If max_one is set, only a single match is allowed.  This is like
+ * semi, but enforces the single match.
+ *
  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
  */
 static gdk_return
 mergejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
          struct canditer *restrict lci, struct canditer *restrict rci,
          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
-         bool not_in, BUN estimate, lng t0, bool swapped, const char *reason)
+         bool not_in, bool max_one, BUN estimate, lng t0, bool swapped,
+         const char *reason)
 {
        /* [lr]scan determine how far we look ahead in l/r in order to
         * decide whether we want to do a binary search or a scan */
@@ -1681,7 +1685,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
 
        if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
            rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
-           !nil_on_miss && !semi && !only_misses && !not_in &&
+           !nil_on_miss && !semi && !max_one && !only_misses && !not_in &&
            l->tsorted && r->tsorted) {
                /* special cases with far fewer options */
                if (r->ttype == TYPE_void && r->tvheap)
@@ -1756,8 +1760,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
        }
 
        BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
-                                     l->tkey, r->tkey, semi, nil_on_miss,
-                                     only_misses, estimate);
+                                     l->tkey, r->tkey, semi | max_one,
+                                     nil_on_miss, only_misses, estimate);
        if (maxsize == BUN_NONE)
                return GDK_FAIL;
        BAT *r1 = *r1p;
@@ -2159,6 +2163,9 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                r2->tseqbase = oid_nil;
                                r2->tkey = false;
                        }
+               } else if (nr > 1 && max_one) {
+                       GDKerror("more than one match");
+                       goto bailout;
                } else if (only_misses) {
                        /* we had a match, so we're not interested */
                        lskipped = BATcount(r1) > 0;
@@ -2394,7 +2401,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                                break;                  \
                                        }                               \
                                        HASHLOOPBODY();                 \
-                                       if (semi)                       \
+                                       if (semi && !max_one)           \
                                                break;                  \
                                }                                       \
                        } else if (rci->tpe != cand_dense) {            \
@@ -2409,7 +2416,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                                        break;          \
                                                }                       \
                                                HASHLOOPBODY();         \
-                                               if (semi)               \
+                                               if (semi && !max_one)   \
                                                        break;          \
                                        }                               \
                                }                                       \
@@ -2425,7 +2432,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                                }                       \
                                                ro = (oid) (rb - roff + rseq); \
                                                HASHLOOPBODY();         \
-                                               if (semi)               \
+                                               if (semi && !max_one)   \
                                                        break;          \
                                        }                               \
                                }                                       \
@@ -2434,7 +2441,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                if (only_misses) {                      \
                                        nr = 1;                         \
                                        if (maybeextend(r1, r2, 1, lci->next, 
lci->ncand, maxsize) != GDK_SUCCEED) \
-                                               goto bailout;                   
\
+                                               goto bailout;           \
                                        APPEND(r1, lo);                 \
                                        if (lskipped)                   \
                                                r1->tseqbase = oid_nil; \
@@ -2444,12 +2451,15 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
                                        r2->tnonil = false;             \
                                        r2->tkey = false;               \
                                        if (maybeextend(r1, r2, 1, lci->next, 
lci->ncand, maxsize) != GDK_SUCCEED) \
-                                               goto bailout;                   
\
+                                               goto bailout;           \
                                        APPEND(r1, lo);                 \
                                        APPEND(r2, oid_nil);            \
                                } else {                                \
                                        lskipped = BATcount(r1) > 0;    \
                                }                                       \
+                       } else if (nr > 1 && max_one) {                 \
+                               GDKerror("more than one match");        \
+                               goto bailout;                           \
                        } else if (only_misses) {                       \
                                lskipped = BATcount(r1) > 0;            \
                        } else {                                        \
@@ -2476,7 +2486,7 @@ static gdk_return
 hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
         struct canditer *restrict lci, struct canditer *restrict rci,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
-        bool not_in,
+        bool not_in, bool max_one,
         BUN estimate, lng t0, bool swapped, bool hash, bool phash,
         const char *reason)
 {
@@ -2522,8 +2532,8 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                               nil_on_miss, only_misses, __func__, t0);
 
        BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
-                                     l->tkey, r->tkey, semi, nil_on_miss,
-                                     only_misses, estimate);
+                                     l->tkey, r->tkey, semi | max_one,
+                                     nil_on_miss, only_misses, estimate);
        if (maxsize == BUN_NONE)
                return GDK_FAIL;
 
@@ -2661,7 +2671,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                                break;
                                        }
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to