Changeset: aa39c7c20bfc for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/aa39c7c20bfc
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/server/rel_optimize_proj.c
Branch: default
Log Message:

merged with mar2025


diffs (281 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -834,7 +834,8 @@ COLcopy(BAT *b, int tt, bool writable, r
                bn->tnil = bi.nil;
                bn->tminpos = bi.minpos;
                bn->tmaxpos = bi.maxpos;
-               bn->tunique_est = bi.unique_est;
+               if (!bi.key)
+                       bn->tunique_est = bi.unique_est;
        } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
                   ATOMcompare(tt) == ATOMcompare(b->ttype)) {
                BUN h = bi.count;
@@ -861,7 +862,8 @@ COLcopy(BAT *b, int tt, bool writable, r
                }
                bn->tminpos = bi.minpos;
                bn->tmaxpos = bi.maxpos;
-               bn->tunique_est = bi.unique_est;
+               if (!bi.key)
+                       bn->tunique_est = bi.unique_est;
        } else {
                bn->tsorted = bn->trevsorted = false; /* set based on count later */
                bn->tnonil = bn->tnil = false;
@@ -873,6 +875,7 @@ COLcopy(BAT *b, int tt, bool writable, r
                bn->tsorted = ATOMlinear(b->ttype);
                bn->trevsorted = ATOMlinear(b->ttype);
                bn->tkey = true;
+               bn->tunique_est = (double) bn->batCount;
        }
        bat_iterator_end(&bi);
        if (!writable)
@@ -1665,8 +1668,12 @@ BUNinplacemulti(BAT *b, const oid *posit
        BUN nunique = b->thash ? b->thash->nunique : 0;
        MT_rwlock_wrunlock(&b->thashlock);
        MT_lock_set(&b->theaplock);
-       if (nunique != 0)
+       if (nunique != 0) {
                b->tunique_est = (double) nunique;
+               if (nunique == b->batCount && !b->tkey)
+                       BATkey(b, true);
+       } else if (b->tkey)
+               b->tunique_est = (double) b->batCount;
        b->tminpos = bi.minpos;
        b->tmaxpos = bi.maxpos;
        b->theap->dirty = true;
@@ -1954,8 +1961,10 @@ BATkey(BAT *b, bool flag)
        b->tkey = flag;
        if (!flag) {
                b->tseqbase = oid_nil;
-       } else
+       } else {
                b->tnokey[0] = b->tnokey[1] = 0;
+               b->tunique_est = (double) b->batCount;
+       }
        gdk_return rc = GDK_SUCCEED;
        if (flag && VIEWtparent(b)) {
                /* if a view is key, then so is the parent if the two
@@ -2021,6 +2030,7 @@ BATtseqbase(BAT *b, oid o)
                                b->trevsorted = b->batCount <= 1;
                                if (!b->trevsorted)
                                        b->tnorevsorted = 1;
+                               b->tunique_est = (double) b->batCount;
                        }
                }
        } else {
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1469,6 +1469,7 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
                GDKfree(msks);
        }
        BATsetcount(bn, cnt);
+       bn->tunique_est = (double) cnt;
        TRC_DEBUG(ALGO, "hseq=" OIDFMT ", masked=" ALGOBATFMT ", selected=%s"
                  " -> " ALGOBATFMT "\n",
                  hseq, ALGOBATPAR(masked),
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3259,8 +3259,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
                locked = false;
                MT_rwlock_rdunlock(&r->thashlock);
        }
-       bat_iterator_end(&li);
-       bat_iterator_end(&ri);
 
        if (hash_cand) {
                HEAPfree(&hsh->heaplink, true);
@@ -3269,7 +3267,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
        }
        /* also set other bits of heap to correct value to indicate size */
        BATsetcount(r1, BATcount(r1));
-       r1->tunique_est = MIN(l->tunique_est, r->tunique_est);
+       r1->tunique_est = MIN(li.unique_est, ri.unique_est);
        if (BATcount(r1) <= 1) {
                r1->tsorted = true;
                r1->trevsorted = true;
@@ -3285,14 +3283,16 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
                        r2->tkey = true;
                        r2->tseqbase = 0;
                }
-               r2->tunique_est = MIN(l->tunique_est, r->tunique_est);
+               r2->tunique_est = MIN(li.unique_est, ri.unique_est);
        }
        if (r3) {
                r3->tnonil = !r3->tnil;
                BATsetcount(r3, BATcount(r3));
                assert(BATcount(r1) == BATcount(r3));
-               r3->tunique_est = MIN(l->tunique_est, r->tunique_est);
+               r3->tunique_est = MIN(li.unique_est, ri.unique_est);
        }
+       bat_iterator_end(&li);
+       bat_iterator_end(&ri);
        if (BATcount(r1) > 0) {
                if (BATtdense(r1))
                        r1->tseqbase = ((oid *) r1->theap->base)[0];
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -572,8 +572,8 @@ project_str(BATiter *restrict li, struct
        bn->tnonil = r1i->nonil & r2i->nonil;
        bn->tkey = false;
        bn->tunique_est =
-               MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
-                  r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
+               MIN(li->unique_est ? li->unique_est : BATcount(li->b),
+                   r1i->unique_est ? r1i->unique_est : BATcount(r1i->b));
        TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
                  " -> " ALGOBATFMT "%s " LLFMT "us\n",
                  ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
@@ -824,8 +824,8 @@ BATproject2(BAT *restrict l, BAT *restri
        }
 
        bn->tunique_est =
-               MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
-                  r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
+               MIN(li.unique_est ? li.unique_est : BATcount(li.b),
+                   r1i.unique_est ? r1i.unique_est : BATcount(r1i.b));
        if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
                BATtseqbase(bn, oid_nil);
 
diff --git a/sql/backends/monet5/UDF/udf/udf.c b/sql/backends/monet5/UDF/udf/udf.c
--- a/sql/backends/monet5/UDF/udf/udf.c
+++ b/sql/backends/monet5/UDF/udf/udf.c
@@ -366,7 +366,7 @@ UDFBATfuse_(BAT **ret, BAT *bone, BAT *b
                        bres->trevsorted = true;
                else
                        bres->trevsorted = (BATcount(bres) <= 1);
-               /* result tail is key (unique), iff both input tails are */
+               /* result tail is key (unique), if both input tails are */
                BATkey(bres, BATtkey(bone) || BATtkey(btwo));
 
                *ret = bres;
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1735,6 +1735,7 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                if (attr && attr->h) {
                        node *en;
                        list *l = sa_list(sql->sa);
+                       stmt *next = NULL;
 
                        for (en = attr->h; en; en = en->next) {
                                sql_exp *at = en->data;
@@ -1756,7 +1757,7 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                        if (need_distinct(e) && (grp || list_length(l) > 1)){
                                list *nl = sa_list(sql->sa);
                                stmt *ngrp = grp;
-                               stmt *next = ext;
+                               next = ext;
                                stmt *ncnt = cnt;
                                if (nl == NULL)
                                        return NULL;
@@ -1799,6 +1800,10 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                                        for (node *n = obe->h; n; n = n->next) {
                                                sql_exp *oe = n->data;
                                                stmt *os = exp_bin(be, oe, left, right, NULL, NULL, NULL, sel, depth+1, 0, push);
+                                               if (!os)
+                                                       return NULL;
+                                               if (next)
+                                                       os = stmt_project(be, next, os);
                                                if (orderby)
                                                        orderby = stmt_reorder(be, os, is_ascending(oe), nulls_last(oe), orderby_ids, orderby_grp);
                                                else
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2431,7 +2431,6 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
                nr_parts = *getArgReference_int(stk, pci, 5);
        }
        BAT *b = store->storage_api.bind_cands(tr, t, nr_parts, part_nr);
-       b->tunique_est = (double)BATcount(b);
        if (b) {
                *res = b->batCacheid;
                BBPkeepref(b);
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -2814,6 +2814,8 @@ rel_groupby_distinct(visitor *v, sql_rel
                        if (need_distinct(e)) {
                                distinct = n->data;
                                nr++;
+                               if (e->r) /* distinct and order by */
+                                       nr++;
                        }
                        anr += is_aggr(e->type);
                }
diff --git a/sql/server/sql_parser.y b/sql/server/sql_parser.y
--- a/sql/server/sql_parser.y
+++ b/sql/server/sql_parser.y
@@ -594,6 +594,7 @@ int yydebug=1;
        opt_best_effort
        opt_brackets
        opt_chain
+       all_distinct
        opt_distinct
        opt_escape
        opt_grant_for
@@ -3424,6 +3425,12 @@ opt_distinct:
  |  DISTINCT           { $$ = TRUE; }
  ;
 
+all_distinct:
+    ALL                        { $$ = FALSE; }
+ |  DISTINCT           { $$ = TRUE; }
+ ;
+
+
 assignment_commalist:
     assignment         { $$ = append_symbol(L(), $1 ); }
  |  assignment_commalist ',' assignment
@@ -5106,20 +5113,20 @@ aggr_or_window_ref:
                  else
                        append_symbol(l, $6);
                  $$ = _symbol_create_list( SQL_NOP, l ); }
- |  qfunc '(' DISTINCT expr_list ')'
+ |  qfunc '(' all_distinct expr_list opt_order_by_clause ')' opt_within_group
                { dlist *l = L();
                  append_list(l, $1);
-                 append_int(l, TRUE);
+                 append_int(l, $3);
+                 if ($5 && $7) {
+                       yyerror(m, "Cannot have both order by clause and within group clause");
+                       YYABORT;
+                 }
                  append_list(l, $4);
-                 $$ = _symbol_create_list( SQL_NOP, l );
-               }
- |  qfunc '(' ALL expr_list ')'
-               { dlist *l = L();
-                 append_list(l, $1);
-                 append_int(l, FALSE);
-                 append_list(l, $4);
-                 $$ = _symbol_create_list( SQL_NOP, l );
-               }
+                 if ($5)
+                       append_symbol(l, $5);
+                 else
+                       append_symbol(l, $7);
+                 $$ = _symbol_create_list( SQL_NOP, l ); }
  |  XML_aggregate
  ;
 
diff --git a/sql/test/proto_loader/odbc/Tests/monetodbc.test b/sql/test/proto_loader/odbc/Tests/monetodbc.test
--- a/sql/test/proto_loader/odbc/Tests/monetodbc.test
+++ b/sql/test/proto_loader/odbc/Tests/monetodbc.test
@@ -26,3 +26,14 @@ select id from 'odbc:DSN=MonetDB-Test;QU
 2067
 2115
 
+-- test with unicode characters (see https://en.wikipedia.org/wiki/List_of_Unicode_characters) both as value and as result column name
+query T
+select '\u019CM\u0238\u1E9E\u02AC\u0376\u0398\u03A9\u03C0\u0488\u069E\u0BF5\u1402\u2021\u2030\u2042\u213C\u221C\u2329\u250C\u251C\u252C\u253C' as "M\u0238"
+----
+ƜMȸẞʬͶΘΩπ҈ڞ௵ᐂ‡‰⁂ℼ∜〈┌├┬┼
+
+query T
+select * from 'odbc:DSN=MonetDB-Test;QUERY=select ''\u019CM\u0238\u1E9E\u02AC\u0376\u0398\u03A9\u03C0\u0488\u069E\u0BF5\u1402\u2021\u2030\u2042\u213C\u221C\u2329\u250C\u251C\u252C\u253C'' as "M\u0238"'
+----
+ƜMȸẞʬͶΘΩπ҈ڞ௵ᐂ‡‰⁂ℼ∜〈┌├┬┼
+
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to