;-)

Dank je wel! --- Niet alleen voor deze, maar met naam ook voor de hele
nieuwe group-by code en al die andere nieuwe schone code!!!

Stefan

On Tue, Aug 21, 2012 at 12:43:42PM +0200, Sjoerd Mullender wrote:
> Changeset: 6f001c03cd8d for MonetDB
> URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6f001c03cd8d
> Modified Files:
>       gdk/gdk_group.c
> Branch: default
> Log Message:
> 
> Implemented another trivial case for grouping: all equal values.
> 
> 
> diffs (122 lines):
> 
> diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
> --- a/gdk/gdk_group.c
> +++ b/gdk/gdk_group.c
> @@ -44,11 +44,14 @@
>   * is always created.  In other words, the groups argument may not be
>   * NULL, but the extents and histo arguments may be NULL.
>   *
> - * There are five different implementations of the grouping code.
> + * There are six different implementations of the grouping code.
>   *
>   * If it can be trivially determined that all groups are singletons,
>   * we can produce the outputs trivially.
>   *
> + * If all values in b are known to be equal (both sorted and reverse
> + * sorted), we produce a single group or copy the input group.
> + *
>   * If the input bats b and g are sorted, or if the subsorted flag is
>   * set (only used by BATsubsort), we only need to compare consecutive
>   * values.
> @@ -102,6 +105,7 @@ BATgroup_internal(BAT **groups, BAT **ex
>  
>       if (b->tkey || BATcount(b) <= 1 || (g && (g->tkey || BATtdense(g)))) {
>               /* grouping is trivial: 1 element per group */
> +             ALGODEBUG fprintf(stderr, "#BATgroup: trivial case: 1 element 
> per group\n");
>               if (BATcount(b) == 1 && b->htype == TYPE_oid)
>                       ngrp = * (oid *) Hloc(b, BUNfirst(b));
>               else
> @@ -132,6 +136,62 @@ BATgroup_internal(BAT **groups, BAT **ex
>               }
>               return GDK_SUCCEED;
>       }
> +     if (b->tsorted && b->trevsorted) {
> +             /* all values are equal */
> +             if (g == NULL) {
> +                     /* there's only a single group: 0 */
> +                     ALGODEBUG fprintf(stderr, "#BATgroup: trivial case: 
> single output group\n");
> +                     ngrp = 0;
> +                     gn = BATconstant(TYPE_oid, &ngrp, BATcount(b));
> +                     if (gn == NULL)
> +                             goto error;
> +                     BATseqbase(gn, b->hseqbase);
> +                     *groups = gn;
> +                     if (extents) {
> +                             ngrp = gn->hseqbase;
> +                             en = BATconstant(TYPE_void, &ngrp, 1);
> +                             if (en == NULL)
> +                                     goto error;
> +                             BATseqbase(BATmirror(en), ngrp);
> +                             *extents = en;
> +                     }
> +                     if (histo) {
> +                             wrd cnt = (wrd) BATcount(b);
> +
> +                             hn = BATconstant(TYPE_wrd, &cnt, 1);
> +                             if (hn == NULL)
> +                                     goto error;
> +                             *histo = hn;
> +                     }
> +                     return GDK_SUCCEED;
> +             }
> +             if ((extents == NULL) == (e == NULL) &&
> +                 (histo == NULL) == (h == NULL)) {
> +                     /* inherit given grouping; note that if
> +                      * extents/histo is to be returned, we need
> +                      * e/h available in order to copy them,
> +                      * otherwise we will need to calculate them
> +                      * which we will do using the "normal" case */
> +                     ALGODEBUG fprintf(stderr, "#BATgroup: trivial case: 
> copy input groups\n");
> +                     gn = BATcopy(g, g->htype, g->ttype, 0);
> +                     if (gn == NULL)
> +                             goto error;
> +                     *groups = gn;
> +                     if (extents) {
> +                             en = BATcopy(e, e->htype, e->ttype, 0);
> +                             if (en == NULL)
> +                                     goto error;
> +                             *extents = en;
> +                     }
> +                     if (histo) {
> +                             hn = BATcopy(h, h->htype, h->ttype, 0);
> +                             if (hn == NULL)
> +                                     goto error;
> +                             *histo = hn;
> +                     }
> +                     return GDK_SUCCEED;
> +             }
> +     }
>       assert(g == NULL || !BATtdense(g)); /* i.e. g->ttype == TYPE_oid */
>       bi = bat_iterator(b);
>       cmp = BATatoms[b->ttype].atomCmp;
> @@ -167,6 +227,7 @@ BATgroup_internal(BAT **groups, BAT **ex
>            (g == NULL || g->tsorted || g->trevsorted)) ||
>           subsorted) {
>               /* we only need to compare each entry with the previous */
> +             ALGODEBUG fprintf(stderr, "#BATgroup: compare consecutive 
> values\n");
>               if (grps)
>                       prev = *grps++;
>               pv = BUNtail(bi, BUNfirst(b));
> @@ -220,6 +281,7 @@ BATgroup_internal(BAT **groups, BAT **ex
>               /* for each value, we need to scan all previous equal
>                * values (a consecutive, possibly empty, range) to
>                * see if we can find one in the same old group */
> +             ALGODEBUG fprintf(stderr, "#BATgroup: subscan old groups\n");
>               pv = BUNtail(bi, BUNfirst(b));
>               ngrps[0] = ngrp;
>               if (extents)
> @@ -284,6 +346,7 @@ BATgroup_internal(BAT **groups, BAT **ex
>               }
>       } else if (b->T->hash) {
>               /* we already have a hash table on b */
> +             ALGODEBUG fprintf(stderr, "#BATgroup: use existing hash 
> table\n");
>               hs = b->T->hash;
>               for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) {
>                       v = BUNtail(bi, p);
> @@ -343,6 +406,7 @@ BATgroup_internal(BAT **groups, BAT **ex
>                * build an incomplete hash table on the fly--also see
>                * BATassertHeadProps and BATderiveHeadProps for
>                * similar code */
> +             ALGODEBUG fprintf(stderr, "#BATgroup: create partial hash 
> table\n");
>               nme = BBP_physical(b->batCacheid);
>               nmelen = strlen(nme);
>               if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||
> _______________________________________________
> Checkin-list mailing list
> checkin-l...@monetdb.org
> http://mail.monetdb.org/mailman/listinfo/checkin-list
> 

-- 
| Stefan.Manegold @ CWI.nl | DB Architectures (INS1) |
| http://CWI.nl/~manegold/ | Science Park 123 (L321) |
| Tel.: +31 (0)20 592-4212 | 1098 XG Amsterdam  (NL) |

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Monetdb-developers mailing list
Monetdb-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/monetdb-developers

Reply via email to