Changeset: 4edd9147773b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4edd9147773b
Modified Files:
        monetdb5/modules/mal/groupby.c
        monetdb5/modules/mal/groupby.h
        monetdb5/modules/mal/groupby.mal
Branch: default
Log Message:

Second round of groupby API
RFC on this interface set for both the improved mergetable
and distributed processing.


diffs (176 lines):

diff --git a/monetdb5/modules/mal/groupby.c b/monetdb5/modules/mal/groupby.c
--- a/monetdb5/modules/mal/groupby.c
+++ b/monetdb5/modules/mal/groupby.c
@@ -19,17 +19,15 @@
 
 /*
  * (c) Martin Kersten
- * Group-by support
+ * Multicolumn group-by support
  * The group-by support module is meant to replace and speedup the kernel 
grouping routines.
  * The latter was originally designed in a memory constraint setting and an 
exercise in
  * performing column-wise grouping incrementally. The effect is that these 
routines are
  * now a major performance hindrance.
  *
- * This module again takes the columnar approach to grouping, but supports for 
more
- * parallelism in achieving these goals.
- *
- * The target is to support SQL-like group_by operations, which are lists of
- * attributes (reduced by a pivot list) followed by a group aggregate function.
+ * The target is to support SQL-like multicolumn group_by operations, which are 
lists of
+ * attributes and a group aggregate function.
+ * Each group can be represented with an oid into the n-ary table.
  * Consider the query "select count(*), max(A) from R group by A, B,C." whose 
code
  * snippet in MAL would become something like:
  * @verbatim
@@ -39,15 +37,18 @@
  * ...
  * _9 := algebra.select(_1,0,100);
  * ..
- * grp:bat[:oid,:oid] := groupby.id(_9, _1, _2, _3);
- * grp_4:bat[:oid,:wrd] := groupby.count(_9, _1, _2, _3);
- * grp_5:bat[:oid,:lng] := groupby.max(_9,_2, _3, _1);
+ * (grp_4:bat[:oid,:wrd], gid:bat[:oid,:oid]) := groupby.count(_9,  _1,_2,_3);
+ * (grp_5:bat[:oid,:lng], gid:bat[:oid,:oid]) := groupby.max(_9,_2, _1,_2,_3);
  * @end verbatim
  *
+ * All instructions have a candidate oid list.
  * The id() function merely becomes the old-fashioned oid-based group 
identification list.
  * This way related values can be obtained from the attribute columns. It can 
be the input
  * for the count() function, which saves some re-computation.
  *
+ * Aside from the group ids, we also provide options to return the value-based 
aggregate table
+ * to ease development of parallel plans.
+ *
  * The implementation is optimized for a limited number of groups. The default 
is
  * to fall back on the old code sequences.
  *
@@ -163,6 +164,46 @@ GROUPid(Client cntxt, MalBlkPtr mb, MalS
 }
 
 str
+GROUPcountTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       (void) stk;
+       (void) pci;
+       return MAL_SUCCEED;
+}
+
+str
+GROUPmaxTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       (void) stk;
+       (void) pci;
+       return MAL_SUCCEED;
+}
+
+str
+GROUPminTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       (void) stk;
+       (void) pci;
+       return MAL_SUCCEED;
+}
+
+str
+GROUPavgTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       (void) stk;
+       (void) pci;
+       return MAL_SUCCEED;
+}
+
+str
 GROUPcount(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
        int *ret = (int*) getArgReference(stk,pci,0);
diff --git a/monetdb5/modules/mal/groupby.h b/monetdb5/modules/mal/groupby.h
--- a/monetdb5/modules/mal/groupby.h
+++ b/monetdb5/modules/mal/groupby.h
@@ -35,11 +35,16 @@
 #define group_by_export extern
 #endif
 
+group_by_export str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr 
stk, InstrPtr pci);
+
 group_by_export str GROUPid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 group_by_export str GROUPcount(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+group_by_export str GROUPcountTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 group_by_export str GROUPmax(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+group_by_export str GROUPmaxTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 group_by_export str GROUPmin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+group_by_export str GROUPminTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 group_by_export str GROUPavg(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
-group_by_export str GROUPmulticolumn(Client cntxt, MalBlkPtr mb, MalStkPtr 
stk, InstrPtr pci);
+group_by_export str GROUPavgTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 
 #endif /* _GROUPBY_H */
diff --git a/monetdb5/modules/mal/groupby.mal b/monetdb5/modules/mal/groupby.mal
--- a/monetdb5/modules/mal/groupby.mal
+++ b/monetdb5/modules/mal/groupby.mal
@@ -17,27 +17,46 @@
 
 module group;
 
-pattern group.subgroup(b:bat[:oid,:any]...)(ref:bat[:oid,:oid], 
grp:bat[:oid,:any],ext:bat[:oid,:any])
+pattern group.subgroup(b:bat[:oid,:any]...)(ref:bat[:oid,:oid], 
grp:bat[:oid,:any], ext:bat[:oid,:any])
 address GROUPmulticolumn
-comment "Old-fashioned derivation of a group index over multiple columns.";
+comment "Old-fashioned derivation of a group index over multiple columns. A 
first oid-argument is interpreted as a candidate list";
+
+pattern group.id(cand:bat[:oid,:oid],b:bat[:oid,:any]...) :bat[:oid,:oid]
+address GROUPid
+comment "Derive a group id for all n-tuples. A first oid-argument is 
interpreted as a candidate list or NULL";
 
 pattern group.id(b:bat[:oid,:any]...) :bat[:oid,:oid]
 address GROUPid
-comment "Derive the grouping of all tuples in the BATs.";
+comment "Derive a group id for all n-tuples.";
 
-pattern group.count(b:bat[:oid,:any]...) (o:bat[:oid,:oid],cnt:bat[:oid,:wrd])
+pattern group.count(cand:bat[:oid,:oid],b:bat[:oid,:any]...) 
(cnt:bat[:oid,:wrd], b:bat[:oid,:any]...)
+address GROUPcountTable
+comment "Derive a group table for all n-tuples and their count. The first 
argument is a candidate list";
+
+pattern group.count(cand:bat[:oid,:oid],b:bat[:oid,:any]...) 
(cnt:bat[:oid,:wrd], gid:bat[:oid,:oid])
 address GROUPcount
-comment "Derive the grouping of all tuples in the BATs and derive their count 
them.";
+comment "Derive a group id for all n-tuples and their count. The first 
argument is a candidate list";
 
-pattern group.max(target:bat[:oid,:any_1],b:bat[:oid,:any]...) 
(o:bat[:oid,:oid],:bat[:oid,:any_1])
+pattern 
group.max(cand:bat[:oid,:oid],target:bat[:oid,:any_1],b:bat[:oid,:any]...) 
(mx:bat[:oid,:any_1], o:bat[:oid,:any]...)
+address GROUPmaxTable
+comment "Derive the group table of all n-tuples and determine their maximum 
value. The first argument is a candidate list";
+
+pattern 
group.max(cand:bat[:oid,:oid],target:bat[:oid,:any_1],b:bat[:oid,:any]...) 
(mx:bat[:oid,:any_1], gid:bat[:oid,:oid])
 address GROUPmax
-comment "Derive the grouping of all tuples in the BATs and determine their 
maximum value.";
+comment "Derive the group id of all n-tuples and determine their maximum 
value. The first argument is a candidate list";
 
-pattern group.min(target:bat[:oid,:any_1],b:bat[:oid,:any]...) 
(o:bat[:oid,:oid],mi:bat[:oid,:any_1])
+pattern 
group.min(cand:bat[:oid,:oid],target:bat[:oid,:any_1],b:bat[:oid,:any]...) 
(mi:bat[:oid,:any_1], o:bat[:oid,:any]...)
+address GROUPminTable
+comment "Derive the group id of all n-tuples and determine their minimum value. 
The first argument is a candidate list";
+
+pattern group.min(cand:bat[:oid,:oid], 
target:bat[:oid,:any_1],b:bat[:oid,:any]...) (mi:bat[:oid,:any_1], 
gid:bat[:oid,:oid])
 address GROUPmin
-comment "Derive the grouping of all tuples in the BATs and their determine 
minimum value.";
+comment "Derive the group id of all n-tuples and determine their minimum value. 
The first argument is a candidate list";
 
-pattern group.avg(pivot:bat[:oid,:any_1],b:bat[:oid,:any_2]...) 
(o:bat[:oid,:oid],a:bat[:oid,:any_2])
+pattern group.avg(cand:bat[:oid,:oid], 
target:bat[:oid,:any_1],b:bat[:oid,:any_2]...) (a:bat[:oid,:dbl])
 address GROUPavg
-comment "Derive the grouping of all tuples in the BATs and determine their 
average value.";
+comment "Derive the group id of all n-tuples and determine their average 
value. The first argument is a candidate list";
 
+pattern group.avg(cand:bat[:oid,:oid], 
target:bat[:oid,:any_1],b:bat[:oid,:any_2]...) (a:bat[:oid,:dbl], 
o:bat[:oid,:any]...)
+address GROUPavgTable
+comment "Derive the group id of all n-tuples and determine their average 
value. The first argument is a candidate list";
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to