Changeset: f7df3c9722e1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f7df3c9722e1
Modified Files:
gdk/gdk_arrays.c
monetdb5/modules/kernel/arrays.c
monetdb5/modules/kernel/arrays.h
monetdb5/modules/kernel/arrays.mal
Branch: arrays
Log Message:
group by with one group per cell
diffs (275 lines):
diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c
--- a/gdk/gdk_arrays.c
+++ b/gdk/gdk_arrays.c
@@ -320,7 +320,7 @@ BAT *projectCells(gdk_array* dimCands, g
return NULL;
resOIDs = (oid*)Tloc(resBAT, BUNfirst(resBAT));
- /* the oids if all but the first dimension are 0 */
+ /* the oids in all but the first dimension are 0 */
dim = dimCands->dims[0];
if(dim->idxs) {
for(i=0; i<dim->elsNum ; i++, j++)
diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c
--- a/monetdb5/modules/kernel/arrays.c
+++ b/monetdb5/modules/kernel/arrays.c
@@ -1213,17 +1213,22 @@ str ALGsubrangejoin1(ptr *dimsResL, ptr
return ALGsubrangejoin2(dimsResL, dimsResR, dimL, dimsL, dimR1, dimR2,
dimsR, NULL, NULL, li, hi, estimate);
}
-
-str ARRgroup(ptr *groupsRes, ptr *arrayRes, const ptr *dim, const ptr *dims,
const int *l, const int *h) {
-
+str ARRgroup2(ptr *groupsRes, ptr *arrayRes, const ptr *groupsCands, const ptr
*dim, const ptr *dims, const int *l, const int *h) {
gdk_analytic_dimension *dimension = (gdk_analytic_dimension*)*dim;
gdk_array *array = (gdk_array*)*dims;
+ gdk_dimension_group *dimGrp;
- gdk_array_groups *groups = array2groups(array);
+ gdk_array_groups *groups = NULL;
+ if(groupsCands)
+ groups = (gdk_array_groups*)*groupsCands;
+ else
+ groups = array2groups(array);
/*update the limits for the current dimension */
- groups->groups[dimension->dimNum]->min = *l;
- groups->groups[dimension->dimNum]->max = *h;
+ dimGrp = groups->groups[dimension->dimNum];
+ dimGrp->min = *l;
+ dimGrp->max = *h;
+ dimGrp->elsNum = (dimGrp->max - dimGrp->min + 1)/dimGrp->step;
*groupsRes = groups;
*arrayRes = arrayCopy(array);
@@ -1231,20 +1236,179 @@ str ARRgroup(ptr *groupsRes, ptr *arrayR
return MAL_SUCCEED;
}
-str ARRprojectGroups(bat *res, const ptr *groups_in, const ptr *dim, const ptr
*dims) {
- (void)*res;
- (void)*groups_in;
- (void)*dim;
- (void)*dims;
+str ARRgroup1(ptr *groupsRes, ptr *arrayRes, const ptr *dim, const ptr *dims,
const int *l, const int *h) {
+ return ARRgroup2(groupsRes, arrayRes, NULL, dim, dims, l, h);
+}
+
+str ARRprojectGroups(bat *groupOidsRes, ptr *groupsRes, const ptr *groups_in,
const ptr *dims) {
+ gdk_array_groups *groups = (gdk_array_groups*)*groups_in;
+ gdk_array *array = (gdk_array*)*dims;
+
+ BAT *resBAT = NULL;
+ oid *els = NULL;
+
+
+ gdk_dimension *dim = NULL;
+ gdk_dimension_group *dimGrp = NULL;
+
+ int j;
+ oid jumpSize = 1, repeatElement=1, repeatGroup=1, repeatGroupElement=1;
+ oid idx =0, grp=0, rE, rG;
+ unsigned int k;
+
+ /* compute the number of elements in each group */
+ unsigned short i;
+ oid groupSize = 1;
+ oid cellsNum = 1;
+ for(i=0; i<groups->dimsNum; i++)
+ groupSize *= groups->groups[i]->elsNum;
+ for(i=0; i<array->dimsNum; i++)
+ cellsNum *= array->dims[i]->elsNum;
+ repeatGroup = cellsNum;
+
+ if((resBAT = BATnew(TYPE_void, TYPE_oid, groupSize*cellsNum,
TRANSIENT)) == NULL)
+ return createException(MAL, "arrays.projectGroups", "Problem creating
BAT");
+ els = (oid*) Tloc(resBAT, BUNfirst(resBAT));
+
+ /* find the oids that belong to the same group (with the minimum oid
being 0) */
+ /* initialise the oids considering only the first dimension */
+ dim = array->dims[0];
+ dimGrp = groups->groups[0];
+ repeatGroup/=array->dims[0]->elsNum;
+
+ for(rG = 0; rG <repeatGroup; rG++) {
+ for(k=dim->min; k<=dim->max; k+=dim->step) { /*for each value
of x*/
+ for(grp=0; grp<groupSize; grp+=dimGrp->elsNum) { /*the
group of the cell*/
+ for(j=dimGrp->min; j<=dimGrp->max;
j+=dimGrp->step, idx++) {
+ /* if it is out of the limits of the
array either on the left
+ * or the right then add nill */
+ if(((j<0) & ((unsigned int)-j>k)) ||
((j>0) & (k+j>dim->max)))
+ els[idx] = oid_nil;
+ else
+ els[idx] = k+j;
+ }
+ }
+ }
+ }
+
+ /* consider the rest of the dimensions */
+ for(i=1; i<groups->dimsNum; i++) {
+ /*the repeatElement is increased according to
+ * the number of elements in the dimension just processed */
+ repeatElement*=dim->elsNum;
+ repeatGroupElement*=dimGrp->elsNum;
+ jumpSize*=dim->elsNum;
+
+ dim = array->dims[i];
+ dimGrp = groups->groups[i];
+
+ /*the repeatGroup is increased according to
+ * the number of elements in the grouped dimension to be
processed */
+ repeatGroup /= dimGrp->elsNum;
+
+ idx=0;
+ for(rG = 0; rG <repeatGroup; rG++) {
+ for(k=dim->min; k<=dim->max; k+=dim->step) { /*for each
value of x*/
+ for(grp=0; grp<repeatElement*groupSize;
grp+=repeatGroupElement*dimGrp->elsNum) { /*the group of the cell*/
+ for(j=dimGrp->min; j<=dimGrp->max;
j+=dimGrp->step) {
+ /* if it is out of the limits
of the array either on the left
+ * or the right then add nill */
+ if(((j<0) & ((unsigned
int)-j>k)) || ((j>0) & (k+j>dim->max)))
+ for(rE=0;
rE<repeatGroupElement; rE++, idx++)
+ els[idx] =
oid_nil;
+ else
+ for(rE=0;
rE<repeatGroupElement; rE++, idx++)
+ if(els[idx] !=
oid_nil)
+
els[idx] += jumpSize*(k+j);
+ }
+ }
+ }
+ }
+ }
+
+ BATsetcount(resBAT, groupSize*cellsNum);
+ resBAT->tsorted = 1;
+ resBAT->trevsorted = resBAT->batCount <= 1;
+ resBAT->tkey = 1;
+/* b->tdense = (b->batCount <= 1 || b->batCount == b->batCount);
+ if (b->batCount == 1 || b->batCount == b->batCount)
+ b->tseqbase = b->hseqbase;
+*/
+ resBAT->tseqbase = 0;
+ resBAT->hsorted = 1;
+ resBAT->hdense = 1;
+ resBAT->hseqbase = 0;
+ resBAT->hkey = 1;
+ resBAT->hrevsorted = resBAT->batCount <= 1;
+
+ BBPkeepref(*groupOidsRes = resBAT->batCacheid);
+ *groupsRes = groups;
+
+ GDKfree(array);
return MAL_SUCCEED;
}
-str ARRsubsum(bat *res, const bat *vals, const ptr* groups_in, const ptr
*array_in) {
- (void)*res;
- (void)*vals;
- (void)*groups_in;
- (void)*array_in;
+str ARRsubsum(bat *res, const bat *vals, const bat* groupsOids, const ptr*
groupsRanges) {
+
+ BAT *valsBAT, *groupsBAT, *resBAT;
+ oid valsNum, groupSize;
+ int *values; /* TODO: Should change this to consider all types */
+ oid *grpVals;
+ hge *resVals; /*TODO: Change this to consider all possible cases */
+
+ oid i, j;
+ unsigned short k;
+
+ gdk_array_groups *groups = (gdk_array_groups*)*groupsRanges;
+ groupSize = 1;
+ for(k=0; k<groups->dimsNum; k++)
+ groupSize *= groups->groups[k]->elsNum;
+
+ if ((valsBAT = BATdescriptor(*vals)) == NULL) {
+ throw(MAL, "aggr.subsum", RUNTIME_OBJECT_MISSING);
+ }
+
+ if ((groupsBAT = BATdescriptor(*groupsOids)) == NULL) {
+ BBPunfix(valsBAT->batCacheid);
+ throw(MAL, "aggr.subsum", RUNTIME_OBJECT_MISSING);
+ }
+
+ valsNum = BATcount(valsBAT);
+
+ values = (int*)Tloc(valsBAT, BUNfirst(valsBAT));
+ grpVals = (oid*)Tloc(groupsBAT, BUNfirst(groupsBAT));
+
+ if((resBAT = BATnew(TYPE_void, TYPE_hge, valsNum, TRANSIENT)) == NULL)
+ return createException(MAL, "aggr.subsum", "Problem creating BAT");
+ resVals = (hge*) Tloc(resBAT, BUNfirst(resBAT));
+
+ for(i=0; i<valsNum; i++) { /* for each cell */
+ resVals[i] = 0;
+ for(j=0; j<groupSize; j++) { /* consider all cells in its group
*/
+ if(grpVals[j+i*groupSize] != oid_nil) { /* if the oid
is valid (i.e. inside the array)*/
+ resVals[i] += values[grpVals[j+i*groupSize]];
+ }
+ }
+ }
+
+ BATsetcount(resBAT, valsNum);
+ resBAT->tsorted = 0;
+ resBAT->trevsorted = resBAT->batCount <= 1;
+ resBAT->tkey = 0;
+ resBAT->tseqbase = 0;
+ resBAT->hsorted = 1;
+ resBAT->hdense = 1;
+ resBAT->hseqbase = 0;
+ resBAT->hkey = 1;
+ resBAT->hrevsorted = resBAT->batCount <= 1;
+
+ BBPkeepref(*res = resBAT->batCacheid);
+
+ BBPunfix(valsBAT->batCacheid);
+ BBPunfix(groupsBAT->batCacheid);
+
+ GDKfree(groups);
return MAL_SUCCEED;
}
diff --git a/monetdb5/modules/kernel/arrays.h b/monetdb5/modules/kernel/arrays.h
--- a/monetdb5/modules/kernel/arrays.h
+++ b/monetdb5/modules/kernel/arrays.h
@@ -71,9 +71,10 @@ algebra_export str ALGouterjoin(bat *res
algebra_export str ALGarrayCount(wrd *res, const ptr *array);
//algebra_export str ALGproject(bat *result, const ptr* candDims, const bat*
candBAT);
-algebra_export str ARRsubsum(bat *res, const bat *vals, const ptr* groups,
const ptr *array);
+algebra_export str ARRsubsum(bat *res, const bat *vals, const bat* diffs,
const ptr* groups);
-algebra_export str ARRgroup(ptr *groupsRes, ptr *arrayRes, const ptr *dim,
const ptr *dims, const int *l, const int *h);
-algebra_export str ARRprojectGroups(bat *res, const ptr *groups, const ptr
*dim, const ptr *dims);
+algebra_export str ARRgroup1(ptr *groupsRes, ptr *arrayRes, const ptr *dim,
const ptr *dims, const int *l, const int *h);
+algebra_export str ARRgroup2(ptr *groupsRes, ptr *arrayRes, const ptr
*groupsCands, const ptr *dim, const ptr *dims, const int *l, const int *h);
+algebra_export str ARRprojectGroups(bat *resOids, ptr* groupsRes, const ptr
*groups, const ptr *dims);
#endif /* _ARRAYS_H */
diff --git a/monetdb5/modules/kernel/arrays.mal
b/monetdb5/modules/kernel/arrays.mal
--- a/monetdb5/modules/kernel/arrays.mal
+++ b/monetdb5/modules/kernel/arrays.mal
@@ -124,14 +124,17 @@ address ALGouterjoin;
command aggr.count(array:ptr) :wrd
address ALGarrayCount;
-command aggr.subsum(X_7:bat[:oid,:any],X_8:ptr,Y_8:ptr) :bat[:oid,:hge]
+command aggr.subsum(vals:bat[:oid,:any],oids:bat[:oid,:oid], groups:ptr)
:bat[:oid,:hge]
address ARRsubsum;
module group;
command group.subgroup(dim:ptr, array:ptr, l:int, h:int) (:ptr,:ptr)
-address ARRgroup;
+address ARRgroup1;
-command group.projectGroups(groups:ptr, dim:ptr, dims:ptr ) :bat[:oid,:any]
+command group.subgroup(groupsCand:ptr, dim:ptr, array:ptr, l:int, h:int)
(:ptr,:ptr)
+address ARRgroup2;
+
+command group.projectGroups(groups:ptr, array:ptr) (:bat[:oid,:oid], :ptr)
address ARRprojectGroups;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list