Changeset: 71ba9385dbfa for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=71ba9385dbfa
Modified Files:
monetdb5/modules/mal/array.mx
Branch: SciQL-2
Log Message:
ARRAYtiles*(): cleaned up & extended sanity checks;
expensive checks that require data scans are only done
when assertions or property checking are enabled.
Our current implementation of tiled array aggregations is restricted to:
- all dimensions must be of the same type
- only dimension types TINYINT (bte), SMALLINT (sht), INT(EGER) (int) are
supported
- dimensions must be ascending, i.e., start <= stop && step > 0
- only step-size 1 is supported
- arrays must be stored "canonically", i.e., sorted (ascending) on the first
dimension, with each subsequent dimension sub-sorted (ascending) within each
value of its preceding dimension
diffs (214 lines):
diff --git a/monetdb5/modules/mal/array.mx b/monetdb5/modules/mal/array.mx
--- a/monetdb5/modules/mal/array.mx
+++ b/monetdb5/modules/mal/array.mx
@@ -558,6 +558,15 @@ ARRAYfiller(Client cntxt, MalBlkPtr mb,
* dim_1:BAT,tile_member_offset_dim_1:BAT,size_dim_1, ...,
* dim_n:BAT,tile_member_offset_dim_n:BAT,size_dim_n)
*/
+/*
+ * CAVEATs:
+ * - all dimensions must be of the same type
+ * - only dimension types SMALLINT (bte), TINYINY (sht), INT(EGER) (int) are
supported
+ * - dimensions must be ascending, i.e., start <= stop && step > 0
+ * - only step-size 1 is supported
+ * - array must be stored "canonically", i.e., sorted (ascending) on first
dimension,
+ * and each subsequent dimension sub-sorted (ascending) within each value of
its preceding dimension
+ */
str
ARRAYtiles_@4_@1_@8(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
@@ -577,6 +586,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
if ( (pci->argc - pci->retc - 1) % 3 != 0)
throw(MAL, "array.@4", "Unbalanced argument sets");
+ /* allocate local data structures */
ndims = (pci->argc - pci->retc - 1) / 3;
bDims = (BAT**) GDKzalloc(sizeof(BAT*) * ndims);
bDimsT = (@8**) GDKzalloc(sizeof(@8*) * ndims);
@@ -589,6 +599,8 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
AGGR_CLEANUP();
throw(MAL, "array.@4", MAL_MALLOC_FAIL);
}
+
+ /* handle & chack arguments */
if (!(bVal = BATdescriptor(*(bat*)getArgReference(stk,pci,1)))) {
AGGR_CLEANUP();
throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING);
@@ -602,20 +614,21 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING);
}
dSize[i] = *(int*)getArgReference(stk,pci,2+i*3+2);
- if (dSize[i] == 0) {
+ if (dSize[i] <= 0) {
AGGR_CLEANUP();
- throw(MAL, "array.@4", "size dimension %d must not be
0", i);
+ throw(MAL, "array.@4", "size of dimension %d (%d) must
not be <= 0", i, (int) dSize[i]);
}
arrsze *= dSize[i];
}
- /* type check the shapes, prepare iterators, and compute the min/max of
the dimensions */
+
+ /* check sanity of value BAT */
if (!BAThdense(bVal)) {
AGGR_CLEANUP();
- throw(MAL, "array.@4", "head of value BAT must be dense");
+ throw(MAL, "array.@4", "head of value BAT is not dense");
}
if (bVal->ttype != TYPE_@1) {
AGGR_CLEANUP();
- throw(MAL, "array.@4", "tail of value BAT must be of type @1");
+ throw(MAL, "array.@4", "tail type of value BAT is not type @1");
}
arrcnt = BATcount(bVal);
if (arrcnt != arrsze) {
@@ -623,6 +636,10 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
throw(MAL, "array.@4", "count of value BAT ("BUNFMT") !=
product of dimension sizes ("BUNFMT")",
arrcnt, arrsze);
}
+ /* access tail as array */
+ bValT = (@1*) Tloc(bVal, BUNfirst(bVal));
+
+ /* check sanity of dimension & offset BATs properties */
arrbase = bVal->hseqbase;
offbase = bOffsets[0]->hseqbase;
offcnt = BATcount(bOffsets[0]);
@@ -643,69 +660,81 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
AGGR_CLEANUP();
throw(MAL, "array.@4", "head of offset BAT %d is not
aligned with head of offset BAT 0", i);
}
- if (bDims[i]->ttype != TYPE_@8 || bDims[i]->ttype !=
bOffsets[i]->ttype) {
+ if (bDims[i]->ttype != TYPE_@8) {
AGGR_CLEANUP();
throw(MAL, "array.@4", "tail type of dimension BAT %d
is not type @8", i);
}
- /* ! might require 2 full scans ! */
- BATmin(bDims[i], &(dMin[i]));
- BATmax(bDims[i], &(dMax[i]));
+ if (bOffsets[i]->ttype != TYPE_@8) {
+ AGGR_CLEANUP();
+ throw(MAL, "array.@4", "tail type of offset BAT %d is
not type @8", i);
+ }
+ }
+
+ /* check sanity of dimension BATs content */
+ for (i = 0; i < ndims; i++) {
+ /* access tails as arrays */
+ bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i]));
+ bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i]));
+
+ /* be optimistic */
+ dMin[i] = bDimsT[i][0];
+ dMax[i] = bDimsT[i][arrcnt-1];
+
+#ifdef NDEBUG
+ PROPDEBUG
+#endif
+ {
+ /* this might require several full scans; hence,
+ * only done when assertions or property checking
+ * are enabled */
+
+ BATmin(bDims[i], &(dMin[i]));
+ BATmax(bDims[i], &(dMax[i]));
+
+ for (p = 0, r = arrsze; p < arrcnt; p += arrsze, r +=
arrsze) {
+ BAT *slice = BATslice(bDims[i],p,r);
+
+ BATderiveHeadProps(BATmirror(slice),0);
+ if (!BATtordered(slice)) {
+ BBPunfix(slice->batCacheid);
+ AGGR_CLEANUP();
+ if (i == 0) {
+ throw(MAL, "array.@4", "values
of dimension %d are not sorted", i);
+ } else {
+ throw(MAL, "array.@4", "values
of dimension %d are not sorted"
+ " within value "BUNFMT"
of preceeding dimension", i, p / arrsze);
+ }
+ }
+ BBPunfix(slice->batCacheid);
+
+ if (bDimsT[i][p] != dMin[i]) {
+ AGGR_CLEANUP();
+ throw(MAL, "array.@4", "first value of
slice "BUNFMT" of dimension %d (%d) is not the minimum (%d)",
+ p / arrsze, i, (int)
bDimsT[i][p], (int) dMin[i]);
+ }
+ if (bDimsT[i][r-1] != dMax[i]) {
+ AGGR_CLEANUP();
+ throw(MAL, "array.@4", "last value of
slice "BUNFMT" of dimension %d (%d) is not the maximum (%d)",
+ p / arrsze, i, (int)
bDimsT[i][r-1], (int) dMax[i]);
+ }
+ }
+ }
+
+ if (dMin[i] > dMax[i]) {
+ AGGR_CLEANUP();
+ throw(MAL, "array.@4", "minimum value of dimension BAT
%d (%d) must not be larger than its maximum value (%d)",
+ i, (int) dMin[i], (int) dMax[i]);
+ }
if ((int) (dMax[i] - dMin[i] + 1) != dSize[i]) {
AGGR_CLEANUP();
throw(MAL, "array.@4", "range of dimension BAT %d (%d)
does not match its size (%d)",
i, (int) (dMax[i] - dMin[i] + 1), dSize[i]);
}
- /* might require (partial) scans; hence, only done when
- * assertions or property checking enabled */
-#ifdef NDEBUG
- PROPDEBUG
-#endif
- {
- BAT *slice;
- slice = BATslice(bDims[i],0,arrsze);
- BATderiveHeadProps(BATmirror(slice),0);
- if (!BATtordered(slice)) {
- BBPunfix(slice->batCacheid);
- AGGR_CLEANUP();
- throw(MAL, "array.@4", "values of dimension %d
are not sorted %s",
- i, i ? "within first value of
preceeding dimension" : "");
- }
- BBPunfix(slice->batCacheid);
- if (arrsze < arrcnt) {
- slice = BATslice(bDims[i],arrcnt-arrsze,arrcnt);
- BATderiveHeadProps(BATmirror(slice),0);
- if (!BATtordered(slice)) {
- BBPunfix(slice->batCacheid);
- AGGR_CLEANUP();
- throw(MAL, "array.@4", "values of
dimension %d are not sorted %s",
- i, i ? "within last value of
preceeding dimension" : "");
- }
- BBPunfix(slice->batCacheid);
- }
- }
-
arrsze /= dSize[i];
assert(arrsze);
}
- /* access tails as arrays */
- bValT = (@1*) Tloc(bVal, BUNfirst(bVal));
- for (i = 0; i < ndims; i++) {
- bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i]));
- bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i]));
- if (bDimsT[i][0] != dMin[i]) {
- AGGR_CLEANUP();
- throw(MAL, "array.@4", "first value of dimension %d
(%d) is not its minimum (%d)",
- i, (int) bDimsT[i][0], (int) dMin[i]);
- }
- if (bDimsT[i][arrcnt-1] != dMax[i]) {
- AGGR_CLEANUP();
- throw(MAL, "array.@4", "last value of dimension %d (%d)
is not its maximum (%d)",
- i, (int) bDimsT[i][arrcnt-1], (int) dMax[i]);
- }
- }
-
/* For each anchor piont, compute all cells belong to this tile
(bVal.head
* is the group nr.) and compute the SUM */
bRes = BATnew(TYPE_void, TYPE_@3, BATcount(bVal));
@@ -752,6 +781,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
bResT[p] = (cnt ? @7 : @3_nil);
nils |= !cnt;
}
+
AGGR_CLEANUP();
BATsetcount(bRes, arrcnt);
BATseqbase(bRes, arrbase);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list