Changeset: d6b63e4815ff for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d6b63e4815ff
Modified Files:
gdk/gdk_arrays.c
monetdb5/modules/kernel/arrays.c
Branch: arrays
Log Message:
point selections with anti that destroy the dimension property combined with
other point
and range selections
diffs (truncated from 504 to 300 lines):
diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c
--- a/gdk/gdk_arrays.c
+++ b/gdk/gdk_arrays.c
@@ -9,6 +9,17 @@ gdk_dimension* createDimension_##TPE(int
void *maxVoid = GDKmalloc(sizeof(TPE)); \
void *stepVoid = GDKmalloc(sizeof(TPE)); \
\
+ if(!dim || !minVoid || !maxVoid || !stepVoid) { \
+ if(dim) \
+ GDKfree(dim); \
+ if(minVoid) \
+ GDKfree(minVoid); \
+ if(maxVoid) \
+ GDKfree(maxVoid); \
+ if(stepVoid) \
+ GDKfree(stepVoid); \
+ return NULL; \
+ } \
memcpy(minVoid, &min, sizeof(TPE)); \
memcpy(maxVoid, &max, sizeof(TPE)); \
memcpy(stepVoid, &step, sizeof(TPE)); \
diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c
--- a/monetdb5/modules/kernel/arrays.c
+++ b/monetdb5/modules/kernel/arrays.c
@@ -7,10 +7,18 @@
static gdk_dimension* getDimension(gdk_cells *dims, int dimNum) {
dim_node *n;
- for(n=dims->h; n->data->dimNum<dimNum; n=n->next);
+ for(n=dims->h; n && n->data->dimNum<dimNum; n=n->next);
return n->data;
}
+static int jumpSize(gdk_array *array, int dimNum) {
+ int i=0;
+ BUN skip = 1;
+ for(i=0; i<dimNum; i++)
+ skip *= array->dimSizes[i];
+ return skip;
+}
+
/*takes a set of dimensions of any type and returns the correspondin
* dimensions in indices format */
static gdk_cells* sizesToDimensions(gdk_array *array) {
@@ -24,7 +32,7 @@ static gdk_cells* sizesToDimensions(gdk_
return resDims;
}
-static bool compatibleRanges(gdk_dimension *dim1, gdk_dimension *dim2) {
+static bool overlapingRanges(gdk_dimension *dim1, gdk_dimension *dim2) {
if(*(oid*)dim1->max < *(oid*)dim2->min || *(oid*)dim2->max <
*(oid*)dim1->min) {
//disjoint ranges. empty result
return 0;
@@ -32,25 +40,18 @@ static bool compatibleRanges(gdk_dimensi
return 1;
}
-static gdk_dimension* merge2CandidateDimensions(gdk_dimension *dim1,
gdk_dimension *dim2) {
+static gdk_dimension* mergeCandidateDimensions(gdk_dimension *dim1,
gdk_dimension *dim2) {
oid min, max, step;
- //check whether the merge creates a dimension or not
- //In case it does not return NULL and then a BAT should be created
-
/*the biggest of the mins and the smallest of the maximums */
min = *(oid*)dim1->min > *(oid*)dim2->min ? *(oid*)dim1->min :
*(oid*)dim2->min;
max = *(oid*)dim1->max < *(oid*)dim2->max ? *(oid*)dim1->max :
*(oid*)dim2->max;
step = *(oid*)dim1->step ; //step is always 1
//the dimensions that are merged should have the same order
- //they also have the same number of initial elements because the came
from the same dimension
+ //they also have the same number of initial elements because they came
from the same dimension
return createDimension_oid(dim1->dimNum, dim1->initialElementsNum, min,
max, step); }
-static gdk_dimension* mergeCandidateDimensions(gdk_dimension *dim1,
gdk_dimension *dim2) {
- return merge2CandidateDimensions(dim1, dim2);
-}
-
static BUN oidToIdx(oid oidVal, int dimNum, int currentDimNum, BUN skipCells,
gdk_array *dims) {
BUN oid = 0;
@@ -69,7 +70,7 @@ static BUN oidToIdx(oid oidVal, int dimN
return oid%skipCells;
}
-static BUN qualifyingOIDs(int dimNum, int skipSize, gdk_cells* oidDims, oid
**resOIDs ) {
+static BUN qualifyingOIDs(int dimNum, int skipSize, gdk_cells* oidDims, BAT*
oidsBAT, oid **resOIDs ) {
BUN sz = 0;
BUN j;
oid io;
@@ -82,25 +83,65 @@ static BUN qualifyingOIDs(int dimNum, in
oidsDim = n->data;
if(dimNum == oidDims->dimsNum-1) { //last dimension
- qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum);
- for(io=*(oid*)oidsDim->min, sz=0; io<=*(oid*)oidsDim->max;
io+=*(oid*)oidsDim->step, sz++) {
- qOIDS[sz] = skipSize*io;
+ if(oidsDim->elementsNum > 0) {
+ qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum);
+ for(io=*(oid*)oidsDim->min, sz=0;
io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) {
+ qOIDS[sz] = skipSize*io;
fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]);
+ }
+ } else {
+ //the oids are in the BAT
+ //iterate over the elements in the BAT to find the
indices of the dimension that have survided
+ //and add and idx for each one of these elements
+ oid* candOIDs = (oid*)Tloc(oidsBAT, BUNfirst(oidsBAT));
+ int previousIdx = -1;
+ BUN i;
+ qOIDS =
GDKmalloc(sizeof(oid)*oidsDim->initialElementsNum);
+
+ for(i=0; i<BATcount(oidsBAT); i++) {
+ int idx = candOIDs[i]/skipSize;
+ if(idx > previousIdx) {
+ qOIDS[sz] = skipSize*idx;
+fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]);
+ sz++;
+ previousIdx = idx;
+ }
+ }
}
*resOIDs = qOIDS;
} else {
oid *resOIDs_local = NULL;
- BUN addedEls = qualifyingOIDs(dimNum+1,
skipSize*oidsDim->initialElementsNum, oidDims, &resOIDs_local);
+ BUN addedEls = qualifyingOIDs(dimNum+1,
skipSize*oidsDim->initialElementsNum, oidDims, oidsBAT, &resOIDs_local);
if(dimNum == 0)
qOIDS = *resOIDs;
- else
- qOIDS =
GDKmalloc(sizeof(oid)*oidsDim->elementsNum*addedEls);
+ else {
+ if(oidsDim->elementsNum > 0)
+ qOIDS =
GDKmalloc(sizeof(oid)*oidsDim->elementsNum*addedEls);
+ else
+ qOIDS =
GDKmalloc(sizeof(oid)*oidsDim->initialElementsNum*addedEls);
+ }
for(j=0, sz=0; j<addedEls; j++) {
fprintf(stderr, "-> %u = %u\n", (unsigned int)j, (unsigned
int)resOIDs_local[j]);
- for(io=*(oid*)oidsDim->min; io<=*(oid*)oidsDim->max;
io+=*(oid*)oidsDim->step, sz++) {
- qOIDS[sz] = resOIDs_local[j] + skipSize*io;
+ if(oidsDim->elementsNum > 0) {
+ for(io=*(oid*)oidsDim->min;
io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) {
+ qOIDS[sz] = resOIDs_local[j] +
skipSize*io;
fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]);
+ }
+ } else {
+ //check the BAT
+ oid* candOIDs = (oid*)Tloc(oidsBAT,
BUNfirst(oidsBAT));
+ int previousIdx = -1;
+ BUN i;
+ for(i=0; i<BATcount(oidsBAT); i++) {
+ int idx =
(candOIDs[i]%(skipSize*oidsDim->initialElementsNum))/skipSize;
+ if(idx > previousIdx) {
+ qOIDS[sz] =
resOIDs_local[j]+skipSize*idx;
+fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]);
+ sz++;
+ previousIdx = idx;
+ }
+ }
}
}
*resOIDs = qOIDS;
@@ -196,20 +237,216 @@ static str emptyCandidateResults(ptr *ca
return MAL_SUCCEED;
}
-static bool updateCandidateResults(gdk_cells** dimensionsCandidates_out, BAT**
candidatesBAT_out,
+static BAT* joinBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int dimNum)
{
+ oid *candsOIDs, *dimOIDs, *mergedOIDs;
+ BAT* mergedBAT;
+
+ BUN i=0, j=0, k=0, minPos;
+ BUN minIdx =0 ;
+ BUN dimSkip = jumpSize(array, dimNum);
+
+ oid dimOIDs_min = 0;
+ bool set = 0;
+ BUN moduloRes;
+
+ candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT));
+ dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT));
+
+ //the oids in dimBAT have been computed assuming that 0 is allowed in
all other dimensions
+ //is this really true? I can verify that using the first oid in candsBAT
+ //if a dimension after filtering is expressed again as a dimension the
min might
+ //not be 0 but I do not care about it when it comes to the BAT. I will
resolve tha at the end
+ //when projecting the cells where dimensions and BAT will be combined
+ for(j=0; j< (unsigned long)dimNum; j++) {
+ BUN skipCells = 0;
+ //find the min oid of this dimension in the the first
qualifying oid
+ minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array);
+ if(minIdx == 0)
+ continue;
+ //all oids in the dimOIDs should be updated to comply with the
min oid of the dimension
+ skipCells = jumpSize(array, j);
+ for(i=0; i<BATcount(dimBAT); i++)
+ dimOIDs[i] += skipCells*minIdx;
+ }
+
+ if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid,
BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT)))
+ return NULL;
+ mergedOIDs = (oid*)Tloc(mergedBAT, BUNfirst(mergedBAT));
+
+ moduloRes = dimOIDs[0]%dimSkip;
+ /* find the oids in cands that are there to reflect dimNum and keep
only those that dim and cand have in common */
+ for(i=0, j=0; i<BATcount(candsBAT) && j<BATcount(dimBAT);) {
+ /* oids in this dimension should be multiples of dimSkip */
+ if(candsOIDs[i]%dimSkip == moduloRes) {
+ if(candsOIDs[i] < dimOIDs[j]) //it exists in one but
not in the other
+ i++;
+ else if (candsOIDs[i] > dimOIDs[j])
+ j++;
+ else { //common
+ mergedOIDs[k] = candsOIDs[i];
+
+ if(!set) {
+ dimOIDs_min = candsOIDs[i];
+ minPos = k;
+ set = 1;
+ }
+
+ i++;
+ j++;
+ k++;
+ }
+ } else {
+ /*not related with the dimension. send it to the
output*/
+ mergedOIDs[k] = candsOIDs[i];
+ i++;
+ k++;
+ }
+ }
+
+ BATseqbase(mergedBAT, 0);
+ BATsetcount(mergedBAT, k);
+ BATderiveProps(mergedBAT, FALSE);
+
+ //adapt the candidates BAT to reflect the minimum value for the new
dimension
+ //only the ones that are not reflecting the y dimension should be
updates
+ minIdx = oidToIdx(dimOIDs_min, dimNum, 0, 1, array);
+ if(minIdx > 0) {
+ BUN skipCells = jumpSize(array, dimNum);
+ /*split it in 2 parts. Firts update all oids that are before
the minPos position
+ * all those oids will be increased and become greater than the
oid in minPos */
+ if(minPos > 0) {
+ for(i=minPos-1; i>0; i--) {
+ /*all will change because they are above the
minimum oid regarding the dimension*/
+ mergedOIDs[i+1] =
mergedOIDs[i]+skipCells*minIdx;
+ }
+ /* excluded from the loop because i>=0 always true
(infinite loop)*/
+ mergedOIDs[1] = mergedOIDs[0]+skipCells*minIdx;
+ }
+ mergedOIDs[0] = dimOIDs_min;
+ for(i=minPos+1; i<BATcount(mergedBAT) ; i++)
+ if(mergedOIDs[i]%dimSkip != moduloRes)
+ mergedOIDs[i] += skipCells*minIdx;
+ }
+
+ return mergedBAT;
+}
+
+static BAT* mergeBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int
dimNum) {
+ oid *candsOIDs, *dimOIDs, *mergedOIDs;
+ BAT* mergedBAT;
+
+ BUN i=0, j=0, k=0;
+ BUN minIdx =0 ;
+
+ candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT));
+ dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT));
+
+ //the oids in dimBAT have been computed assuming that 0 is allowed in
all other dimensions
+ //is this really true? I can verify that using the first oid in candsBAT
+ //if a dimension after filtering is expressed again as a dimension the
min might
+ //not be 0 but I do not care about it when it comes to the BAT. I will
resolve tha at the end
+ //when projecting the cells where dimensions and BAT will be combined
+ for(j=0; j< (unsigned long)dimNum; j++) {
+ BUN skipCells = 0;
+ //find the min oid of this dimension in the the first
qualifying oid
+ minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array);
+ if(minIdx == 0)
+ continue;
+ //all oids in the dimOIDs should be updated to comply with the
min oid of the dimension
+ skipCells = jumpSize(array, j+1);
+ for(i=0; i<BATcount(dimBAT); i++)
+ dimOIDs[i] += skipCells*minIdx;
+ }
+ //adapt the candidates BAT to reflect the minimum value for the new
dimension
+ minIdx = oidToIdx(dimOIDs[0], dimNum, 0, 1, array);
+ if(minIdx > 0) {
+ BUN skipCells = jumpSize(array, dimNum);
+ for(i=0; i<BATcount(candsBAT); i++)
+ candsOIDs[i] += skipCells*minIdx;
+ }
+
+ //finaly merge the two BATs
+ if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid,
BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT)))
+ return NULL;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list