Changeset: b275a8466c95 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b275a8466c95
Modified Files:
geom/monetdb5/grid.c
Branch: grid
Log Message:
Added a function that performs a selection using the grid
diffs (truncated from 578 to 300 lines):
diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c
--- a/geom/monetdb5/grid.c
+++ b/geom/monetdb5/grid.c
@@ -13,30 +13,387 @@
#include "grid.h"
-str
-GRIDdistance(bit * res, lng * x1, lng * y1, lng * x2, lng * y2, int * distance)
+#define GRID_VERSION 1
+#define POINTSPERCELL 100000
+
+#define BITSNUM 64
+#define SHIFT 6 /* division with 64 */
+#define ONES ((1<<(SHIFT))-1) /* 63 */
+#define get(bitVector, bitPos) (((bitVector) >> (bitPos)) & 0x01)
+#define set(bitVector, bitPos, value) ((bitVector) |= ((value)<<(bitPos)))
+#define setbv(bitVector, bitPos, value) set((bitVector)[(bitPos) >> SHIFT],
(bitPos) & ONES, (value))
+#define unset(bitVector, bitPos) ((bitVector) &= ~((uint64_t)1<<(bitPos)))
+#define common(bitVector, bitPos, value) ((bitVector) &= (0xFFFFFFFFFFFFFFFF ^
(((1-value))<<(bitPos))))
+
+#define maximumNumberOfCells(max, bitsNum, add) \
+do { \
+ int i = 0; \
+ size_t res = 0; \
+ for(i = 0; i < bitsNum; i++) \
+ res = (res << 1) | 1; \
+ max = res + add; \
+} while (0)
+
+typedef struct grid {
+ lng xmin; /* grid universe: minimum X value */
+ lng ymin; /* grid universe: minimum Y value */
+ lng xmax; /* grid universe: maximum X value */
+ lng ymax; /* grid universe: maximum Y value */
+ bte shift;
+ size_t cellsNum; /* number of cells */
+ size_t cellsPerAxis; /* number of cells per axis */
+ bat xbat; /* bat id for X coordinates
*/
+ bat ybat; /* bat id for Y coordinates
*/
+ size_t * dir; /* the grid directory */
+ oid * oids; /* heap where the index is stored */
+} grid;
+
+static size_t
+countSetBits(uint64_t *resBitvector, size_t vectorSize)
{
- (void)res;
- (void)x1;
- (void)y1;
- (void)x2;
- (void)y2;
- (void)distance;
+ size_t num = 0 ;
+ for(size_t k = 0; k < vectorSize; k++) {
+ uint64_t b = resBitvector[k];
+ for(uint64_t j = 0; j < BITSNUM; j++) {
+ num += b & 0x01;
+ b >>= 1;
+ }
+ }
- return MAL_SUCCEED;
+ return num;
+}
+
+static grid *
+grid_create(BAT *bx, BAT *by)
+{
+ grid * g;
+ lng *xVals, *yVals;
+ size_t i, cnt;
+ dbl fxa, fxb, fya, fyb;
+
+ assert(BATcount(bx) == BATcount(by));
+ assert(BATcount(bx) > 0);
+
+ if ((g = GDKmalloc(sizeof(grid))) == NULL)
+ return g;
+
+ g->xbat = bx->batCacheid;
+ g->ybat = by->batCacheid;
+ xVals = (lng*)Tloc(bx, BUNfirst(bx));
+ yVals = (lng*)Tloc(by, BUNfirst(by));
+
+ /* determine the appropriate number of cells */
+ g->shift = 2;
+ maximumNumberOfCells(g->cellsNum, g->shift*2, 1);
+ maximumNumberOfCells(g->cellsPerAxis, g->shift, 0);
+
+ cnt = BATcount(bx);
+ while(cnt/g->cellsNum > POINTSPERCELL) {
+ /* use one more bit per axis */
+ g->shift++;
+ maximumNumberOfCells(g->cellsNum, g->shift*2, 1);
+ maximumNumberOfCells(g->cellsPerAxis, g->shift, 0);
+ }
+
+ /* find min and max values for X and y coordinates */
+ g->xmin = g->xmax = xVals[0];
+ for (i = 1; i < cnt; i++) {
+ lng val = xVals[i];
+ if(g->xmin > val)
+ g->xmin = val;
+ if(g->xmax < val)
+ g->xmax = val;
+ }
+ g->ymin = g->ymax = yVals[0];
+ for (i = 1; i < cnt; i++) {
+ lng val = yVals[i];
+ if(g->ymin > val)
+ g->ymin = val;
+ if(g->ymax < val)
+ g->ymax = val;
+ }
+
+ /* allocate space for the directory */
+ if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(size_t))) == NULL) {
+ GDKfree(g);
+ g = NULL;
+ return g;
+ }
+ for (i = 0; i < g->cellsNum; i++)
+ g->dir[i] = 0;
+
+ /* allocate space for the index */
+ if((g->oids = GDKmalloc(BATcount(bx)*sizeof(oid))) == NULL) {
+ GDKfree(g);
+ g = NULL;
+ return g;
+ }
+
+ /* compute the index */
+ /* step 1: compute the histogram of cell frequencies */
+ fxa = ((double)g->cellsPerAxis/(double)(g->xmax-g->xmin));
+ fxb = (double)g->xmin*fxa;
+ fya = ((double)g->cellsPerAxis/(double)(g->ymax-g->ymin));
+ fyb = (double)g->ymin*fya;
+
+ cnt = BATcount(bx);
+ for (i = 0; i < cnt; i++) {
+ oid cellx = (double)xVals[i]*fxa - fxb;
+ oid celly = (double)yVals[i]*fya - fyb;
+ oid cell = ((cellx << g->shift) | celly);
+ g->dir[cell+1]++;
+ }
+
+ /* step 2: compute the directory pointers */
+ for (i = 1; i < g->cellsNum; i++)
+ g->dir[i] += g->dir[i-1];
+
+ /* step 3: fill in the oid array */
+ for (size_t i = 0; i < cnt; i++) {
+ oid cellx = (double)xVals[i]*fxa - fxb;
+ oid celly = (double)yVals[i]*fya - fyb;
+ oid cell = ((cellx << g->shift) | celly);
+// assert(cell < g->cellsNum);
+// assert(position < offsetVals[g->cellsNum]);
+ g->oids[g->dir[cell]++] = i;
+ }
+
+ /* step 4: adjust the directory pointers */
+ for (size_t i = g->cellsNum; i > 0; i--)
+ g->dir[i+1] = g->dir[i];
+ g->dir[0] = 0;
+
+ /* TODO: move here the code for compressing the index */
+
+ return g;
}
str
-GRIDdistancesubselect(bat * res, bat * x1, bat * y1, bat * cand1, lng * x2,
lng * y2, int * distance, bit * anti)
+GRIDdistance(bit * res, lng * x1, lng * y1, lng * x2, lng * y2, int * d)
{
- (void)res;
- (void)x1;
- (void)y1;
- (void)cand1;
- (void)x2;
- (void)y2;
- (void)distance;
- (void)anti;
+ str r = MAL_SUCCEED;
+
+ if ((res = GDKmalloc(sizeof(bit))) == NULL)
+ r = createException(MAL, "grid.distance", MAL_MALLOC_FAIL);
+ else {
+ *res = ((*x2-*x1)*(*x2-*x1)+(*y2-*y1)*(*y2-*y1))<(*d)*(*d);
+ }
+
+ return r;
+}
+
+str
+GRIDdistancesubselect(bat * res, bat * x1, bat * y1, bat * cand1, lng * x2,
lng * y2, int * d, bit * anti)
+{
+ size_t minCellx, minCelly, maxCellx, maxCelly, cellx, celly;
+ size_t *borderCells, *internalCells;
+ size_t borderCellsNum, internalCellsNum, totalCellsNum;
+ size_t i, j, bvsize, num;
+ uint64_t * bv, * cbv;
+ double fxa, fxb, fya, fyb;
+ BAT *x1BAT = NULL, *y1BAT = NULL, *cBAT = NULL;
+ lng * x1Vals = NULL, * y1Vals = NULL;
+ oid * resVals = NULL;
+ grid * g = NULL;
+ mbr mbb = (mbr) {.xmin = *x2 - *d, .ymin = *y2 - *d, .xmax = *x2 + *d,
.ymax = *y2 + *d};
+ BAT *r;
+ BUN p, q;
+ BATiter pi;
+ BUN resNum = 0;
+ assert (*d > 0);
+
+ /* get the X and Y BATs*/
+ if((x1BAT = BATdescriptor(*x1)) == NULL)
+ throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+ if((y1BAT = BATdescriptor(*y1)) == NULL) {
+ BBPunfix(x1BAT->batCacheid);
+ throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+ }
+ if((y1BAT = BATdescriptor(*y1)) == NULL) {
+ BBPunfix(x1BAT->batCacheid);
+ throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+ }
+ if((cBAT = BATdescriptor(*cand1)) == NULL) {
+ BBPunfix(x1BAT->batCacheid);
+ BBPunfix(y1BAT->batCacheid);
+ throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+ }
+ num = BATcount(x1BAT);
+
+ /* check if the BATs have dense heads and are aligned */
+ if (!BAThdense(x1BAT) || !BAThdense(y1BAT)) {
+ BBPunfix(x1BAT->batCacheid);
+ BBPunfix(y1BAT->batCacheid);
+ BBPunfix(cBAT->batCacheid);
+ return createException(MAL, "grid.distance", "BATs must have
dense heads");
+ }
+ if(x1BAT->hseqbase != y1BAT->hseqbase || BATcount(x1BAT) !=
BATcount(y1BAT)) {
+ BBPunfix(x1BAT->batCacheid);
+ BBPunfix(y1BAT->batCacheid);
+ BBPunfix(cBAT->batCacheid);
+ return createException(MAL, "grid.distance", "BATs must be
aligned");
+ }
+
+ assert(x1BAT->ttype == TYPE_lng);
+ assert(y1BAT->ttype == TYPE_lng);
+ x1Vals = (lng*)Tloc(x1BAT, BUNfirst(x1BAT));
+ y1Vals = (lng*)Tloc(y1BAT, BUNfirst(y1BAT));
+
+ /* initialize the bit vectors */
+ bvsize = (num >> SHIFT) + ((num & ONES) > 0);
+ if ((bv = GDKmalloc(bvsize * sizeof(uint64_t))) == NULL)
+ throw(MAL, "grid.distance", MAL_MALLOC_FAIL);
+ for (i = 0; i < bvsize; i++)
+ bv[i] = 0;
+
+ if ((cbv = GDKmalloc(bvsize * sizeof(uint64_t))) == NULL) {
+ GDKfree(bv);
+ throw(MAL, "grid.distance", MAL_MALLOC_FAIL);
+ }
+ for (i = 0; i < bvsize; i++)
+ cbv[i] = 0;
+
+ pi = bat_iterator(cBAT);
+ BATloop(cBAT, p, q) {
+ oid o = *(oid*)BUNtail(pi, p);
+ size_t blockNum = o >> SHIFT;
+ uint64_t bitPos = o & ONES;
+ set(cbv[blockNum], bitPos, 1);
+ }
+ BBPunfix(cBAT->batCacheid);
+
+ /* compute the grid index */
+ if((g = grid_create(x1BAT, y1BAT)) == NULL) {
+ BBPunfix(x1BAT->batCacheid);
+ BBPunfix(y1BAT->batCacheid);
+ GDKfree(bv);
+ GDKfree(cbv);
+ return createException(MAL, "grid.distance", "Could not compute
the grid index");
+ }
+
+ /* find which cells have to be examined */
+ fxa = ((double)g->cellsPerAxis/(double)(g->xmax-g->xmin));
+ fxb = (double)g->xmin*fxa;
+ fya = ((double)g->cellsPerAxis/(double)(g->ymax-g->ymin));
+ fyb = (double)g->ymin*fya;
+
+ minCellx = (double)(mbb.xmin<g->xmin?g->xmin:mbb.xmin)*fxa - fxb;
+ maxCellx = (double)(mbb.xmax>g->xmax?g->xmax:mbb.xmax)*fxa - fxb;
+ minCelly = (double)(mbb.ymin<g->ymin?g->ymin:mbb.ymin)*fya - fyb;
+ maxCelly = (double)(mbb.ymax>g->ymax?g->ymax:mbb.ymax)*fya - fyb;
+
+ /* split the cells in border and internal ones */
+ totalCellsNum = (maxCellx - minCellx + 1)*(maxCelly - minCelly + 1);
+ borderCellsNum = (maxCellx - minCellx + 1) + (maxCelly - minCelly + 1)
- 1; /* per axis, remove the corner cell that has been added twice */
+ if(maxCellx > minCellx && maxCelly > minCelly)
+ borderCellsNum = borderCellsNum*2 - 2; /* subtract the two
corner cells that have been added twice */
+ internalCellsNum = totalCellsNum - borderCellsNum;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list