Changeset: b275a8466c95 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b275a8466c95
Modified Files:
        geom/monetdb5/grid.c
Branch: grid
Log Message:

Added a function that performs a selection using the grid


diffs (truncated from 578 to 300 lines):

diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c
--- a/geom/monetdb5/grid.c
+++ b/geom/monetdb5/grid.c
@@ -13,30 +13,387 @@
 
 #include "grid.h"
 
-str
-GRIDdistance(bit * res, lng * x1, lng * y1, lng * x2, lng * y2, int * distance)
+#define GRID_VERSION 1
+#define POINTSPERCELL 100000
+
+#define BITSNUM 64
+#define SHIFT 6  /* division with 64 */
+#define ONES ((1<<(SHIFT))-1) /* 63 */
+#define get(bitVector, bitPos) (((bitVector) >> (bitPos)) & 0x01) 
+#define set(bitVector, bitPos, value) ((bitVector) |= ((value)<<(bitPos))) 
+#define setbv(bitVector, bitPos, value) set((bitVector)[(bitPos) >> SHIFT], 
(bitPos) & ONES, (value))
+#define unset(bitVector, bitPos) ((bitVector) &= ~((uint64_t)1<<(bitPos)))
+#define common(bitVector, bitPos, value) ((bitVector) &= (0xFFFFFFFFFFFFFFFF ^ 
(((1-value))<<(bitPos))))
+
+#define maximumNumberOfCells(max, bitsNum, add) \
+do {                                            \
+    int i = 0;                                  \
+    size_t res = 0;                             \
+    for(i = 0; i < bitsNum; i++)                \
+        res = (res << 1) | 1;                   \
+    max = res + add;                            \
+} while (0)
+
+typedef struct grid {
+       lng xmin;                       /* grid universe: minimum X value */
+       lng ymin;                       /* grid universe: minimum Y value */
+       lng xmax;                       /* grid universe: maximum X value */
+       lng ymax;                       /* grid universe: maximum Y value */
+       bte shift;
+       size_t cellsNum;                /* number of cells                 */
+       size_t cellsPerAxis;    /* number of cells per axis        */
+       bat xbat;                               /* bat id for X coordinates     
   */
+       bat ybat;                               /* bat id for Y coordinates     
   */
+       size_t * dir;                   /* the grid directory              */
+       oid * oids;                     /* heap where the index is stored  */
+} grid;
+
+static size_t
+countSetBits(uint64_t *resBitvector, size_t vectorSize)
 {
-       (void)res;
-       (void)x1;
-       (void)y1;
-       (void)x2;
-       (void)y2;
-       (void)distance;
+       size_t num = 0 ;
+       for(size_t k = 0; k < vectorSize; k++) {
+               uint64_t b = resBitvector[k];
+               for(uint64_t j = 0; j < BITSNUM; j++) {
+                       num += b & 0x01;
+                       b >>= 1;
+               }
+       }
 
-       return MAL_SUCCEED;
+       return num;
+}
+
+static grid *
+grid_create(BAT *bx, BAT *by)
+{
+       grid * g;
+       lng *xVals, *yVals;
+       size_t i, cnt;
+       dbl fxa, fxb, fya, fyb;
+
+       assert(BATcount(bx) == BATcount(by));
+       assert(BATcount(bx) > 0);
+
+       if ((g = GDKmalloc(sizeof(grid))) == NULL)
+               return g;
+
+       g->xbat = bx->batCacheid;
+       g->ybat = by->batCacheid;
+       xVals = (lng*)Tloc(bx, BUNfirst(bx));
+       yVals = (lng*)Tloc(by, BUNfirst(by));
+
+       /* determine the appropriate number of cells */
+       g->shift = 2;
+       maximumNumberOfCells(g->cellsNum, g->shift*2, 1);
+       maximumNumberOfCells(g->cellsPerAxis, g->shift, 0);
+
+       cnt = BATcount(bx);
+       while(cnt/g->cellsNum > POINTSPERCELL) {
+               /* use one more bit per axis */
+               g->shift++;
+               maximumNumberOfCells(g->cellsNum, g->shift*2, 1);
+               maximumNumberOfCells(g->cellsPerAxis, g->shift, 0);
+       }
+
+       /* find min and max values for X and y coordinates */
+       g->xmin = g->xmax = xVals[0];
+       for (i = 1; i < cnt; i++) {
+               lng val = xVals[i];
+               if(g->xmin > val)
+                       g->xmin = val;
+               if(g->xmax < val)
+                       g->xmax = val;
+       }
+       g->ymin = g->ymax = yVals[0];
+       for (i = 1; i < cnt; i++) {
+               lng val = yVals[i];
+               if(g->ymin > val)
+                       g->ymin = val;
+               if(g->ymax < val)
+                       g->ymax = val;
+       }
+
+       /* allocate space for the directory */
+       if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(size_t))) == NULL) {
+               GDKfree(g);
+               g = NULL;
+               return g;
+       }
+       for (i = 0; i < g->cellsNum; i++)
+               g->dir[i] = 0;
+
+       /* allocate space for the index */
+       if((g->oids = GDKmalloc(BATcount(bx)*sizeof(oid))) == NULL) {
+               GDKfree(g);
+               g = NULL;
+               return g;
+       }
+
+       /* compute the index */
+       /* step 1: compute the histogram of cell frequencies */
+       fxa = ((double)g->cellsPerAxis/(double)(g->xmax-g->xmin));
+       fxb = (double)g->xmin*fxa;
+       fya = ((double)g->cellsPerAxis/(double)(g->ymax-g->ymin));
+       fyb = (double)g->ymin*fya;
+
+       cnt = BATcount(bx);
+       for (i = 0; i < cnt; i++) {
+               oid cellx = (double)xVals[i]*fxa - fxb;
+               oid celly = (double)yVals[i]*fya - fyb;
+               oid cell = ((cellx << g->shift) | celly);
+               g->dir[cell+1]++;
+       }
+
+       /* step 2: compute the directory pointers */
+       for (i = 1; i < g->cellsNum; i++)
+               g->dir[i] += g->dir[i-1];
+
+       /* step 3: fill in the oid array */
+       for (size_t i = 0; i < cnt; i++) {
+               oid cellx = (double)xVals[i]*fxa - fxb;
+               oid celly = (double)yVals[i]*fya - fyb;
+               oid cell = ((cellx << g->shift) | celly);
+//             assert(cell < g->cellsNum);
+//             assert(position < offsetVals[g->cellsNum]);
+               g->oids[g->dir[cell]++] = i;
+       }
+
+       /* step 4: adjust the directory pointers */
+       for (size_t i = g->cellsNum; i > 0; i--)
+               g->dir[i+1] = g->dir[i];
+       g->dir[0] = 0;
+
+       /* TODO: move here the code for compressing the index */
+
+       return g;
 }
 
 str
-GRIDdistancesubselect(bat * res, bat * x1, bat * y1, bat * cand1, lng * x2, 
lng * y2, int * distance, bit * anti)
+GRIDdistance(bit * res, lng * x1, lng * y1, lng * x2, lng * y2, int * d)
 {
-       (void)res;
-       (void)x1;
-       (void)y1;
-       (void)cand1;
-       (void)x2;
-       (void)y2;
-       (void)distance;
-       (void)anti;
+       str r = MAL_SUCCEED;
+
+       if ((res = GDKmalloc(sizeof(bit))) == NULL)
+               r = createException(MAL, "grid.distance", MAL_MALLOC_FAIL);
+       else {
+               *res = ((*x2-*x1)*(*x2-*x1)+(*y2-*y1)*(*y2-*y1))<(*d)*(*d);
+       }
+
+       return r;
+}
+
+str
+GRIDdistancesubselect(bat * res, bat * x1, bat * y1, bat * cand1, lng * x2, 
lng * y2, int * d, bit * anti)
+{
+       size_t minCellx, minCelly, maxCellx, maxCelly, cellx, celly;
+       size_t *borderCells, *internalCells;
+       size_t borderCellsNum, internalCellsNum, totalCellsNum;
+       size_t i, j, bvsize, num;
+       uint64_t * bv, * cbv;
+       double fxa, fxb, fya, fyb;
+       BAT *x1BAT = NULL, *y1BAT = NULL, *cBAT = NULL;
+       lng * x1Vals = NULL, * y1Vals = NULL;
+       oid * resVals = NULL;
+       grid * g = NULL;
+       mbr mbb = (mbr) {.xmin = *x2 - *d, .ymin = *y2 - *d, .xmax = *x2 + *d, 
.ymax = *y2 + *d};
+       BAT *r;
+       BUN p, q;
+       BATiter pi;
+       BUN resNum = 0;
+       assert (*d > 0);
+
+       /* get the X and Y BATs*/
+       if((x1BAT = BATdescriptor(*x1)) == NULL)
+               throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+       if((y1BAT = BATdescriptor(*y1)) == NULL) {
+               BBPunfix(x1BAT->batCacheid);
+               throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+       }
+       if((y1BAT = BATdescriptor(*y1)) == NULL) {
+               BBPunfix(x1BAT->batCacheid);
+               throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+       }
+       if((cBAT = BATdescriptor(*cand1)) == NULL) {
+               BBPunfix(x1BAT->batCacheid);
+               BBPunfix(y1BAT->batCacheid);
+               throw(MAL, "grid.distance", RUNTIME_OBJECT_MISSING);
+       }
+       num = BATcount(x1BAT);
+
+       /* check if the BATs have dense heads and are aligned */
+       if (!BAThdense(x1BAT) || !BAThdense(y1BAT)) {
+               BBPunfix(x1BAT->batCacheid);
+               BBPunfix(y1BAT->batCacheid);
+               BBPunfix(cBAT->batCacheid);
+               return createException(MAL, "grid.distance", "BATs must have 
dense heads");
+       }
+       if(x1BAT->hseqbase != y1BAT->hseqbase || BATcount(x1BAT) != 
BATcount(y1BAT)) {
+               BBPunfix(x1BAT->batCacheid);
+               BBPunfix(y1BAT->batCacheid);
+               BBPunfix(cBAT->batCacheid);
+               return createException(MAL, "grid.distance", "BATs must be 
aligned");
+       }
+
+       assert(x1BAT->ttype == TYPE_lng);
+       assert(y1BAT->ttype == TYPE_lng);
+       x1Vals = (lng*)Tloc(x1BAT, BUNfirst(x1BAT));
+       y1Vals = (lng*)Tloc(y1BAT, BUNfirst(y1BAT));
+
+       /* initialize the bit vectors */
+       bvsize = (num >> SHIFT) + ((num & ONES) > 0);
+       if ((bv = GDKmalloc(bvsize * sizeof(uint64_t))) == NULL)
+               throw(MAL, "grid.distance", MAL_MALLOC_FAIL);
+       for (i = 0; i < bvsize; i++)
+               bv[i] = 0;
+
+       if ((cbv = GDKmalloc(bvsize * sizeof(uint64_t))) == NULL) {
+               GDKfree(bv);
+               throw(MAL, "grid.distance", MAL_MALLOC_FAIL);
+       }
+       for (i = 0; i < bvsize; i++)
+               cbv[i] = 0;
+
+       pi = bat_iterator(cBAT);
+       BATloop(cBAT, p, q) {
+               oid o = *(oid*)BUNtail(pi, p);
+               size_t blockNum = o >> SHIFT;
+               uint64_t bitPos = o & ONES;
+               set(cbv[blockNum], bitPos, 1);
+       }
+       BBPunfix(cBAT->batCacheid);
+
+       /* compute the grid index */
+       if((g = grid_create(x1BAT, y1BAT)) == NULL) {
+               BBPunfix(x1BAT->batCacheid);
+               BBPunfix(y1BAT->batCacheid);
+               GDKfree(bv);
+               GDKfree(cbv);
+               return createException(MAL, "grid.distance", "Could not compute 
the grid index");
+       }
+
+       /* find which cells have to be examined */
+       fxa = ((double)g->cellsPerAxis/(double)(g->xmax-g->xmin));
+       fxb = (double)g->xmin*fxa; 
+       fya = ((double)g->cellsPerAxis/(double)(g->ymax-g->ymin));
+       fyb = (double)g->ymin*fya; 
+
+       minCellx = (double)(mbb.xmin<g->xmin?g->xmin:mbb.xmin)*fxa - fxb;
+       maxCellx = (double)(mbb.xmax>g->xmax?g->xmax:mbb.xmax)*fxa - fxb;
+       minCelly = (double)(mbb.ymin<g->ymin?g->ymin:mbb.ymin)*fya - fyb;
+       maxCelly = (double)(mbb.ymax>g->ymax?g->ymax:mbb.ymax)*fya - fyb;
+
+       /* split the cells in border and internal ones */
+       totalCellsNum = (maxCellx - minCellx + 1)*(maxCelly - minCelly + 1);
+       borderCellsNum = (maxCellx - minCellx + 1) + (maxCelly - minCelly + 1) 
- 1; /* per axis, remove the corner cell that has been added twice */
+       if(maxCellx > minCellx && maxCelly > minCelly)
+               borderCellsNum = borderCellsNum*2 - 2; /* subtract the two 
corner cells that have been added twice */
+       internalCellsNum = totalCellsNum - borderCellsNum;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to