Changeset: 66e1531dd8d5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=66e1531dd8d5
Modified Files:
geom/monetdb5/geom.c
geom/monetdb5/geom.h
geom/monetdb5/grid.c
geom/monetdb5/grid.h
geom/monetdb5/grid.mal
geom/sql/41_grid.sql
Branch: grid
Log Message:
Grid based distance join added
diffs (truncated from 774 to 300 lines):
diff --git a/geom/monetdb5/geom.c b/geom/monetdb5/geom.c
--- a/geom/monetdb5/geom.c
+++ b/geom/monetdb5/geom.c
@@ -1873,7 +1873,7 @@ str geom_epilogue(void *ret) {
}
/* Check if fixed-sized atom mbr is null */
-static int mbr_isnil(mbr *m) {
+int mbr_isnil(mbr *m) {
if (!m || m->xmin == flt_nil || m->ymin == flt_nil ||
m->xmax == flt_nil || m->ymax == flt_nil)
return 1;
@@ -4357,6 +4357,47 @@ str mbrOverlaps(bit *out, mbr **b1, mbr
return MAL_SUCCEED;
}
+/*returns the intersection of two mbrs */
+str mbrIntersection(mbr **r, mbr **b1, mbr **b2) {
+ bit overlap;
+ mbrOverlaps(&overlap, b1, b2);
+ if (overlap == 0) {
+ *r = mbr_nil;
+ } else {
+ (*r)->xmin = ((*b1)->xmin < (*b2)->xmin) ? (*b2)->xmin :
(*b1)->xmin;
+ (*r)->ymin = ((*b1)->ymin < (*b2)->ymin) ? (*b2)->ymin :
(*b1)->ymin;
+ (*r)->xmax = ((*b1)->xmax > (*b2)->xmax) ? (*b2)->xmax :
(*b1)->xmax;
+ (*r)->ymax = ((*b1)->ymax > (*b2)->ymax) ? (*b2)->ymax :
(*b1)->ymax;
+ }
+ return MAL_SUCCEED;
+}
+
+/*returns the union of two mbrs */
+str mbrUnion(mbr **r, mbr **b1, mbr **b2) {
+ bit overlap;
+ mbrOverlaps(&overlap, b1, b2);
+ if (mbr_isnil(*b1) && mbr_isnil(*b2)) {
+ *r = mbr_nil;
+ } else if (mbr_isnil(*b1)) {
+ (*r)->xmin = (*b2)->xmin;
+ (*r)->ymin = (*b2)->ymin;
+ (*r)->xmax = (*b2)->xmax;
+ (*r)->ymax = (*b2)->ymax;
+ } else if (mbr_isnil(*b2)) {
+ (*r)->xmin = (*b1)->xmin;
+ (*r)->ymin = (*b1)->ymin;
+ (*r)->xmax = (*b1)->xmax;
+ (*r)->ymax = (*b1)->ymax;
+ } else {
+ (*r)->xmin = ((*b1)->xmin > (*b2)->xmin) ? (*b2)->xmin :
(*b1)->xmin;
+ (*r)->ymin = ((*b1)->ymin > (*b2)->ymin) ? (*b2)->ymin :
(*b1)->ymin;
+ (*r)->xmax = ((*b1)->xmax < (*b2)->xmax) ? (*b2)->xmax :
(*b1)->xmax;
+ (*r)->ymax = ((*b1)->ymax < (*b2)->ymax) ? (*b2)->ymax :
(*b1)->ymax;
+ }
+
+ return MAL_SUCCEED;
+}
+
/*returns true if the mbrs of the two geometris overlap */
str mbrOverlaps_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB) {
mbr *geom1MBR = NULL, *geom2MBR = NULL;
diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h
--- a/geom/monetdb5/geom.h
+++ b/geom/monetdb5/geom.h
@@ -260,6 +260,8 @@ geom_export str mbrEqual(bit *out, mbr *
geom_export str mbrEqual_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB);
geom_export str mbrDistance(dbl *out, mbr **b1, mbr **b2);
geom_export str mbrDistance_wkb(dbl *out, wkb **geom1WKB, wkb **geom2WKB);
+geom_export str mbrIntersection(mbr **r, mbr **b1, mbr **b2);
+geom_export str mbrUnion(mbr **r, mbr **b1, mbr **b2);
geom_export str wkbCoordinateFromWKB(dbl*, wkb**, int*);
geom_export str wkbCoordinateFromMBR(dbl*, mbr**, int*);
@@ -294,3 +296,6 @@ geom_export str wkbCoordinateFromMBR_bat
geom_export int geom_catalog_upgrade(void *, int);
geom_export str geom_sql_upgrade(int);
+
+geom_export int mbr_isnil(mbr *m);
+
diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c
--- a/geom/monetdb5/grid.c
+++ b/geom/monetdb5/grid.c
@@ -34,19 +34,73 @@ do {
max = res + add; \
} while (0)
-typedef struct grid {
- lng xmin; /* grid universe: minimum X value */
- lng ymin; /* grid universe: minimum Y value */
- lng xmax; /* grid universe: maximum X value */
- lng ymax; /* grid universe: maximum Y value */
+#define GRIDextend(g1, g2, cellR, cellS, r1, r2, msg)
\
+do {
\
+/* make space to cater for the worst case where all points qualify */
\
+BUN maxSize = BATcount((r1)) +
\
+ ((g1)->dir[(cellR)+1]-(g1)->dir[(cellR)])*
\
+ ((g2)->dir[(cellS)+1]-(g2)->dir[(cellS)]);
\
+if (maxSize > GDK_oid_max) {
\
+ (msg) = createException(MAL, "grid.distance", "overflow of head
value"); \
+ goto distancejoin_fail;
\
+}
\
+while (maxSize > BATcapacity((r1))) {
\
+fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \
+ if ((BATextend((r1), BATgrows((r1))) != GDK_SUCCEED) ||
\
+ (BATextend((r2), BATgrows((r2))) != GDK_SUCCEED)) {
\
+ (msg) = createException(MAL, "grid.distance",
\
+ "could not extend BATs for storing the join
results"); \
+ goto distancejoin_fail;
\
+ }
\
+}
\
+/* fprintf(stderr, "Comparing [%zu](%zu)-[%zu](%zu)\n",
\
+ cellR, g1->dir[cellR+1]-g1->dir[cellR],
\
+ cellS, g2->dir[cellS+1]-g2->dir[cellS]); */
\
+} while (0)
+
+
+
+#define GRIDcmp(x1Vals, y1Vals, br1, g1, x2Vals, y2Vals, br2, g2, cellR,
cellS, r1, r2, msg) \
+do {
\
+if ((cellR) > (g1)->cellsNum || (cellS) > (g2)->cellsNum)
\
+ continue;
\
+GRIDextend(g1, g2, cellR, cellS, r1, r2, msg);
\
+/* compare points of R in cellR with points of S in cellS */
\
+/* TODO: the outer relation should be the one with more points */
\
+for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) {
\
+ oid oid1 = m;
\
+ lng x1v = (x1Vals)[oid1];
\
+ lng y1v = (y1Vals)[oid1];
\
+ for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) {
\
+ size_t oid2 = n;
\
+ lng x2v = (x2Vals)[oid2];
\
+ lng y2v = (y2Vals)[oid2];
\
+ double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v);
\
+/* fprintf(stderr, "The distance between [%lld,%lld] and [%lld,%lld] is %f
(requested distance: %f)\n",x1v,y1v,x2v,y2v,ddist,distsqr); */ \
+ if (ddist <= distsqr) {
\
+ bunfastapp_nocheck_inc((r1), (br1), &oid1);
\
+ bunfastapp_nocheck_inc((r2), (br2), &oid2);
\
+ }
\
+ }
\
+}
\
+} while (0)
+
+struct Grid {
+ dbl xmin; /* grid universe: minimum X value */
+ dbl ymin; /* grid universe: minimum Y value */
+ dbl xmax; /* grid universe: maximum X value */
+ dbl ymax; /* grid universe: maximum Y value */
+ mbr mbb;
bte shift;
size_t cellsNum; /* number of cells */
size_t cellsPerAxis; /* number of cells per axis */
+ size_t cellsX;
+ size_t cellsY;
bat xbat; /* bat id for X coordinates
*/
bat ybat; /* bat id for Y coordinates
*/
- size_t * dir; /* the grid directory */
+ oid * dir; /* the grid directory */
oid * oids; /* heap where the index is stored */
-} grid;
+};
static size_t
countSetBits(uint64_t *resBitvector, size_t vectorSize)
@@ -63,10 +117,56 @@ countSetBits(uint64_t *resBitvector, siz
return num;
}
-static grid *
+#ifndef NDEBUG
+static void
+grid_print(Grid * g)
+{
+ int m = 0, n = 0, o = 0;
+ fprintf(stderr, "GRID %p (shift %d). batx: %d, baty: %d\n", g,
g->shift, g->xbat, g->ybat);
+ fprintf(stderr, "- Universe: [%f %f, %f %f]\n", g->xmin, g->ymin,
g->xmax, g->ymax);
+ fprintf(stderr, "- MBB : [%f %f, %f %f]\n", g->mbb.xmin,
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
+ fprintf(stderr, "- Cells X: %zu, Y: %zu, total: %zu, per axis: %zu\n",
g->cellsX, g->cellsY, g->cellsNum, g->cellsPerAxis);
+ m = ceil(log10(g->dir[g->cellsNum]));
+ n = ceil(log10(g->cellsY));
+ o = ceil(log10(g->cellsX));
+ m = m > o ? m : o;
+
+ fprintf(stderr, "- Directory\n");
+ for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++)
+ fprintf(stderr,"-");
+ fprintf(stderr, "\n");
+ for (size_t k = 0; k < g->cellsY; k++) {
+ size_t j = g->cellsY - k - 1;
+ fprintf(stderr,"||%*zu||",n,j);
+ for (size_t i = 0; i < g->cellsX; i++) {
+ oid cell = i + j*g->cellsX;
+ size_t v = g->dir[cell+1]-g->dir[cell];
+ if (v == 0)
+ fprintf(stderr, "%*s|", m, "");
+ else
+ fprintf(stderr, "%*zu|", m, v);
+ }
+ fprintf(stderr,"\n");
+ }
+ for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++)
+ fprintf(stderr,"-");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "||%*s||", n, "");
+ for (size_t i = 0; i < g->cellsX; i++)
+ fprintf(stderr, "%*zu|", m, i);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "- OIDs\n[");
+ for (size_t i = 0; i < g->dir[g->cellsNum]; i++) {
+ fprintf(stderr, "%zu,", g->oids[i]);
+ }
+ fprintf(stderr, "\r]\n");
+}
+#endif
+
+static Grid *
grid_create(BAT *bx, BAT *by)
{
- grid * g;
+ Grid * g;
lng *xVals, *yVals;
size_t i, cnt;
dbl fxa, fxb, fya, fyb;
@@ -74,7 +174,7 @@ grid_create(BAT *bx, BAT *by)
assert(BATcount(bx) == BATcount(by));
assert(BATcount(bx) > 0);
- if ((g = GDKmalloc(sizeof(grid))) == NULL)
+ if ((g = GDKmalloc(sizeof(Grid))) == NULL)
return g;
g->xbat = bx->batCacheid;
@@ -114,7 +214,7 @@ grid_create(BAT *bx, BAT *by)
}
/* allocate space for the directory */
- if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(size_t))) == NULL) {
+ if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(oid))) == NULL) {
GDKfree(g);
g = NULL;
return g;
@@ -153,8 +253,6 @@ grid_create(BAT *bx, BAT *by)
oid cellx = (double)xVals[i]*fxa - fxb;
oid celly = (double)yVals[i]*fya - fyb;
oid cell = ((cellx << g->shift) | celly);
-// assert(cell < g->cellsNum);
-// assert(position < offsetVals[g->cellsNum]);
g->oids[g->dir[cell]++] = i;
}
@@ -165,11 +263,255 @@ grid_create(BAT *bx, BAT *by)
/* TODO: move here the code for compressing the index */
+#ifndef NDEBUG
+ grid_print(g);
+#endif
return g;
}
+static Grid *
+grid_create_mbr(BAT *bx, BAT *by, mbr *m, dbl * d)
+{
+ Grid * g;
+ lng *xVals, *yVals;
+ size_t i, cnt;
+ dbl fxa, fxb, fya, fyb;
+
+ assert(BATcount(bx) == BATcount(by));
+ assert(BATcount(bx) > 0);
+ assert((*d) > 0);
+
+ if ((g = GDKmalloc(sizeof(Grid))) == NULL)
+ return g;
+
+ g->xbat = bx->batCacheid;
+ g->ybat = by->batCacheid;
+ xVals = (lng*)Tloc(bx, BUNfirst(bx));
+ yVals = (lng*)Tloc(by, BUNfirst(by));
+
+ g->mbb.xmin = m->xmin;
+ g->mbb.ymin = m->ymin;
+ g->mbb.xmax = m->xmax;
+ g->mbb.ymax = m->ymax;
+ g->xmin = m->xmin;
+ g->ymin = m->ymin;
+ g->xmax = m->xmax;
+ g->ymax = m->ymax;
+
+#ifndef NDEBUG
+ fprintf(stderr, "grid borders: [%f %f, %f %f]\n", g->mbb.xmin,
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
+#endif
+ /* determine the appropriate number of cells */
+ g->cellsX = (m->xmax - m->xmin)/(*d);
+ g->cellsY = (m->ymax - m->ymin)/(*d);
+
+ /* how many bits do we need? */
+ g->shift = (bte)ceil(log2(g->cellsX>g->cellsY?g->cellsX:g->cellsY));
+ cnt = BATcount(bx);
+ g->cellsNum = g->cellsX * g->cellsY;
+#ifndef NDEBUG
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list