Changeset: 3c54dfe29fd4 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3c54dfe29fd4
Modified Files:
geom/monetdb5/geom.h
geom/monetdb5/geomPoints.c
Branch: geo
Log Message:
Kosti's pbsm implementation
diffs (truncated from 363 to 300 lines):
diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h
--- a/geom/monetdb5/geom.h
+++ b/geom/monetdb5/geom.h
@@ -270,6 +270,7 @@ geom_export str wkbPointsDistance1_geom_
geom_export str wkbPointsDistance2_geom_bat(bat* outBAT_id, wkb** geomWKB,
bat* xBAT_id, bat* yBAT_id, int* srid);
geom_export str wkbFilterWithImprints_geom_bat(bat*, wkb**, bat*, bat*);
+geom_export str wkbFilterWithPBSM_geom_bat(bat*, wkb**, bat*, bat*);
geom_export int isLeft( double P0x, double P0y, double P1x, double P1y, double
P2x, double P2y);
diff --git a/geom/monetdb5/geomPoints.c b/geom/monetdb5/geomPoints.c
--- a/geom/monetdb5/geomPoints.c
+++ b/geom/monetdb5/geomPoints.c
@@ -23,6 +23,15 @@
#include "geom.h"
#include "time.h"
+typedef struct pbsm_ptr {
+ BUN offset;
+ unsigned long count;
+} pbsm_ptr;
+
+static pbsm_ptr *pbsm_idx = NULL;
+static oid *oids = NULL;
+
+
//it gets two BATs with x,y coordinates and returns a new BAT with the points
static BAT* BATMakePoint2D(BAT* xBAT, BAT* yBAT) {
BAT *outBAT = NULL;
@@ -993,4 +1002,332 @@ str wkbFilterWithImprints_geom_bat(bat*
}
+/* PBSM */
+static char *
+PBSMcreateindex (const dbl *x, const dbl *y, BUN n, double minx, double maxx,
double miny, double maxy) {
+ FILE *f;
+ BAT *pbsm;
+ sht *cells;
+ unsigned long i;
+ int shift = sizeof(sht) * 8 / 2;
+ sht prevCell, cell;
+ unsigned long m = 0, prevO;
+ clock_t t = clock();
+
+ assert (pbsm_idx == NULL && oids == NULL);
+
+ if ((pbsm_idx = GDKmalloc(USHRT_MAX * sizeof(pbsm_ptr))) == NULL)
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+
+ for (i = 0; i < USHRT_MAX; i++) {
+ pbsm_idx[i].count = 0;
+ pbsm_idx[i].offset = 0;
+ }
+
+ /* have we precomputed the grid? */
+ if ((f = fopen("20m-pbsm16.idx", "rb"))) {
+ if (fread(pbsm_idx, sizeof(pbsm_idx[0]), USHRT_MAX, f) !=
USHRT_MAX) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ throw(MAL, "batpbsm.contains16", "Could not read the
PBSM index from disk (source: 20m-pbsm16.idx)");
+ }
+ fclose(f);
+
+ if ((oids = GDKmalloc(n * sizeof(oid))) == NULL) {
+ GDKfree(pbsm_idx);
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+ }
+
+ if ((f = fopen("20m-pbsm16.dat", "rb"))) {
+ if (fread(oids, sizeof(oids[0]), n, f) != n) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ throw(MAL, "batpbsm.contains16", "Could not
read the PBSM index from disk (source: 20m-pbsm16.dat)");
+ }
+
+ fclose(f);
+
+ t = clock() - t;
+ fprintf(stderr, "[PBSM] Index loading: %d clicks - %f
seconds\n", (unsigned int)t, ((float)t)/CLOCKS_PER_SEC);
+
+ return MAL_SUCCEED;
+ } else {
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ throw(MAL, "batpbsm.contains16", "Could not read the
PBSM index (.dat).");
+ }
+ }
+
+ /* No. Let's compute the grid! */
+ // version 1
+ if((pbsm = BATnew(TYPE_void, TYPE_sht, n, TRANSIENT)) == NULL)
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+ cells = (sht*) Tloc(pbsm, BUNfirst(pbsm));
+
+ // version 1: calculate the pbsm values
+ for (i = 0; i < n; i++) {
+ unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
+ unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
+ cells[i] = ((((unsigned short) cellx) << shift)) | ((unsigned
short) celly);
+ }
+
+ // version 1: order the BAT according to the cell values
+ /* set some properties */
+ BATsetcount(pbsm,n);
+ BATseqbase(pbsm, 0);
+ pbsm->hsorted = 1;
+ pbsm->hrevsorted = (BATcount(pbsm) <= 1);
+ pbsm->tsorted = 0;
+ pbsm->trevsorted = 0;
+ pbsm->hdense = true;
+ pbsm->tdense = false;
+ BATmaterialize(pbsm);
+ //BATderiveProps(pbsm, false);
+ //BATassertProps(pbsm);
+ pbsm = BATorder(BATmirror(pbsm));
+ //BATprint(pbsm);
+
+ // version 1: compress the head
+ cells = (sht*) Hloc(pbsm, BUNfirst(pbsm));
+ oids = (oid*) Tloc(pbsm, BUNfirst(pbsm));
+
+ prevCell = cells[0];
+ cell = cells[0];
+ m = 0;
+ prevO = 0;
+ for (i = 0; i < n; i++) {
+ cell = cells[i];
+
+ if (cell == prevCell) {
+ m++;
+ } else {
+ pbsm_idx[cell - SHRT_MIN - 1].offset = prevO;
+ pbsm_idx[cell - SHRT_MIN - 1].count = m;
+ prevCell = cell;
+ prevO = i;
+ m = 1;
+ }
+ }
+ pbsm_idx[cell - SHRT_MIN - 1].offset = prevO;
+ pbsm_idx[cell - SHRT_MIN - 1].count = m;
+
+ // version 1: clean up
+ pbsm->T->heap.base = NULL; // need to keep the oids array
+ BBPreleaseref(pbsm->batCacheid);
+
+
+ // version 2
+ // do one pass to compute the counts needed
+ // do second pass to assign oid at the appropriate places
+
+ /*
+ *
+ * TODO
+ *
+ */
+
+ t = clock() - t;
+ fprintf(stderr, "[PBSM] Index population: %d clicks - %f seconds\n",
(unsigned int)t, ((float)t)/CLOCKS_PER_SEC);
+
+ /* Save the index for future use (sloppiness acknowledged) */
+ if ((f = fopen("20m-pbsm16.idx", "wb"))) {
+ if (fwrite(pbsm_idx, sizeof(pbsm_idx[0]), USHRT_MAX,f) !=
USHRT_MAX) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ throw(MAL, "batpbsm.contains16", "Could not save the
PBSM index to disk (target: 20m-pbsm16.idx)");
+ }
+ fflush(f);
+ fclose(f);
+ }
+
+ if ((f = fopen("20m-pbsm16.dat", "wb"))) {
+ if (fwrite(oids, sizeof(oids[0]), n, f) != n) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ throw(MAL, "batpbsm.contains16", "Could not save the
PBSM index to disk (target: 20m-pbsm16.dat)");
+ }
+ fflush(f);
+ fclose(f);
+ }
+
+ return MAL_SUCCEED;
+}
+
+static char *
+PBSMarraycontains16(BAT **bres, const dbl *x, BAT *batx, const dbl *y, BAT
*baty, mbr *mbb, BUN n, double minx, double maxx, double miny, double maxy) {
+ unsigned long csize = 0, u;
+ oid *candidates;
+ unsigned char mbrcellxmin, mbrcellxmax, mbrcellymin, mbrcellymax, k,l;
+ int shift = sizeof(sht) * 8 / 2;
+ unsigned long i;
+ str msg;
+
+ /* assert calling sanity */
+ assert(*bres != NULL && x != NULL && y != NULL && batx != NULL && baty
!= NULL);
+
+ /* read the pbsm index to memory */
+ if (pbsm_idx == NULL || oids == NULL) {
+ /* the index has not been materialized/loaded yet */
+ if ((msg = PBSMcreateindex(x, y, n, minx, maxx, miny, maxy)) !=
MAL_SUCCEED)
+ return msg;
+ }
+
+ /* generate a pbsm value from the geometry */
+ mbrcellxmin = (unsigned char)((mbb->xmin - minx)/(maxx - minx) *
UCHAR_MAX);
+ mbrcellxmax = (unsigned char)((mbb->xmax - minx)/(maxx - minx) *
UCHAR_MAX);
+ mbrcellymin = (unsigned char)((mbb->ymin - miny)/(maxy - miny) *
UCHAR_MAX);
+ mbrcellymax = (unsigned char)((mbb->ymax - miny)/(maxy - miny) *
UCHAR_MAX);
+
+ csize = 0;
+ for (k = mbrcellxmin; k <= mbrcellxmax; k++) {
+ for (l = mbrcellymin; l <= mbrcellymax; l++) {
+ sht mbrc = ((((unsigned short) k) << shift)) |
((unsigned short) l);
+ //sht mbrc = ((((sht) k) << shift)) | ((sht) l);
+ csize += pbsm_idx[mbrc - SHRT_MIN].count;
+ }
+ }
+
+ /* get candidate oid from the pbsm index */
+ if ((candidates = GDKmalloc(csize * sizeof(oid))) == NULL)
+ throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+ i = 0;
+ for (k = mbrcellxmin; k <= mbrcellxmax; k++) {
+ for (l = mbrcellymin; l <= mbrcellymax; l++) {
+ sht mbrc = ((((unsigned short) k) << shift)) |
((unsigned short) l);
+ unsigned short mcell = mbrc - SHRT_MIN;
+
+ for (u = 0; u < pbsm_idx[mcell].count; u++) {
+ //fprintf(stderr,"[PBSM] Copying cell %d
(offset %ld, count %ld)\n", mcell,pbsm_idx[mcell].offset,pbsm_idx[mcell].count);
+ candidates[i] = oids[pbsm_idx[mcell].offset +
u];
+ i++;
+ }
+
+ }
+ }
+
+ assert(BAThdense(batx) && BAThdense(baty));
+
+ if ((*bres = BATnew(TYPE_void, TYPE_oid, csize, TRANSIENT)) == NULL) {
+ throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+ }
+
+ for (i = 0; i < csize; i++) {
+ oid *o = &(candidates[i]);
+ BUNfastins(*bres, NULL, o);
+ }
+
+ /* candidates are expected to be ordered */
+ BATseqbase(*bres, oid_nil); // avoid materialization of the void head
+ *bres = BATmirror(BATorder(BATmirror(*bres)));
+ BATseqbase(*bres, 0);
+
+ //BATkey(BATmirror(*bres), TRUE);
+ (*bres)->hdense = 1;
+ (*bres)->hsorted = 1;
+ (*bres)->tsorted = 1;
+ //(*bres)->tkey = TRUE;
+ BATderiveProps(*bres, false);
+
+ /* clean up */
+ //GDKfree(pbsm_idx);
+ //GDKfree(oids);
+
+ return MAL_SUCCEED;
+}
+
+static char *
+PBSMselect_(BAT **ret, BAT *bx, BAT *by, mbr *g,
+ double *minx, double *maxx, double *miny, double *maxy)
+{
+ BAT *bres = NULL;
+ BUN n;
+ char *msg = NULL;
+ dbl *x = NULL, *y = NULL;
+
+ assert (ret != NULL);
+ assert (bx != NULL && by != NULL);
+
+ n = BATcount(bx);
+
+ if (bx->ttype != by->ttype)
+ throw(MAL, "batpbsm.contains16", "tails of input BATs must be
identical");
+
+ /* get direct access to the tail arrays */
+ x = (dbl*) Tloc(bx, BUNfirst(bx));
+ y = (dbl*) Tloc(by, BUNfirst(by));
+
+ /* allocate result BAT */
+ bres = BATnew(TYPE_void, TYPE_oid, n, TRANSIENT);
+ if (bres == NULL)
+ throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+
+ msg = PBSMarraycontains16( &bres, x, bx, y , by, g, n, *minx, *maxx,
*miny, *maxy);
+
+ if (msg != MAL_SUCCEED) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list