Changeset: 03761b7c2be2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03761b7c2be2
Modified Files:
geom/monetdb5/geom.h
geom/monetdb5/geom.mal
geom/monetdb5/geomPoints.c
geom/sql/40_geom.sql
monetdb5/optimizer/opt_geospatial.c
Branch: geo
Log Message:
Faster way to create PBSM index + command to create PBSM index (not final)
diffs (truncated from 374 to 300 lines):
diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h
--- a/geom/monetdb5/geom.h
+++ b/geom/monetdb5/geom.h
@@ -276,3 +276,4 @@ geom_export str wkbFilterWithImprintsAnd
geom_export int isLeft( double P0x, double P0y, double P1x, double P1y, double
P2x, double P2y);
+geom_export str pbsmIndex_bat(int *res, int *xBAT_id, int *yBAT_id, double*
xmin, double* ymin, double* xmax, double* ymax, char** filename);
diff --git a/geom/monetdb5/geom.mal b/geom/monetdb5/geom.mal
--- a/geom/monetdb5/geom.mal
+++ b/geom/monetdb5/geom.mal
@@ -443,6 +443,9 @@ geom.prelude();
module batgeom;
+#unsafe prevents mitosis from parallelising it
+command pbsmIndex{unsafe}(x:bat[:oid,:dbl], y:bat[:oid,:dbl], xmin:dbl,
ymin:dbl, xmax:dbl, ymax:dbl, filename:str) :bat[:oid,:int] address
pbsmIndex_bat;
+
#the 1 version of the functions use geos the 2 versions have custom
implementations
command Distance(a:bat[:oid,:wkb], b:bat[:oid,:wkb]) :bat[:oid,:dbl] address
wkbDistance_bat;
diff --git a/geom/monetdb5/geomPoints.c b/geom/monetdb5/geomPoints.c
--- a/geom/monetdb5/geomPoints.c
+++ b/geom/monetdb5/geomPoints.c
@@ -262,7 +262,8 @@ clean:
//Aternative implementation of contains using the winding number method
inline int isLeft( double P0x, double P0y, double P1x, double P1y, double P2x,
double P2y) {
- return ( ((P1x - P0x) * (P2y - P0y) - (P2x - P0x) * (P1y - P0y)) >= 0.0
); //set equal to include borders
+ //borders are not included
+ return ( ((P1x - P0x) * (P2y - P0y) - (P2x - P0x) * (P1y - P0y)) > 0.0 );
}
#define isRight(x0, y0, x1, y1, x2, y2) isLeft(x0, y0, x1, y1, x2, y2)-1
@@ -1009,14 +1010,227 @@ str wkbFilterWithImprints_geom_bat(bat*
/* PBSM */
static char *
-PBSMcreateindex (const dbl *x, const dbl *y, BUN n, double minx, double maxx,
double miny, double maxy) {
- FILE *f;
+PBSMcomputeindex1(const dbl *x, const dbl *y, BUN n, double minx, double maxx,
double miny, double maxy, oid seqbase) {
+ sht *cells;
BAT *pbsm;
- sht *cells;
unsigned long i;
int shift = sizeof(sht) * 8 / 2;
sht prevCell, cell;
unsigned long m = 0, prevO;
+
+
+ if((pbsm = BATnew(TYPE_void, TYPE_sht, n, TRANSIENT)) == NULL)
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+ cells = (sht*) Tloc(pbsm, BUNfirst(pbsm));
+
+ // calculate the pbsm values
+ for (i = 0; i < n; i++) {
+ unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
+ unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
+ cells[i] = ((((unsigned short) cellx) << shift)) | ((unsigned
short) celly);
+ }
+
+ // order the BAT according to the cell values
+ /* set some properties */
+ BATsetcount(pbsm,n);
+ BATseqbase(pbsm, 0);
+ pbsm->hsorted = 1;
+ pbsm->hrevsorted = (BATcount(pbsm) <= 1);
+ pbsm->tsorted = 0;
+ pbsm->trevsorted = 0;
+ pbsm->hdense = true;
+ pbsm->tdense = false;
+ BATseqbase(pbsm, seqbase);
+ BATmaterialize(pbsm);
+ //BATderiveProps(pbsm, false);
+ //BATassertProps(pbsm);
+ pbsm = BATorder(BATmirror(pbsm));
+ //BATprint(pbsm);
+
+ // compress the head
+ cells = (sht*) Hloc(pbsm, BUNfirst(pbsm));
+ oids = (oid*) Tloc(pbsm, BUNfirst(pbsm));
+
+ prevCell = cells[0];
+ cell = cells[0];
+ m = 0;
+ prevO = 0;
+ for (i = 0; i < n; i++) {
+ cell = cells[i];
+
+ if (cell == prevCell) {
+ m++;
+ } else {
+ pbsm_idx[cell - SHRT_MIN].offset = prevO;
+ pbsm_idx[cell - SHRT_MIN].count = m;
+ prevCell = cell;
+ prevO = i;
+ m = 1;
+ }
+ }
+ pbsm_idx[cell - SHRT_MIN].offset = prevO;
+ pbsm_idx[cell - SHRT_MIN].count = m;
+
+ // clean up
+ pbsm->T->heap.base = NULL; // need to keep the oids array
+ BBPreleaseref(pbsm->batCacheid);
+
+ return MAL_SUCCEED;
+}
+
+
+static char *
+PBSMcomputeindex2(const dbl *x, const dbl *y, BUN n, double minx, double maxx,
double miny, double maxy, oid seqbase) {
+ unsigned long *tmpCount;
+ unsigned long i;
+ int shift = sizeof(sht) * 8 / 2;
+
+ if ((pbsm_idx = GDKmalloc(USHRT_MAX * sizeof(pbsm_ptr))) == NULL)
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+
+ if ((oids = GDKmalloc(n * sizeof(oid))) == NULL) {
+ GDKfree(pbsm_idx);
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+ }
+
+ if ((tmpCount = GDKmalloc(USHRT_MAX * sizeof(unsigned long))) == NULL) {
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+ }
+
+ for (i = 0; i < USHRT_MAX; i++) {
+ pbsm_idx[i].count = 0;
+ pbsm_idx[i].offset = 0;
+ }
+
+ for (i = 0; i < USHRT_MAX; i++) {
+ tmpCount[i] = 0;
+ }
+
+ // count pbsm values per cell
+ for (i = 0; i < n; i++) {
+ unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
+ unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
+ sht cell = ((((unsigned short) cellx) << shift)) | ((unsigned
short) celly);
+ pbsm_idx[cell - SHRT_MIN].count++;
+ }
+
+ // compute the offset values before filling in the oid array
+ pbsm_idx[0].offset = 0;
+ for (i = 1; i < USHRT_MAX; i++) {
+ pbsm_idx[i].offset = pbsm_idx[i-1].offset + pbsm_idx[i-1].count;
+ }
+
+ // fill in the oid array
+ for (i = 0; i < n; i++) {
+ unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
+ unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
+ sht cell = ((((unsigned short) cellx) << shift)) | ((unsigned
short) celly);
+ unsigned long position = pbsm_idx[cell - SHRT_MIN].offset +
tmpCount[cell - SHRT_MIN];
+ oids[position] = i + seqbase;
+ tmpCount[cell - SHRT_MIN]++;
+ }
+
+ GDKfree(tmpCount);
+
+ return MAL_SUCCEED;
+}
+
+//creates the pbsm index and stores it in a file (provided by the user)
+str pbsmIndex_bat(int *res, int *xBAT_id, int *yBAT_id, double* xmin, double*
ymin, double* xmax, double* ymax, char** filename1) {
+ BAT *xBAT=NULL, *yBAT=NULL;
+ double *x = NULL, *y=NULL;
+ char *idxFilename = NULL, *dataFilename = NULL;
+ char *idxEnding = ".idx";
+ char *dataEnding= ".dat";
+ FILE *f;
+ BUN n = 0;
+char* filename = *filename1;
+fprintf(stderr, "%s\n", filename);
+ //get the descriptors of the BATs
+ if ((xBAT = BATdescriptor(*xBAT_id)) == NULL) {
+ throw(MAL, "batgeom.pbsmIndex", RUNTIME_OBJECT_MISSING);
+ }
+ if ((yBAT = BATdescriptor(*yBAT_id)) == NULL) {
+ BBPreleaseref(xBAT->batCacheid);
+ throw(MAL, "batgeom.pbsmIndex", RUNTIME_OBJECT_MISSING);
+ }
+
+ //check if the BATs have dense heads and are aligned
+ if (!BAThdense(xBAT) || !BAThdense(yBAT)) {
+ BBPreleaseref(xBAT->batCacheid);
+ BBPreleaseref(yBAT->batCacheid);
+ return createException(MAL, "batgeom.pbsmIndex", "BATs must
have dense heads");
+ }
+ if(xBAT->hseqbase != yBAT->hseqbase || BATcount(xBAT) !=
BATcount(yBAT)) {
+ BBPreleaseref(xBAT->batCacheid);
+ BBPreleaseref(yBAT->batCacheid);
+ return createException(MAL, "batgeom.pbsmIndex", "BATs must be
aligned");
+ }
+
+ n = BATcount(xBAT);
+ *res = 0;
+
+ /* get direct access to the tail arrays */
+ x = (dbl*) Tloc(xBAT, BUNfirst(xBAT));
+ y = (dbl*) Tloc(yBAT, BUNfirst(yBAT));
+
+ //crete the index
+ if (PBSMcomputeindex2(x, y, n, *xmin, *xmax, *ymin, *ymax, 0) !=
MAL_SUCCEED) {
+ return createException(MAL, "batgeom.pbsmIndex", "Failed to
compute index (2).");
+ }
+ //delete the files for the index if they arleady exist
+ idxFilename = (char*)GDKmalloc(sizeof(char)*(strlen(filename)+5));
+ dataFilename = (char*)GDKmalloc(sizeof(char)*(strlen(filename)+5));
+ strcpy(idxFilename, filename);
+ strcpy(idxFilename+strlen(filename), idxEnding);
+ strcpy(dataFilename, filename);
+ strcpy(dataFilename+strlen(filename), dataEnding);
+
+
+ /* Save the index for future use (sloppiness acknowledged) */
+ if ((f = fopen(idxFilename, "wb"))) {
+ if (fwrite(pbsm_idx, sizeof(pbsm_idx[0]), USHRT_MAX,f) !=
USHRT_MAX) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ GDKfree(idxFilename);
+ GDKfree(dataFilename);
+
+ throw(MAL, "batgeom.pbsmIndex", "Could not save the
PBSM index to disk (target: 20m-pbsm16.idx)");
+ }
+ fflush(f);
+ fclose(f);
+ }
+
+ if ((f = fopen(dataFilename, "wb"))) {
+ if (fwrite(oids, sizeof(oids[0]), n, f) != n) {
+ fclose(f);
+ GDKfree(pbsm_idx);
+ GDKfree(oids);
+ GDKfree(idxFilename);
+ GDKfree(dataFilename);
+
+ throw(MAL, "batgeom.pbsmIndex", "Could not save the
PBSM index to disk (target: 20m-pbsm16.dat)");
+ }
+ fflush(f);
+ fclose(f);
+ }
+
+ GDKfree(idxFilename);
+ GDKfree(dataFilename);
+
+
+ return MAL_SUCCEED;
+}
+
+
+
+static char *
+PBSMcreateindex (const dbl *x, const dbl *y, BUN n, double minx, double maxx,
double miny, double maxy, oid seqbase) {
+ FILE *f;
+ unsigned long i;
clock_t t = clock();
assert (pbsm_idx == NULL && oids == NULL);
@@ -1065,72 +1279,14 @@ PBSMcreateindex (const dbl *x, const dbl
}
/* No. Let's compute the grid! */
+
// version 1
- if((pbsm = BATnew(TYPE_void, TYPE_sht, n, TRANSIENT)) == NULL)
- throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
- cells = (sht*) Tloc(pbsm, BUNfirst(pbsm));
-
- // version 1: calculate the pbsm values
- for (i = 0; i < n; i++) {
- unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
- unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
- cells[i] = ((((unsigned short) cellx) << shift)) | ((unsigned
short) celly);
- }
-
- // version 1: order the BAT according to the cell values
- /* set some properties */
- BATsetcount(pbsm,n);
- BATseqbase(pbsm, 0);
- pbsm->hsorted = 1;
- pbsm->hrevsorted = (BATcount(pbsm) <= 1);
- pbsm->tsorted = 0;
- pbsm->trevsorted = 0;
- pbsm->hdense = true;
- pbsm->tdense = false;
- BATmaterialize(pbsm);
- //BATderiveProps(pbsm, false);
- //BATassertProps(pbsm);
- pbsm = BATorder(BATmirror(pbsm));
- //BATprint(pbsm);
-
- // version 1: compress the head
- cells = (sht*) Hloc(pbsm, BUNfirst(pbsm));
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list