Changeset: d001c325af71 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d001c325af71
Modified Files:
        geom/monetdb5/geom.h
        geom/monetdb5/geomPoints.c
Branch: geo
Log Message:

Kosti's PDBS algorithm added


diffs (truncated from 363 to 300 lines):

diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h
--- a/geom/monetdb5/geom.h
+++ b/geom/monetdb5/geom.h
@@ -270,6 +270,7 @@ geom_export str wkbPointsDistance1_geom_
 geom_export str wkbPointsDistance2_geom_bat(bat* outBAT_id, wkb** geomWKB, 
bat* xBAT_id, bat* yBAT_id, int* srid);
 
 geom_export str wkbFilterWithImprints_geom_bat(bat*, wkb**, bat*, bat*);
+geom_export str wkbFilterWithPBSM_geom_bat(bat*, wkb**, bat*, bat*);
 
 geom_export int isLeft( double P0x, double P0y, double P1x, double P1y, double 
P2x, double P2y);
 
diff --git a/geom/monetdb5/geomPoints.c b/geom/monetdb5/geomPoints.c
--- a/geom/monetdb5/geomPoints.c
+++ b/geom/monetdb5/geomPoints.c
@@ -23,6 +23,15 @@
 #include "geom.h"
 #include "time.h"
 
+typedef struct pbsm_ptr {
+       BUN offset;
+       unsigned long count;
+} pbsm_ptr;
+
+static pbsm_ptr *pbsm_idx = NULL;
+static oid *oids = NULL;
+
+
 //it gets two BATs with x,y coordinates and returns a new BAT with the points
 static BAT* BATMakePoint2D(BAT* xBAT, BAT* yBAT) {
        BAT *outBAT = NULL;
@@ -993,4 +1002,332 @@ str wkbFilterWithImprints_geom_bat(bat* 
 }
 
 
+/* PBSM */
 
+static char *
+PBSMcreateindex (const dbl *x, const dbl *y, BUN n, double minx, double maxx, 
double miny, double maxy) {
+       FILE *f;
+       BAT *pbsm;
+        sht *cells;
+       unsigned long i;
+       int shift = sizeof(sht) * 8 / 2;
+        sht prevCell, cell;
+        unsigned long m = 0, prevO;
+       clock_t t = clock();
+
+       assert (pbsm_idx == NULL && oids == NULL);
+       
+       if ((pbsm_idx = GDKmalloc(USHRT_MAX * sizeof(pbsm_ptr))) == NULL)
+               throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+
+       for (i = 0; i < USHRT_MAX; i++) {
+               pbsm_idx[i].count = 0;
+               pbsm_idx[i].offset = 0;
+       }
+
+       /* have we precomputed the grid? */
+       if ((f = fopen("20m-pbsm16.idx", "rb"))) {
+               if (fread(pbsm_idx, sizeof(pbsm_idx[0]), USHRT_MAX, f) != 
USHRT_MAX) {
+                       fclose(f);
+                       GDKfree(pbsm_idx);
+                       throw(MAL, "batpbsm.contains16", "Could not read the 
PBSM index from disk (source: 20m-pbsm16.idx)");
+               }
+               fclose(f);
+
+               if ((oids = GDKmalloc(n * sizeof(oid))) == NULL) {
+                       GDKfree(pbsm_idx);
+                       throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+               }
+
+               if ((f = fopen("20m-pbsm16.dat", "rb"))) {
+                       if (fread(oids, sizeof(oids[0]), n, f) != n) {
+                               fclose(f);
+                               GDKfree(pbsm_idx);
+                               GDKfree(oids);
+                               throw(MAL, "batpbsm.contains16", "Could not 
read the PBSM index from disk (source: 20m-pbsm16.dat)");
+                       }
+
+                       fclose(f);
+
+                       t = clock() - t;
+                       fprintf(stderr, "[PBSM] Index loading: %d clicks - %f 
seconds\n", (unsigned int)t, ((float)t)/CLOCKS_PER_SEC);
+
+                       return MAL_SUCCEED;
+               } else {
+                       GDKfree(pbsm_idx);
+                       GDKfree(oids);
+                       throw(MAL, "batpbsm.contains16", "Could not read the 
PBSM index (.dat).");
+               }
+       } 
+
+       /* No. Let's compute the grid! */
+       // version 1
+       if((pbsm = BATnew(TYPE_void, TYPE_sht, n, TRANSIENT)) == NULL)
+               throw(MAL, "pbsm.createindex", MAL_MALLOC_FAIL);
+       cells = (sht*) Tloc(pbsm, BUNfirst(pbsm));
+       
+       // version 1: calculate the pbsm values
+       for (i = 0; i < n; i++) {
+               unsigned char cellx = ((x[i] - minx)/(maxx - minx))*UCHAR_MAX;
+                unsigned char celly = ((y[i] - miny)/(maxy - miny))*UCHAR_MAX;
+               cells[i] = ((((unsigned short) cellx) << shift)) | ((unsigned 
short) celly);
+       }
+
+       // version 1: order the BAT according to the cell values
+       /* set some properties */
+       BATsetcount(pbsm,n);
+       BATseqbase(pbsm, 0);
+       pbsm->hsorted = 1; 
+       pbsm->hrevsorted = (BATcount(pbsm) <= 1);
+       pbsm->tsorted = 0;
+       pbsm->trevsorted = 0;
+       pbsm->hdense = true;
+       pbsm->tdense = false;
+       BATmaterialize(pbsm);
+       //BATderiveProps(pbsm, false);
+       //BATassertProps(pbsm);
+       pbsm = BATorder(BATmirror(pbsm));
+       //BATprint(pbsm);
+       
+       // version 1: compress the head
+        cells = (sht*) Hloc(pbsm, BUNfirst(pbsm));
+       oids  = (oid*) Tloc(pbsm, BUNfirst(pbsm));
+
+        prevCell = cells[0];
+        cell = cells[0];
+        m = 0;
+        prevO = 0;
+        for (i = 0; i < n; i++) {
+                cell = cells[i];
+
+                if (cell == prevCell) {
+                        m++;
+                } else {
+                        pbsm_idx[cell - SHRT_MIN - 1].offset = prevO;
+                        pbsm_idx[cell - SHRT_MIN - 1].count = m;
+                        prevCell = cell;
+                        prevO = i;
+                        m = 1;
+                }
+        }
+        pbsm_idx[cell - SHRT_MIN - 1].offset = prevO;
+        pbsm_idx[cell - SHRT_MIN - 1].count = m;
+       
+       // version 1: clean up
+       pbsm->T->heap.base = NULL; // need to keep the oids array
+        BBPreleaseref(pbsm->batCacheid);       
+
+       
+       // version 2
+       // do one pass to compute the counts needed
+       // do second pass to assign oid at the appropriate places
+
+       /* 
+        * 
+        * TODO 
+        *
+        */
+
+       t = clock() - t;
+       fprintf(stderr, "[PBSM] Index population: %d clicks - %f seconds\n", 
(unsigned int)t, ((float)t)/CLOCKS_PER_SEC);
+
+       /* Save the index for future use (sloppiness acknowledged) */
+       if ((f = fopen("20m-pbsm16.idx", "wb"))) {
+               if (fwrite(pbsm_idx, sizeof(pbsm_idx[0]), USHRT_MAX,f) != 
USHRT_MAX) {
+                       fclose(f);
+                       GDKfree(pbsm_idx);
+                       GDKfree(oids);
+                       throw(MAL, "batpbsm.contains16", "Could not save the 
PBSM index to disk (target: 20m-pbsm16.idx)");
+               }
+                fflush(f);
+                fclose(f);
+       }
+
+       if ((f = fopen("20m-pbsm16.dat", "wb"))) {
+               if (fwrite(oids, sizeof(oids[0]), n, f) != n) {
+                       fclose(f);
+                       GDKfree(pbsm_idx);
+                       GDKfree(oids);
+                       throw(MAL, "batpbsm.contains16", "Could not save the 
PBSM index to disk (target: 20m-pbsm16.dat)");
+               }
+                fflush(f);
+                fclose(f);
+       }
+
+       return MAL_SUCCEED;
+}
+
+static char *
+PBSMarraycontains16(BAT **bres, const dbl *x, BAT *batx, const dbl *y,  BAT 
*baty, mbr *mbb, BUN n, double minx, double maxx, double miny, double maxy) {
+       unsigned long csize = 0, u;
+       oid *candidates;
+       unsigned char mbrcellxmin, mbrcellxmax, mbrcellymin, mbrcellymax, k,l;
+       int shift = sizeof(sht) * 8 / 2;
+       unsigned long i;
+       str msg;
+
+        /* assert calling sanity */
+        assert(*bres != NULL && x != NULL && y != NULL && batx != NULL && baty 
!= NULL);
+       
+       /* read the pbsm index to memory */
+       if (pbsm_idx == NULL || oids == NULL) {
+               /* the index has not been materialized/loaded yet */
+               if ((msg = PBSMcreateindex(x, y, n, minx, maxx, miny, maxy)) != 
MAL_SUCCEED)
+                       return msg;
+       }
+
+       /* generate a pbsm value from the geometry */
+       mbrcellxmin = (unsigned char)((mbb->xmin - minx)/(maxx - minx) * 
UCHAR_MAX);
+       mbrcellxmax = (unsigned char)((mbb->xmax - minx)/(maxx - minx) * 
UCHAR_MAX);
+       mbrcellymin = (unsigned char)((mbb->ymin - miny)/(maxy - miny) * 
UCHAR_MAX);
+       mbrcellymax = (unsigned char)((mbb->ymax - miny)/(maxy - miny) * 
UCHAR_MAX);
+
+       csize = 0;
+       for (k = mbrcellxmin; k <= mbrcellxmax; k++) {
+               for (l = mbrcellymin; l <= mbrcellymax; l++) {
+                       sht mbrc = ((((unsigned short) k) << shift)) | 
((unsigned short) l);
+                       //sht mbrc = ((((sht) k) << shift)) | ((sht) l);
+                       csize += pbsm_idx[mbrc - SHRT_MIN].count;
+               }
+       }
+
+       /* get candidate oid from the pbsm index */
+       if ((candidates = GDKmalloc(csize * sizeof(oid))) == NULL)
+               throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+       i = 0;
+       for (k = mbrcellxmin; k <= mbrcellxmax; k++) {
+               for (l = mbrcellymin; l <= mbrcellymax; l++) {
+                       sht mbrc = ((((unsigned short) k) << shift)) | 
((unsigned short) l);
+                       unsigned short mcell = mbrc - SHRT_MIN;
+                       
+                       for (u = 0; u < pbsm_idx[mcell].count; u++) {
+                               //fprintf(stderr,"[PBSM] Copying cell %d 
(offset %ld, count %ld)\n", mcell,pbsm_idx[mcell].offset,pbsm_idx[mcell].count);
+                               candidates[i] = oids[pbsm_idx[mcell].offset + 
u];
+                               i++;
+                       }
+
+               }
+       }
+
+       assert(BAThdense(batx) && BAThdense(baty));
+
+       if ((*bres = BATnew(TYPE_void, TYPE_oid, csize, TRANSIENT)) == NULL) {
+               throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+       }
+
+       for (i = 0; i < csize; i++) {
+               oid *o = &(candidates[i]);
+               BUNfastins(*bres, NULL, o);
+       }
+
+       /* candidates are expected to be ordered */
+       BATseqbase(*bres, oid_nil); // avoid materialization of the void head
+       *bres = BATmirror(BATorder(BATmirror(*bres)));
+       BATseqbase(*bres, 0);
+
+       //BATkey(BATmirror(*bres), TRUE);
+       (*bres)->hdense = 1;
+       (*bres)->hsorted = 1;
+       (*bres)->tsorted = 1;
+       //(*bres)->tkey = TRUE;
+       BATderiveProps(*bres, false);
+       
+       /* clean up */
+       //GDKfree(pbsm_idx);
+       //GDKfree(oids);
+
+        return MAL_SUCCEED;
+}
+
+static char *
+PBSMselect_(BAT **ret, BAT *bx, BAT *by, mbr *g, 
+          double *minx, double *maxx, double *miny, double *maxy)
+{
+       BAT *bres = NULL;
+       BUN n;
+       char *msg = NULL;
+       dbl *x = NULL, *y = NULL;
+
+       assert (ret != NULL);
+        assert (bx != NULL && by != NULL);
+
+       n = BATcount(bx);
+
+       if (bx->ttype != by->ttype)
+               throw(MAL, "batpbsm.contains16", "tails of input BATs must be 
identical");
+
+       /* get direct access to the tail arrays */
+        x = (dbl*) Tloc(bx, BUNfirst(bx));
+        y = (dbl*) Tloc(by, BUNfirst(by));
+
+       /* allocate result BAT */
+       bres = BATnew(TYPE_void, TYPE_oid, n, TRANSIENT);
+       if (bres == NULL)
+               throw(MAL, "batpbsm.contains16", MAL_MALLOC_FAIL);
+
+       msg = PBSMarraycontains16( &bres, x, bx, y , by, g, n, *minx, *maxx, 
*miny, *maxy);
+
+       if (msg != MAL_SUCCEED) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to