Changeset: aaaebd745aa0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=aaaebd745aa0
Modified Files:
        gdk/gdk_sample.c
Branch: stratified_sampling
Log Message:

remove debug output, improve comments


diffs (118 lines):

diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -64,7 +64,7 @@
        } while (0)
 
 
-//CUSTOM SWAP AND COMPARE FUNCTION
+/* CUSTOM SWAP AND COMPARE FUNCTION */
 #define SWAP3(p1, p2)                          \
        do {                                    \
                item = oids[p1];                \
@@ -243,11 +243,11 @@ BATsample(BAT *b, BUN n)
 /* based on Alg-A-exp from 'Weighted random sampling with a reservoir' by 
Efraimidis and Spirakis (2006) */
 BAT *
 BATweightedsample(BAT *b, BUN n, BAT *w)
-{//TODO: does not create correct samples yet (so it seems! or is the output 
broken?)
+{//TODO test correctness extensively
        BAT* sample;
-       oid* oids;//points to the oids in sample
+       oid* oids;/* points to the oids in sample */
        dbl* w_ptr;//TODO types of w
-       dbl* keys;//keys as defined in Alg-A-exp
+       dbl* keys;/* keys as defined in Alg-A-exp */
        BUN cnt, i, j;
        mtwist *mt_rng;
        BUN pos, childpos;
@@ -263,7 +263,7 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
                                        "BATsample: type of weights not 
castable to doubles\n", NULL);
        ERRORcheck(w->ttype != TYPE_dbl,
                                        "BATsample: type of weights must be 
doubles\n", NULL);//TODO types of w (want to remove this)
-
+       //TODO: handle NULL values in w_ptr
        cnt = BATcount(b);
 
        keys = (dbl*) malloc(sizeof(dbl)*n);
@@ -276,67 +276,49 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
                return NULL;
        }
 
-
-       //oids = (oid *)sample->theap.base;
        oids = (oid *) Tloc(sample, 0);
-       w_ptr = (dbl*) Tloc(w, 0);//TODO is this the right way to get w_ptr?
+       w_ptr = (dbl*) Tloc(w, 0);
 
        mt_rng = mtwist_new();
        mtwist_seed(mt_rng, rand());
 
        BATsetcount(sample, n);
                /* obtain sample */
-       //TODO: reservoir sampling with exponential jumps
-       //Initialize prioqueue
-       i=0;//i indices the initial sample (filled with elements with non-zero 
weight)
-               //j indices the oids and weights
+
+       /* Initialize prioqueue */
+       i=0;/* i indices the initial sample (filled with elements with non-zero 
weight) */
+               /* j indices the oids and weights */
        for(j=0; i < n && j < cnt; j++) {
                if(w_ptr[j] == 0.0)
                        continue;
                oids[i] = (oid)(j+minoid);
-               keys[i] = pow(mtwist_drand(mt_rng),1.0/w_ptr[j]);//TODO cast 
1.0 to dbl? NULL values in w_ptr
+               keys[i] = pow(mtwist_drand(mt_rng),1.0/w_ptr[j]);//TODO cast 
1.0 to dbl?
                i++;
        }
-       if(i < n)//not enough non-zero weights
+       if(i < n)/* not enough non-zero weights: cannot take sample */
                return NULL;
-       heapify(compKeysGT, SWAP3);//NOTE: writes to 'i'
 
-       fprintf(stderr,"\n\n ID | KEY   (INITIAL HEAP)\n");
-       fprintf(stderr,"----+----\n");
-       for(i=0; i<n; i++)
-               fprintf(stderr," %2d | %.2f \n", (int)(oids[i]-minoid), 
keys[i]);
+       heapify(compKeysGT, SWAP3);
 
        while(true) {
                r = mtwist_drand(mt_rng);
                xw = log(r)/log(keys[0]);
-               fprintf(stderr,"THIS SHOULD BE POSITIVE: xw = log(r = 
%f)/log(keys[0] = %f) = %f\n",r,keys[0],xw);
-               for(;j<cnt && xw > w_ptr[j]; j++) {
+               for(;j<cnt && xw > w_ptr[j]; j++)
                        xw -= w_ptr[j];
-                       fprintf(stderr,"j = %d, xw = %f (just subtracted 
%f)\n", (int)j, xw, w_ptr[j]);
-               }
                if(j >= cnt) break;
-               //At this point, w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1] < xw < 
w_ptr[c]+w_ptr[c+1]+...+w_ptr[i]
+
+               /* At this point, w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1] < xw < 
w_ptr[c]+w_ptr[c+1]+...+w_ptr[i] */
                tw = pow(keys[0], w_ptr[j]);
                r2 = mtwist_drand(mt_rng)*(1-tw)+tw;
                key = pow(r2, 1/w_ptr[j]);
 
-               //Replace element with lowest key in prioqueue
+               /* Replace element with lowest key in prioqueue */
                oids[0] = (oid)(j+minoid);
                keys[0] = key;
-               siftup(compKeysGT, 0, SWAP3);//NOTE: writes to 'key'
-
-               fprintf(stderr,"\n\n ID | KEY   (<%d,%.2f> INSERTED)\n", 
(int)j, key);
-               fprintf(stderr,"----+----\n");
-               for(i=0; i<n; i++)
-                       fprintf(stderr," %2d | %.2f \n", (int)(oids[i]-minoid), 
keys[i]);
+               siftup(compKeysGT, 0, SWAP3);
 
-               //We have added the jth element, can continue
-               j++;
+               j++;/* Increment j so j=c (c is defined in Alg-A-exp) */
        }
-       fprintf(stderr,"\n\n ID | KEY   (FINAL RESULT)\n");
-       fprintf(stderr,"----+----\n");
-       for(i=0; i<n; i++)
-               fprintf(stderr," %2d | %.2f \n", (int)(oids[i]-minoid), 
keys[i]);
 
        free(keys);
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to