Changeset: eada3ae49019 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=eada3ae49019
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_imprints.c
        gdk/gdk_sample.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
        monetdb5/modules/kernel/algebra.mal
        monetdb5/modules/mal/groupby.c
        monetdb5/modules/mal/joinpath.c
        monetdb5/modules/mal/sample.c
        monetdb5/modules/mal/sample.h
        monetdb5/modules/mal/sample.mal
        sql/backends/monet5/sql_statistics.c
Branch: default
Log Message:

BATsample now expects a void-headed BAT and returns the sampled oids everywhere; the separate BATsample_ variant has been removed.


diffs (284 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -177,7 +177,6 @@ BAT *BATreplace(BAT *b, BAT *n, bit forc
 BAT *BATrevert(BAT *b);
 BAT *BATroles(BAT *b, const char *hnme, const char *tnme);
 BAT *BATsample(BAT *b, BUN n);
-BAT *BATsample_(BAT *b, BUN n);
 BAT *BATsave(BAT *b);
 BAT *BATselect(BAT *b, const void *tl, const void *th);
 BAT *BATselect_(BAT *b, const void *tl, const void *th, bit li, bit hi);
@@ -799,7 +798,6 @@ str ALGstdevp(dbl *res, int *bid);
 str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit *nil_matches, lng *estimate);
 str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit *nil_matches, lng *estimate);
 str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit *nil_matches, lng *estimate);
-str ALGsubsample(int *result, int *bid, int *param);
 str ALGsubselect1(bat *result, bat *bid, const void *low, const void *high, const bit *li, const bit *hi, const bit *anti);
 str ALGsubselect2(bat *result, bat *bid, bat *sid, const void *low, const void *high, const bit *li, const bit *hi, const bit *anti);
 str ALGsubslice_wrd(int *ret, bat *bid, wrd *start, wrd *end);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3356,7 +3356,6 @@ gdk_export BAT *BATintersectcand(BAT *a,
  *
  */
 gdk_export BAT *BATsample(BAT *b, BUN n);
-gdk_export BAT *BATsample_(BAT *b, BUN n); /* version that expects void head and returns oids */
 
 /*
  *
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -568,7 +568,7 @@ BATimprints(BAT *b) {
                }
 
 #define SMP_SIZE 2048
-               s = BATsample_(b, SMP_SIZE);
+               s = BATsample(b, SMP_SIZE);
                smp = BATsubunique(b, s);
                BBPunfix(s->batCacheid);
                s = BATproject(smp,b);
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -48,6 +48,7 @@
  * properties. The sample is without replacement.
  */
 
+/* BATsample implements sampling for void headed BATs */
 BAT *
 BATsample(BAT *b, BUN n)
 {
@@ -55,76 +56,6 @@ BATsample(BAT *b, BUN n)
        BUN cnt;
 
        BATcheck(b, "BATsample");
-       ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n");
-       ALGODEBUG fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n", n);
-
-       cnt = BATcount(b);
-       if (cnt <= n) {
-               bn = BATcopy(b, b->htype, b->ttype, TRUE);
-       } else {
-               BUN top = cnt - n;
-               BUN smp = n;
-               BATiter iter = bat_iterator(b);
-               BUN p = BUNfirst(b)-1;
-               bn = BATnew(
-                       b->htype==TYPE_void && b->hseqbase!=oid_nil?TYPE_oid:b->htype,
-                       b->ttype==TYPE_void && b->tseqbase!=oid_nil?TYPE_oid:b->ttype,
-                       n);
-               if (bn == NULL)
-                       return NULL;
-               if (n == 0)
-                       return bn;
-               while (smp-->1) { /* loop until all but 1 values are sampled */
-                       double v = DRAND;
-                       double quot = (double)top/(double)cnt;
-                       BUN jump = 0;
-                       while (quot > v) { /* determine how many positions to jump */
-                               jump++;
-                               top--;
-                               cnt--;
-                               quot *= (double)top/(double)cnt;
-                       }
-                       p += (jump+1);
-                       cnt--;
-                       bunfastins(bn, BUNhead(iter, p), BUNtail(iter,p));
-               }
-               /* 1 left */
-               p += (BUN) rand() % cnt;
-               bunfastins(bn, BUNhead(iter, p+1), BUNtail(iter,p+1));
-
-               /* property management */
-               bn->hsorted = BAThordered(b);
-               bn->tsorted = BATtordered(b);
-               bn->hrevsorted = BAThrevordered(b);
-               bn->trevsorted = BATtrevordered(b);
-               bn->hdense = FALSE;
-               bn->tdense = FALSE;
-               BATkey(bn, BAThkey(b));
-               BATkey(BATmirror(bn), BATtkey(b));
-               bn->H->seq = b->H->seq;
-               bn->T->seq = b->T->seq;
-               bn->H->nil = b->H->nil;
-               bn->T->nil = b->T->nil;
-               bn->H->nonil = b->H->nonil;
-               bn->T->nonil = b->T->nonil;
-               BATsetcount(bn, n);
-       }
-
-       return bn;
-
-bunins_failed:
-       BBPreclaim(bn);
-       return NULL;
-}
-
-/* BATsample_ implements sampling for void headed BATs */
-BAT *
-BATsample_(BAT *b, BUN n)
-{
-       BAT *bn;
-       BUN cnt;
-
-       BATcheck(b, "BATsample");
        assert(BAThdense(b));
        ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n");
        ALGODEBUG fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n", n);
diff --git a/monetdb5/modules/kernel/algebra.c b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -1396,12 +1396,6 @@ ALGsample(bat *result, bat *bid, int *pa
        return ALGbinaryint(result, bid, param, BATsample, "algebra.sample");
 }
 
-str
-ALGsubsample(bat *result, bat *bid, int *param)
-{
-       return ALGbinaryint(result, bid, param, BATsample_, "algebra.subsample");
-}
-
 /* add items missing in the kernel */
 str
 ALGtunique(int *result, int *bid)
diff --git a/monetdb5/modules/kernel/algebra.h b/monetdb5/modules/kernel/algebra.h
--- a/monetdb5/modules/kernel/algebra.h
+++ b/monetdb5/modules/kernel/algebra.h
@@ -105,7 +105,6 @@ algebra_export str ALGkdiff(int *result,
 algebra_export str ALGtdifference(int *result, int *lid, int *rid);
 algebra_export str ALGtdiff(int *result, int *lid, int *rid);
 algebra_export str ALGsample(int *result, int* bid, int *param);
-algebra_export str ALGsubsample(int *result, int* bid, int *param);
 
 algebra_export str ALGtsort(int *result, int *bid);
 algebra_export str ALGtsort_rev(int *result, int *bid);
diff --git a/monetdb5/modules/kernel/algebra.mal b/monetdb5/modules/kernel/algebra.mal
--- a/monetdb5/modules/kernel/algebra.mal
+++ b/monetdb5/modules/kernel/algebra.mal
@@ -215,13 +215,9 @@ address ALGlike
 comment "Selects all elements that have 'substr' as in the tail.";
 
 # @- Sampling
-command sample ( b:bat[:oid,:any_2], num:int ) :bat[:oid,:any_2]
+command sample ( b:bat[:oid,:any_1], num:int ) :bat[:oid,:oid]
 address ALGsample
-comment "Produce a random selection of size 'num' from the input BAT.";
-
-command subsample(b:bat[:oid,:any_1], num:int ) :bat[:oid,:oid]
-address ALGsubsample
-comment "Return the oids of a random selection of size 'num' from the input BAT.";
+comment "Returns the oids of a random selection of size 'num' from the input BAT.";
 
 # @+ BAT copying
 command copy( b:bat[:any_1,:any_2]) :bat[:any_1,:any_2]
diff --git a/monetdb5/modules/mal/groupby.c b/monetdb5/modules/mal/groupby.c
--- a/monetdb5/modules/mal/groupby.c
+++ b/monetdb5/modules/mal/groupby.c
@@ -103,7 +103,7 @@ GROUPcollect( Client cntxt, MalBlkPtr mb
                sample = BATcount(b) < 1000 ? BATcount(b): 1000;
                bs = BATsample( b, sample);
                if (bs) {
-                       bh = BATkunique(BATmirror(bs));
+                       bh = BATsubunique(b, bs);
                        a->unique[a->last] = BATcount(bh);
                        if ( bh ) BBPreleaseref(bh->batCacheid);
                }
diff --git a/monetdb5/modules/mal/joinpath.c b/monetdb5/modules/mal/joinpath.c
--- a/monetdb5/modules/mal/joinpath.c
+++ b/monetdb5/modules/mal/joinpath.c
@@ -72,10 +72,10 @@ ALGjoinCost(Client cntxt, BAT *l, BAT *r
        /* The sampling method */
        if(flag < 2 && ( lc > 100000 || rc > 100000)){
                lsize= MIN(lc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
-               lsample= BATsample_(l,lsize);
+               lsample= BATsample(l,lsize);
                BBPreclaim(lsample);
                rsize= MIN(rc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
-               rsample= BATsample_(r,rsize);
+               rsample= BATsample(r,rsize);
                BBPreclaim(rsample);
                j= BATjoin(l,r, MAX(lsize,rsize));
                lsize= BATcount(j);
diff --git a/monetdb5/modules/mal/sample.c b/monetdb5/modules/mal/sample.c
--- a/monetdb5/modules/mal/sample.c
+++ b/monetdb5/modules/mal/sample.c
@@ -83,27 +83,10 @@ SAMPLEuniform(bat *r, bat *b, ptr s) {
        BAT *br, *bb;
 
        if ((bb = BATdescriptor(*b)) == NULL) {
-               throw(MAL, "sample.uniform", INTERNAL_BAT_ACCESS);
+               throw(MAL, "sample.subuniform", INTERNAL_BAT_ACCESS);
        }
        br = BATsample(bb,*(BUN *)s);
        if (br == NULL)
-               throw(MAL, "sample.uniform", OPERATION_FAILED);
-
-       BBPunfix(bb->batCacheid);
-       BBPkeepref(*r = br->batCacheid);
-       return MAL_SUCCEED;
-
-}
-
-str
-SAMPLEsubuniform(bat *r, bat *b, ptr s) {
-       BAT *br, *bb;
-
-       if ((bb = BATdescriptor(*b)) == NULL) {
-               throw(MAL, "sample.subuniform", INTERNAL_BAT_ACCESS);
-       }
-       br = BATsample_(bb,*(BUN *)s);
-       if (br == NULL)
                throw(MAL, "sample.subuniform", OPERATION_FAILED);
 
        BBPunfix(bb->batCacheid);
diff --git a/monetdb5/modules/mal/sample.h b/monetdb5/modules/mal/sample.h
--- a/monetdb5/modules/mal/sample.h
+++ b/monetdb5/modules/mal/sample.h
@@ -42,9 +42,6 @@ sample_export str
 SAMPLEuniform(bat *r, bat *b, ptr s);
 
 sample_export str
-SAMPLEsubuniform(bat *r, bat *b, ptr s);
-
-sample_export str
 SAMPLEuniform_dbl(bat *r, bat *b, ptr p);
 
 #endif
diff --git a/monetdb5/modules/mal/sample.mal b/monetdb5/modules/mal/sample.mal
--- a/monetdb5/modules/mal/sample.mal
+++ b/monetdb5/modules/mal/sample.mal
@@ -23,14 +23,11 @@
 
 module sample;
 
-command uniform(b:bat[:oid,:any],s:wrd):bat[:oid,:any]
+command uniform(b:bat[:oid,:any],s:wrd):bat[:oid,:oid]
 address SAMPLEuniform
 comment "Returns a uniform sample of size s"
-
-command subuniform(b:bat[:oid,:any],s:wrd):bat[:oid,:oid]
-address SAMPLEsubuniform
 comment "Returns the oids of a uniform sample of size s"
 
-command uniform(b:bat[:oid,:any],p:dbl):bat[:oid,:any]
+command uniform(b:bat[:oid,:any],p:dbl):bat[:oid,:oid]
 address SAMPLEuniform_dbl
-comment "Returns a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0"
+comment "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0"
diff --git a/sql/backends/monet5/sql_statistics.c b/sql/backends/monet5/sql_statistics.c
--- a/sql/backends/monet5/sql_statistics.c
+++ b/sql/backends/monet5/sql_statistics.c
@@ -103,8 +103,8 @@ sql_analyze(Client cntxt, MalBlkPtr mb, 
                                                if (samplesize > 0) {
                                                        bsample = BATsample(bn, (BUN) 25000);
                                                } else
-                                                       bsample = bn;
-                                               br = BATsubselect(bsample, NULL, ATOMnilptr(bn->ttype), ATOMnilptr(bn->ttype), 0, 0, 0);
+                                                       bsample = NULL;
+                                               br = BATsubselect(bn, bsample, ATOMnilptr(bn->ttype), ATOMnilptr(bn->ttype), 0, 0, 0);
                                                nils = BATcount(br);
                                                BBPunfix(br->batCacheid);
                                                if (bn->tkey)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to