Changeset: eada3ae49019 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=eada3ae49019
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_imprints.c
gdk/gdk_sample.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
monetdb5/modules/mal/groupby.c
monetdb5/modules/mal/joinpath.c
monetdb5/modules/mal/sample.c
monetdb5/modules/mal/sample.h
monetdb5/modules/mal/sample.mal
sql/backends/monet5/sql_statistics.c
Branch: default
Log Message:
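BATsample is now void-headed everywhere: the separate BATsample_ variant is removed and its callers use BATsample, which returns the oids of the sample.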
diffs (284 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -177,7 +177,6 @@ BAT *BATreplace(BAT *b, BAT *n, bit forc
BAT *BATrevert(BAT *b);
BAT *BATroles(BAT *b, const char *hnme, const char *tnme);
BAT *BATsample(BAT *b, BUN n);
-BAT *BATsample_(BAT *b, BUN n);
BAT *BATsave(BAT *b);
BAT *BATselect(BAT *b, const void *tl, const void *th);
BAT *BATselect_(BAT *b, const void *tl, const void *th, bit li, bit hi);
@@ -799,7 +798,6 @@ str ALGstdevp(dbl *res, int *bid);
str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
-str ALGsubsample(int *result, int *bid, int *param);
str ALGsubselect1(bat *result, bat *bid, const void *low, const void *high,
const bit *li, const bit *hi, const bit *anti);
str ALGsubselect2(bat *result, bat *bid, bat *sid, const void *low, const void
*high, const bit *li, const bit *hi, const bit *anti);
str ALGsubslice_wrd(int *ret, bat *bid, wrd *start, wrd *end);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3356,7 +3356,6 @@ gdk_export BAT *BATintersectcand(BAT *a,
*
*/
gdk_export BAT *BATsample(BAT *b, BUN n);
-gdk_export BAT *BATsample_(BAT *b, BUN n); /* version that expects void head
and returns oids */
/*
*
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -568,7 +568,7 @@ BATimprints(BAT *b) {
}
#define SMP_SIZE 2048
- s = BATsample_(b, SMP_SIZE);
+ s = BATsample(b, SMP_SIZE);
smp = BATsubunique(b, s);
BBPunfix(s->batCacheid);
s = BATproject(smp,b);
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -48,6 +48,7 @@
* properties. The sample is without replacement.
*/
+/* BATsample implements sampling for void headed BATs */
BAT *
BATsample(BAT *b, BUN n)
{
@@ -55,76 +56,6 @@ BATsample(BAT *b, BUN n)
BUN cnt;
BATcheck(b, "BATsample");
- ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n");
- ALGODEBUG fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n",
n);
-
- cnt = BATcount(b);
- if (cnt <= n) {
- bn = BATcopy(b, b->htype, b->ttype, TRUE);
- } else {
- BUN top = cnt - n;
- BUN smp = n;
- BATiter iter = bat_iterator(b);
- BUN p = BUNfirst(b)-1;
- bn = BATnew(
- b->htype==TYPE_void &&
b->hseqbase!=oid_nil?TYPE_oid:b->htype,
- b->ttype==TYPE_void &&
b->tseqbase!=oid_nil?TYPE_oid:b->ttype,
- n);
- if (bn == NULL)
- return NULL;
- if (n == 0)
- return bn;
- while (smp-->1) { /* loop until all but 1 values are sampled */
- double v = DRAND;
- double quot = (double)top/(double)cnt;
- BUN jump = 0;
- while (quot > v) { /* determine how many positions to
jump */
- jump++;
- top--;
- cnt--;
- quot *= (double)top/(double)cnt;
- }
- p += (jump+1);
- cnt--;
- bunfastins(bn, BUNhead(iter, p), BUNtail(iter,p));
- }
- /* 1 left */
- p += (BUN) rand() % cnt;
- bunfastins(bn, BUNhead(iter, p+1), BUNtail(iter,p+1));
-
- /* property management */
- bn->hsorted = BAThordered(b);
- bn->tsorted = BATtordered(b);
- bn->hrevsorted = BAThrevordered(b);
- bn->trevsorted = BATtrevordered(b);
- bn->hdense = FALSE;
- bn->tdense = FALSE;
- BATkey(bn, BAThkey(b));
- BATkey(BATmirror(bn), BATtkey(b));
- bn->H->seq = b->H->seq;
- bn->T->seq = b->T->seq;
- bn->H->nil = b->H->nil;
- bn->T->nil = b->T->nil;
- bn->H->nonil = b->H->nonil;
- bn->T->nonil = b->T->nonil;
- BATsetcount(bn, n);
- }
-
- return bn;
-
-bunins_failed:
- BBPreclaim(bn);
- return NULL;
-}
-
-/* BATsample_ implements sampling for void headed BATs */
-BAT *
-BATsample_(BAT *b, BUN n)
-{
- BAT *bn;
- BUN cnt;
-
- BATcheck(b, "BATsample");
assert(BAThdense(b));
ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n");
ALGODEBUG fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n",
n);
diff --git a/monetdb5/modules/kernel/algebra.c
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -1396,12 +1396,6 @@ ALGsample(bat *result, bat *bid, int *pa
return ALGbinaryint(result, bid, param, BATsample, "algebra.sample");
}
-str
-ALGsubsample(bat *result, bat *bid, int *param)
-{
- return ALGbinaryint(result, bid, param, BATsample_,
"algebra.subsample");
-}
-
/* add items missing in the kernel */
str
ALGtunique(int *result, int *bid)
diff --git a/monetdb5/modules/kernel/algebra.h
b/monetdb5/modules/kernel/algebra.h
--- a/monetdb5/modules/kernel/algebra.h
+++ b/monetdb5/modules/kernel/algebra.h
@@ -105,7 +105,6 @@ algebra_export str ALGkdiff(int *result,
algebra_export str ALGtdifference(int *result, int *lid, int *rid);
algebra_export str ALGtdiff(int *result, int *lid, int *rid);
algebra_export str ALGsample(int *result, int* bid, int *param);
-algebra_export str ALGsubsample(int *result, int* bid, int *param);
algebra_export str ALGtsort(int *result, int *bid);
algebra_export str ALGtsort_rev(int *result, int *bid);
diff --git a/monetdb5/modules/kernel/algebra.mal
b/monetdb5/modules/kernel/algebra.mal
--- a/monetdb5/modules/kernel/algebra.mal
+++ b/monetdb5/modules/kernel/algebra.mal
@@ -215,13 +215,9 @@ address ALGlike
comment "Selects all elements that have 'substr' as in the tail.";
# @- Sampling
-command sample ( b:bat[:oid,:any_2], num:int ) :bat[:oid,:any_2]
+command sample ( b:bat[:oid,:any_1], num:int ) :bat[:oid,:oid]
address ALGsample
-comment "Produce a random selection of size 'num' from the input BAT.";
-
-command subsample(b:bat[:oid,:any_1], num:int ) :bat[:oid,:oid]
-address ALGsubsample
-comment "Return the oids of a random selection of size 'num' from the input
BAT.";
+comment "Returns the oids of a random selection of size 'num' from the input
BAT.";
# @+ BAT copying
command copy( b:bat[:any_1,:any_2]) :bat[:any_1,:any_2]
diff --git a/monetdb5/modules/mal/groupby.c b/monetdb5/modules/mal/groupby.c
--- a/monetdb5/modules/mal/groupby.c
+++ b/monetdb5/modules/mal/groupby.c
@@ -103,7 +103,7 @@ GROUPcollect( Client cntxt, MalBlkPtr mb
sample = BATcount(b) < 1000 ? BATcount(b): 1000;
bs = BATsample( b, sample);
if (bs) {
- bh = BATkunique(BATmirror(bs));
+ bh = BATsubunique(b, bs);
a->unique[a->last] = BATcount(bh);
if ( bh ) BBPreleaseref(bh->batCacheid);
}
diff --git a/monetdb5/modules/mal/joinpath.c b/monetdb5/modules/mal/joinpath.c
--- a/monetdb5/modules/mal/joinpath.c
+++ b/monetdb5/modules/mal/joinpath.c
@@ -72,10 +72,10 @@ ALGjoinCost(Client cntxt, BAT *l, BAT *r
/* The sampling method */
if(flag < 2 && ( lc > 100000 || rc > 100000)){
lsize= MIN(lc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
- lsample= BATsample_(l,lsize);
+ lsample= BATsample(l,lsize);
BBPreclaim(lsample);
rsize= MIN(rc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
- rsample= BATsample_(r,rsize);
+ rsample= BATsample(r,rsize);
BBPreclaim(rsample);
j= BATjoin(l,r, MAX(lsize,rsize));
lsize= BATcount(j);
diff --git a/monetdb5/modules/mal/sample.c b/monetdb5/modules/mal/sample.c
--- a/monetdb5/modules/mal/sample.c
+++ b/monetdb5/modules/mal/sample.c
@@ -83,27 +83,10 @@ SAMPLEuniform(bat *r, bat *b, ptr s) {
BAT *br, *bb;
if ((bb = BATdescriptor(*b)) == NULL) {
- throw(MAL, "sample.uniform", INTERNAL_BAT_ACCESS);
+ throw(MAL, "sample.subuniform", INTERNAL_BAT_ACCESS);
}
br = BATsample(bb,*(BUN *)s);
if (br == NULL)
- throw(MAL, "sample.uniform", OPERATION_FAILED);
-
- BBPunfix(bb->batCacheid);
- BBPkeepref(*r = br->batCacheid);
- return MAL_SUCCEED;
-
-}
-
-str
-SAMPLEsubuniform(bat *r, bat *b, ptr s) {
- BAT *br, *bb;
-
- if ((bb = BATdescriptor(*b)) == NULL) {
- throw(MAL, "sample.subuniform", INTERNAL_BAT_ACCESS);
- }
- br = BATsample_(bb,*(BUN *)s);
- if (br == NULL)
throw(MAL, "sample.subuniform", OPERATION_FAILED);
BBPunfix(bb->batCacheid);
diff --git a/monetdb5/modules/mal/sample.h b/monetdb5/modules/mal/sample.h
--- a/monetdb5/modules/mal/sample.h
+++ b/monetdb5/modules/mal/sample.h
@@ -42,9 +42,6 @@ sample_export str
SAMPLEuniform(bat *r, bat *b, ptr s);
sample_export str
-SAMPLEsubuniform(bat *r, bat *b, ptr s);
-
-sample_export str
SAMPLEuniform_dbl(bat *r, bat *b, ptr p);
#endif
diff --git a/monetdb5/modules/mal/sample.mal b/monetdb5/modules/mal/sample.mal
--- a/monetdb5/modules/mal/sample.mal
+++ b/monetdb5/modules/mal/sample.mal
@@ -23,14 +23,11 @@
module sample;
-command uniform(b:bat[:oid,:any],s:wrd):bat[:oid,:any]
+command uniform(b:bat[:oid,:any],s:wrd):bat[:oid,:oid]
address SAMPLEuniform
comment "Returns a uniform sample of size s"
-
-command subuniform(b:bat[:oid,:any],s:wrd):bat[:oid,:oid]
-address SAMPLEsubuniform
comment "Returns the oids of a uniform sample of size s"
-command uniform(b:bat[:oid,:any],p:dbl):bat[:oid,:any]
+command uniform(b:bat[:oid,:any],p:dbl):bat[:oid,:oid]
address SAMPLEuniform_dbl
-comment "Returns a uniform sample of size = (p x count(b)), where 0 <= p <=
1.0"
+comment "Returns the oids of a uniform sample of size = (p x count(b)), where
0 <= p <= 1.0"
diff --git a/sql/backends/monet5/sql_statistics.c
b/sql/backends/monet5/sql_statistics.c
--- a/sql/backends/monet5/sql_statistics.c
+++ b/sql/backends/monet5/sql_statistics.c
@@ -103,8 +103,8 @@ sql_analyze(Client cntxt, MalBlkPtr mb,
if (samplesize > 0) {
bsample = BATsample(bn,
(BUN) 25000);
} else
- bsample = bn;
- br = BATsubselect(bsample,
NULL, ATOMnilptr(bn->ttype), ATOMnilptr(bn->ttype), 0, 0, 0);
+ bsample = NULL;
+ br = BATsubselect(bn, bsample,
ATOMnilptr(bn->ttype), ATOMnilptr(bn->ttype), 0, 0, 0);
nils = BATcount(br);
BBPunfix(br->batCacheid);
if (bn->tkey)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list