Changeset: 0c1c50f08544 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0c1c50f08544
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_select.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
Branch: compressedcandidates
Log Message:
Prepare for a bit-vector-producing scanselect.
All scans should produce a bitvector, which may then be turned into
an expanded oid list if the storage needed for the oid list is much smaller than (<<) the bitvector size.
diffs (168 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -526,10 +526,10 @@ Ready.
[ "algebra", "sort", "command algebra.sort(b:bat[:any_1], o:bat[:oid],
g:bat[:oid], reverse:bit, stable:bit) (X_0:bat[:any_1], X_1:bat[:oid],
X_2:bat[:oid]) ", "ALGsort33;", "Returns a copy of the BAT sorted on
tail values, a BAT that specifies\n how the input was reordered, and a
BAT with group information.\n The input and output are (must be) dense
headed.\n The order is descending if the reverse bit is set.\n\t\t This
is a stable sort if the stable bit is set." ]
[ "algebra", "subslice", "command algebra.subslice(b:bat[:any_1], x:lng,
y:lng):bat[:oid] ", "ALGsubslice_lng;", "Return the oids of the slice
with the BUNs at position x till y." ]
[ "algebra", "thetajoin", "command algebra.thetajoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], op:int, nil_matches:bit,
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGthetajoin;",
"Theta join with candidate lists" ]
+[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:msk] ", "ALGthetaselect1;", "" ]
[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:oid] ", "ALGthetaselect1;", "Select all head
values for which the tail value obeys the relation\n\tvalue OP VAL.\n\tInput is
a dense-headed BAT, output is a dense-headed BAT with in\n\tthe tail the head
value of the input BAT for which the\n\trelationship holds. The output BAT is
sorted on the tail value." ]
+[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:msk] ", "ALGthetaselect2;", ""
]
[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:oid] ", "ALGthetaselect2;",
"Select all head values of the first input BAT for which the tail
value\n\tobeys the relation value OP VAL and for which the head value occurs
in\n\tthe tail of the second input BAT.\n\tInput is a dense-headed BAT, output
is a dense-headed BAT with in\n\tthe tail the head value of the input BAT for
which the\n\trelationship holds. The output BAT is sorted on the tail value."
]
-[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:msk] ", "ALGthetaselectMsk1;", "" ]
-[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:msk] ", "ALGthetaselectMsk2;", ""
]
[ "algebra", "unique", "command
algebra.unique(b:bat[:any_1]):bat[:oid] ", "ALGunique1;", "Select all
unique values from the tail of the input.\n\tInput is a dense-headed BAT,
output is a dense-headed BAT with\n\tin the tail the head value of the input
BAT that was selected.\n\tThe output BAT is sorted on the tail value." ]
[ "algebra", "unique", "command algebra.unique(b:bat[:any_1],
s:bat[:oid]):bat[:oid] ", "ALGunique2;", "Select all unique values from
the tail of the first input.\n\tInput is a dense-headed BAT, the second input
is a\n\tdense-headed BAT with sorted tail, output is a dense-headed\n\tBAT with
in the tail the head value of the input BAT that was\n\tselected. The output
BAT is sorted on the tail value. The\n\tsecond input BAT is a list of
candidates." ]
[ "bam", "bam_drop_file", "pattern bam.bam_drop_file(file_id:lng,
dbschema:sht):void ", "bam_drop_file;", "Drop alignment tables and
header data for the bam file with the given file_id" ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -630,10 +630,10 @@ Ready.
[ "algebra", "sort", "command algebra.sort(b:bat[:any_1], o:bat[:oid],
g:bat[:oid], reverse:bit, stable:bit) (X_0:bat[:any_1], X_1:bat[:oid],
X_2:bat[:oid]) ", "ALGsort33;", "Returns a copy of the BAT sorted on
tail values, a BAT that specifies\n how the input was reordered, and a
BAT with group information.\n The input and output are (must be) dense
headed.\n The order is descending if the reverse bit is set.\n\t\t This
is a stable sort if the stable bit is set." ]
[ "algebra", "subslice", "command algebra.subslice(b:bat[:any_1], x:lng,
y:lng):bat[:oid] ", "ALGsubslice_lng;", "Return the oids of the slice
with the BUNs at position x till y." ]
[ "algebra", "thetajoin", "command algebra.thetajoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], op:int, nil_matches:bit,
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGthetajoin;",
"Theta join with candidate lists" ]
+[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:msk] ", "ALGthetaselect1;", "" ]
[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:oid] ", "ALGthetaselect1;", "Select all head
values for which the tail value obeys the relation\n\tvalue OP VAL.\n\tInput is
a dense-headed BAT, output is a dense-headed BAT with in\n\tthe tail the head
value of the input BAT for which the\n\trelationship holds. The output BAT is
sorted on the tail value." ]
+[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:msk] ", "ALGthetaselect2;", ""
]
[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:oid] ", "ALGthetaselect2;",
"Select all head values of the first input BAT for which the tail
value\n\tobeys the relation value OP VAL and for which the head value occurs
in\n\tthe tail of the second input BAT.\n\tInput is a dense-headed BAT, output
is a dense-headed BAT with in\n\tthe tail the head value of the input BAT for
which the\n\trelationship holds. The output BAT is sorted on the tail value."
]
-[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
val:any_1, op:str):bat[:msk] ", "ALGthetaselectMsk1;", "" ]
-[ "algebra", "thetaselect", "command algebra.thetaselect(b:bat[:any_1],
s:bat[:oid], val:any_1, op:str):bat[:msk] ", "ALGthetaselectMsk2;", ""
]
[ "algebra", "unique", "command
algebra.unique(b:bat[:any_1]):bat[:oid] ", "ALGunique1;", "Select all
unique values from the tail of the input.\n\tInput is a dense-headed BAT,
output is a dense-headed BAT with\n\tin the tail the head value of the input
BAT that was selected.\n\tThe output BAT is sorted on the tail value." ]
[ "algebra", "unique", "command algebra.unique(b:bat[:any_1],
s:bat[:oid]):bat[:oid] ", "ALGunique2;", "Select all unique values from
the tail of the first input.\n\tInput is a dense-headed BAT, the second input
is a\n\tdense-headed BAT with sorted tail, output is a dense-headed\n\tBAT with
in the tail the head value of the input BAT that was\n\tselected. The output
BAT is sorted on the tail value. The\n\tsecond input BAT is a list of
candidates." ]
[ "bam", "bam_drop_file", "pattern bam.bam_drop_file(file_id:lng,
dbschema:sht):void ", "bam_drop_file;", "Drop alignment tables and
header data for the bam file with the given file_id" ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -165,6 +165,7 @@ gdk_return BATreplace(BAT *b, BAT *p, BA
void BATroles(BAT *b, const char *tnme);
BAT *BATsample(BAT *b, BUN n);
BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int hi,
int anti);
+BAT *BATselectMsk(BAT *b, BAT *s, const void *tl, const void *th, int li, int
hi, int anti);
gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
int nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
gdk_return BATsetaccess(BAT *b, int mode);
void BATsetcapacity(BAT *b, BUN cnt);
@@ -732,8 +733,6 @@ str ALGsubslice_lng(bat *ret, const bat
str ALGthetajoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat *sl,
const bat *sr, const int *op, const bit *nil_matches, const lng *estimate);
str ALGthetaselect1(bat *result, const bat *bid, const void *val, const char
**op);
str ALGthetaselect2(bat *result, const bat *bid, const bat *sid, const void
*val, const char **op);
-str ALGthetaselectMsk1(bat *result, const bat *bid, const void *val, const
char **op);
-str ALGthetaselectMsk2(bat *result, const bat *bid, const bat *sid, const void
*val, const char **op);
str ALGunique1(bat *result, const bat *bid);
str ALGunique2(bat *result, const bat *bid, const bat *sid);
str ALGvariance(dbl *res, const bat *bid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2779,6 +2779,7 @@ gdk_export void BATsetprop(BAT *b, int i
#define JOIN_NE (-3)
gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, int
li, int hi, int anti);
+gdk_export BAT *BATselectMsk(BAT *b, BAT *s, const void *tl, const void *th,
int li, int hi, int anti);
gdk_export BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char
*op);
gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, int
role);
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1781,11 +1781,10 @@ BATselect(BAT *b, BAT *s, const void *tl
/* limit estimation by upper limit */
estimate = MIN(estimate, maximum);
- bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
- if (bn == NULL)
- return NULL;
-
if (equi && hash) {
+ bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
+ if (bn == NULL)
+ return NULL;
ALGODEBUG fprintf(stderr, "#BATselect(b=%s#" BUNFMT
",s=%s%s,anti=%d): hash select\n",
BATgetId(b), BATcount(b),
@@ -1794,6 +1793,9 @@ BATselect(BAT *b, BAT *s, const void *tl
bn = BAT_hashselect(b, s, bn, tl, maximum);
} else {
int use_imprints = 0;
+ bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
+ if (bn == NULL)
+ return NULL;
if (!equi &&
!b->tvarsized &&
(b->batPersistence == PERSISTENT ||
@@ -1814,6 +1816,14 @@ BATselect(BAT *b, BAT *s, const void *tl
return virtualize(bn);
}
+BAT *
+BATselectMsk(BAT *b, BAT *s, const void *tl, const void *th,
+ int li, int hi, int anti)
+{
+ /* TODO massage the select to produce a bitvector */
+ return BATselect(b, s, tl, th, li, hi, anti);
+}
+
/* theta select
*
* Returns a dense-headed BAT with the OID values of b in the tail for
diff --git a/monetdb5/modules/kernel/algebra.c
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -307,24 +307,12 @@ ALGthetaselect2(bat *result, const bat *
}
str
-ALGthetaselectMsk2(bat *result, const bat *bid, const bat *sid, const void
*val, const char **op)
-{
- return ALGthetaselect2(result, bid, sid, val, op);
-}
-
-str
ALGthetaselect1(bat *result, const bat *bid, const void *val, const char **op)
{
return ALGthetaselect2(result, bid, NULL, val, op);
}
str
-ALGthetaselectMsk1(bat *result, const bat *bid, const void *val, const char
**op)
-{
- return ALGthetaselectMsk2(result, bid, NULL, val, op);
-}
-
-str
ALGselectNotNil(bat *result, const bat *bid)
{
BAT *b, *bn = NULL;
diff --git a/monetdb5/modules/kernel/algebra.h
b/monetdb5/modules/kernel/algebra.h
--- a/monetdb5/modules/kernel/algebra.h
+++ b/monetdb5/modules/kernel/algebra.h
@@ -27,9 +27,7 @@ mal_export str ALGselectMsk1(bat *result
mal_export str ALGselect2(bat *result, const bat *bid, const bat *sid, const
void *low, const void *high, const bit *li, const bit *hi, const bit *anti);
mal_export str ALGselectMsk2(bat *result, const bat *bid, const bat *sid,
const void *low, const void *high, const bit *li, const bit *hi, const bit
*anti);
mal_export str ALGthetaselect1(bat *result, const bat *bid, const void *val,
const char **op);
-mal_export str ALGthetaselectMsk1(bat *result, const bat *bid, const void
*val, const char **op);
mal_export str ALGthetaselect2(bat *result, const bat *bid, const bat *sid,
const void *val, const char **op);
-mal_export str ALGthetaselectMsk2(bat *result, const bat *bid, const bat *sid,
const void *val, const char **op);
mal_export str ALGjoin(bat *r1, bat *r2, const bat *l, const bat *r, const bat
*sl, const bat *sr, const bit *nil_matches, const lng *estimate);
mal_export str ALGleftjoin(bat *r1, bat *r2, const bat *l, const bat *r, const
bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate);
diff --git a/monetdb5/modules/kernel/algebra.mal
b/monetdb5/modules/kernel/algebra.mal
--- a/monetdb5/modules/kernel/algebra.mal
+++ b/monetdb5/modules/kernel/algebra.mal
@@ -75,7 +75,7 @@ comment "Select all head values of the f
function.";
command thetaselect(b:bat[:any_1], val:any_1, op:str) :bat[:msk]
-address ALGthetaselectMsk1;
+address ALGthetaselect1;
command thetaselect(b:bat[:any_1], val:any_1, op:str) :bat[:oid]
address ALGthetaselect1
comment "Select all head values for which the tail value obeys the relation
@@ -85,7 +85,7 @@ comment "Select all head values for whic
relationship holds. The output BAT is sorted on the tail value.";
command thetaselect(b:bat[:any_1], s:bat[:oid], val:any_1, op:str) :bat[:msk]
-address ALGthetaselectMsk2;
+address ALGthetaselect2;
command thetaselect(b:bat[:any_1], s:bat[:oid], val:any_1, op:str) :bat[:oid]
address ALGthetaselect2
comment "Select all head values of the first input BAT for which the tail value
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list