Changeset: 4dc82dbcced9 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4dc82dbcced9
Modified Files:
sql/include/sql_relation.h
Branch: balanced_union
Log Message:
Merges with default
diffs (truncated from 2010 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -51033,56 +51033,6 @@ user_statistics
pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
SYSMONstatistics;
(empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
txtsim
dameraulevenshtein
pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -39358,56 +39358,6 @@ user_statistics
pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
SYSMONstatistics;
(empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
txtsim
dameraulevenshtein
pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/common/stream/stream.h b/common/stream/stream.h
--- a/common/stream/stream.h
+++ b/common/stream/stream.h
@@ -245,7 +245,7 @@ typedef struct bstream {
stream_export bstream *bstream_create(stream *rs, size_t chunk_size); // used all over
stream_export void bstream_destroy(bstream *s); // all over
-stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c, tokenizer.c
+stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c
stream_export ssize_t bstream_next(bstream *s); // all over
/* Callback stream is a stream where the read and write functions are
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -2247,7 +2247,7 @@ BBPdump(void)
continue;
BAT *b = BBP_desc(i);
unsigned status = BBP_status(i);
- printf("# %d: " ALGOOPTBATFMT "refs=%d lrefs=%d status=%u%s",
+ printf("# %d: " ALGOOPTBATFMT " refs=%d lrefs=%d status=%u%s",
i,
ALGOOPTBATPAR(b),
BBP_refs(i),
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -2085,6 +2085,12 @@ log_load(const char *fn, const char *log
lg->seqs_val = BATdescriptor(BBPindex(bak));
strconcat_len(bak, sizeof(bak), fn, "_dseqs", NULL);
lg->dseqs = BATdescriptor(BBPindex(bak));
+ if (lg->seqs_id == NULL ||
+ lg->seqs_val == NULL ||
+ lg->dseqs == NULL) {
+ GDKerror("Logger_new: cannot load seqs bats");
+ goto error;
+ }
} else {
lg->seqs_id = logbat_new(TYPE_int, 1, PERSISTENT);
lg->seqs_val = logbat_new(TYPE_lng, 1, PERSISTENT);
diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c
--- a/gdk/gdk_storage.c
+++ b/gdk/gdk_storage.c
@@ -550,7 +550,7 @@ GDKload(int farmid, const char *nme, con
for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
if (n < 0)
- GDKsyserror("GDKload: cannot read: name=%s, ext=%s, %zu bytes missing\n", nme, ext ? ext : "", (size_t) n_expected);
+ GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
#ifndef __COVERITY__
/* Coverity doesn't seem to
* recognize that we're just
@@ -567,7 +567,8 @@ GDKload(int farmid, const char *nme, con
/* we couldn't read all, error
* already generated */
GDKfree(ret);
- GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
+ if (n >= 0) /* don't report error twice */
+ GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
ret = NULL;
}
#ifndef NDEBUG
@@ -763,13 +764,12 @@ BATsave_iter(BAT *b, BATiter *bi, BUN si
}
if (size != b->batCount || b->batInserted < b->batCount) {
/* if the sizes don't match, the BAT must be dirty */
- b->batCopiedtodisk = false;
b->theap->dirty = true;
if (b->tvheap)
b->tvheap->dirty = true;
- } else {
- b->batCopiedtodisk = true;
}
+ /* there is something on disk now */
+ b->batCopiedtodisk = true;
MT_lock_unset(&b->theaplock);
if (locked && b->thash && b->thash != (Hash *) 1)
BAThashsave(b, dosync);
diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog
--- a/monetdb5/ChangeLog
+++ b/monetdb5/ChangeLog
@@ -1,3 +1,7 @@
# ChangeLog file for MonetDB5
# This file is updated with Maddlog
+* Wed Sep 13 2023 Sjoerd Mullender <[email protected]>
+- Removed the MAL tokenizer module. It was never usable from SQL and
+ in this form never would be.
+
diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c
--- a/monetdb5/modules/kernel/bat5.c
+++ b/monetdb5/modules/kernel/bat5.c
@@ -675,17 +675,16 @@ HASHinfo(BAT *bk, BAT *bv, Hash *h, str
return GDK_SUCCEED;
}
-
static str
-BATinfo(BAT **key, BAT **val, const bat bid)
+BKCinfo(bat *ret1, bat *ret2, const bat *bid)
{
const char *mode, *accessmode;
BAT *bk = NULL, *bv = NULL, *b;
char bf[oidStrlen];
char buf[32];
- if ((b = BATdescriptor(bid)) == NULL) {
- throw(MAL, "BATinfo", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+ if ((b = BATdescriptor(*bid)) == NULL) {
+ throw(MAL, "bat.info", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
}
bk = COLnew(0, TYPE_str, 128, TRANSIENT);
@@ -694,7 +693,7 @@ BATinfo(BAT **key, BAT **val, const bat
BBPreclaim(bk);
BBPreclaim(bv);
BBPunfix(b->batCacheid);
- throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
BATiter bi = bat_iterator(b);
@@ -718,11 +717,11 @@ BATinfo(BAT **key, BAT **val, const bat
accessmode = "unknown";
}
- if (BUNappend(bk, "batId", false) != GDK_SUCCEED ||
- BUNappend(bv, BATgetId(b), false) != GDK_SUCCEED ||
- BUNappend(bk, "batCacheid", false) != GDK_SUCCEED ||
- BUNappend(bv, local_itoa((ssize_t) b->batCacheid, buf),
- false) != GDK_SUCCEED
+ if (BUNappend(bk, "batId", false) != GDK_SUCCEED
+ || BUNappend(bv, BATgetId(b), false) != GDK_SUCCEED
+ || BUNappend(bk, "batCacheid", false) != GDK_SUCCEED
+ || BUNappend(bv, local_itoa((ssize_t) b->batCacheid, buf),
+ false) != GDK_SUCCEED
|| BUNappend(bk, "tparentid", false) != GDK_SUCCEED
|| BUNappend(bv, local_itoa((ssize_t) bi.h->parentid, buf),
false) != GDK_SUCCEED
@@ -803,7 +802,7 @@ BATinfo(BAT **key, BAT **val, const bat
BBPreclaim(bk);
BBPreclaim(bv);
BBPunfix(b->batCacheid);
- throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
/* dump index information */
MT_rwlock_rdlock(&b->thashlock);
@@ -813,25 +812,12 @@ BATinfo(BAT **key, BAT **val, const bat
BBPreclaim(bk);
BBPreclaim(bv);
BBPunfix(b->batCacheid);
- throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
MT_rwlock_rdunlock(&b->thashlock);
bat_iterator_end(&bi);
- *key = bk;
- *val = bv;
assert(BATcount(bk) == BATcount(bv));
- BBPunfix(bid);
- return MAL_SUCCEED;
-}
-
-static str
-BKCinfo(bat *ret1, bat *ret2, const bat *bid)
-{
- BAT *bv, *bk;
- str msg;
-
- if ((msg = BATinfo(&bk, &bv, *bid)) != NULL)
- return msg;
+ BBPunfix(b->batCacheid);
*ret1 = bk->batCacheid;
BBPkeepref(bk);
*ret2 = bv->batCacheid;
diff --git a/monetdb5/modules/kernel/batmmath.c b/monetdb5/modules/kernel/batmmath.c
--- a/monetdb5/modules/kernel/batmmath.c
+++ b/monetdb5/modules/kernel/batmmath.c
@@ -95,9 +95,10 @@ CMDscienceUNARY(MalStkPtr stk, InstrPtr
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]