Changeset: 772e36340c82 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/772e36340c82
Modified Files:
clients/Tests/MAL-signatures-hge.test
clients/Tests/MAL-signatures.test
Branch: groupjoin
Log Message:
merged with default
diffs (truncated from 1563 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -51063,56 +51063,6 @@ user_statistics
pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
SYSMONstatistics;
(empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
txtsim
dameraulevenshtein
pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -39378,56 +39378,6 @@ user_statistics
pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
SYSMONstatistics;
(empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
txtsim
dameraulevenshtein
pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/common/stream/stream.h b/common/stream/stream.h
--- a/common/stream/stream.h
+++ b/common/stream/stream.h
@@ -245,7 +245,7 @@ typedef struct bstream {
stream_export bstream *bstream_create(stream *rs, size_t chunk_size); // used all over
stream_export void bstream_destroy(bstream *s); // all over
-stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c, tokenizer.c
+stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c
stream_export ssize_t bstream_next(bstream *s); // all over
/* Callback stream is a stream where the read and write functions are
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -2077,7 +2077,7 @@ BBPdir_first(bool subcommit, lng logno,
* replacing the entries for the subcommitted bats */
if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
(obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
- GDKsyserror("subcommit attempted without backup BBP.dir.");
+ GDKsyserror("subcommit attempted without backup BBP.dir");
goto bailout;
}
/* read first three lines */
@@ -2143,7 +2143,7 @@ BBPdir_step(bat bid, BUN size, int n, ch
}
n = -1;
if (fclose(*obbpfp) == EOF) {
- GDKsyserror("Closing backup BBP.dir file failed.\n");
+ GDKsyserror("Closing backup BBP.dir file failed\n");
GDKclrerr(); /* ignore error */
}
*obbpfp = NULL;
@@ -2181,7 +2181,7 @@ BBPdir_last(int n, char *buf, size_t buf
goto bailout;
}
if (fclose(obbpf) == EOF) {
- GDKsyserror("Closing backup BBP.dir file failed.\n");
+ GDKsyserror("Closing backup BBP.dir file failed\n");
GDKclrerr(); /* ignore error */
}
obbpf = NULL;
@@ -3902,7 +3902,7 @@ BBPsync(int cnt, bat *restrict subcommit
MT_rename(bakdir, deldir) < 0))
ret = GDK_FAIL;
if (ret != GDK_SUCCEED)
- GDKsyserror("rename(%s,%s) failed.\n", bakdir, deldir);
+ GDKsyserror("rename(%s,%s) failed\n", bakdir, deldir);
TRC_DEBUG(IO_, "rename %s %s = %d\n", bakdir, deldir, (int) ret);
}
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -439,9 +439,11 @@ log_read_updates(logger *lg, trans *tr,
} else {
lg->rbuf = t;
lg->rbufsize = tlen;
- for (BUN p = 0; p < (BUN) nr; p++) {
- if (r && BUNappend(r, t, true) != GDK_SUCCEED)
- res = LOG_ERR;
+ if (r) {
+ for (BUN p = 0; p < (BUN) nr; p++) {
+ if (BUNappend(r, t, true) != GDK_SUCCEED)
+ res = LOG_ERR;
+ }
}
}
} else if (l->flag == LOG_UPDATE_BULK) {
@@ -2505,6 +2507,8 @@ log_flush(logger *lg, ulng ts)
if (updated == NULL) {
nupdated = BATcount(lg->catalog_id);
allocated = ((nupdated + 31) & ~31) / 8;
+ if (allocated == 0)
+ allocated = 4;
updated = GDKzalloc(allocated);
if (updated == NULL) {
log_unlock(lg);
diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c
--- a/gdk/gdk_storage.c
+++ b/gdk/gdk_storage.c
@@ -184,7 +184,7 @@ GDKremovedir(int farmid, const char *dir
closedir(dirp);
ret = MT_rmdir(dirnamestr);
if (ret != 0)
- GDKsyserror("rmdir(%s) failed.\n", dirnamestr);
+ GDKsyserror("rmdir(%s) failed\n", dirnamestr);
TRC_DEBUG(IO_, "rmdir %s = %d\n", dirnamestr, ret);
GDKfree(dirnamestr);
return ret ? GDK_FAIL : GDK_SUCCEED;
@@ -550,7 +550,7 @@ GDKload(int farmid, const char *nme, con
for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
if (n < 0)
- GDKsyserror("GDKload: cannot read: name=%s, ext=%s, %zu bytes missing.\n", nme, ext ? ext : "", (size_t) n_expected);
+ GDKsyserror("GDKload: cannot read: name=%s, ext=%s, %zu bytes missing\n", nme, ext ? ext : "", (size_t) n_expected);
#ifndef __COVERITY__
/* Coverity doesn't seem to
* recognize that we're just
diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog
--- a/monetdb5/ChangeLog
+++ b/monetdb5/ChangeLog
@@ -1,3 +1,7 @@
# ChangeLog file for MonetDB5
# This file is updated with Maddlog
+* Wed Sep 13 2023 Sjoerd Mullender <[email protected]>
+- Removed the MAL tokenizer module. It was never usable from SQL and
+ in this form never would be.
+
diff --git a/monetdb5/modules/mal/CMakeLists.txt b/monetdb5/modules/mal/CMakeLists.txt
--- a/monetdb5/modules/mal/CMakeLists.txt
+++ b/monetdb5/modules/mal/CMakeLists.txt
@@ -32,7 +32,6 @@ target_sources(malmodules
mal_mapi.c
remote.c remote.h
txtsim.c
- tokenizer.c
sample.c
querylog.c querylog.h
sysmon.c
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -36,8 +36,6 @@ mapi06
mapi07
mapi01
-tokenizer00
-
manifold
manifoldstr
#manifoldaggr
diff --git a/monetdb5/modules/mal/Tests/tokenizer00.maltest b/monetdb5/modules/mal/Tests/tokenizer00.maltest
deleted file mode 100644
--- a/monetdb5/modules/mal/Tests/tokenizer00.maltest
+++ /dev/null
@@ -1,187 +0,0 @@
-statement ok
-include tokenizer
-
-statement ok
-tokenizer.open("test")
-
-statement ok
-tokenizer.append("http://www.cwi.nl")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~mk")
-
-statement ok
-tokenizer.append("http://www.nocook.nl/~mk")
-
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]