Changeset: 35ae9857827b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/35ae9857827b
Branch: iso
Log Message:
Merged with Jul2021
diffs (261 lines):
diff --git a/clients/examples/C/bincopydata.c b/clients/examples/C/bincopydata.c
--- a/clients/examples/C/bincopydata.c
+++ b/clients/examples/C/bincopydata.c
@@ -183,7 +183,7 @@ gen_newline_strings(FILE *f, bool bytesw
{
(void)byteswap;
for (long i = 0; i < nrecs; i++) {
- fprintf(f, "rn\r\nr\r%ld", i);
+ fprintf(f, "RN\r\nR\r%ld", i);
fputc(0, f);
}
}
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -129,24 +129,11 @@ static bool havehge = false;
#define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
-#ifndef NDEBUG
-static inline bool
-islocked(MT_Lock *l)
-{
- if (MT_lock_try(l)) {
- MT_lock_unset(l);
- return false;
- }
- return true;
-}
-#endif
-
static void
BBP_insert(bat i)
{
bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask);
- assert(islocked(&BBPnameLock));
BBP_next(i) = BBP_hash[idx];
BBP_hash[idx] = i;
}
@@ -158,7 +145,6 @@ BBP_delete(bat i)
const char *s = BBP_logical(i);
bat idx = (bat) (strHash(s) & BBP_mask);
- assert(islocked(&BBPnameLock));
for (h += idx; (i = *h) != 0; h = &BBP_next(i)) {
if (strcmp(BBP_logical(i), s) == 0) {
*h = BBP_next(i);
@@ -400,7 +386,6 @@ BBPextend(int idx, bool buildhash)
static gdk_return
recover_dir(int farmid, bool direxists)
{
- assert(islocked(&GDKtmLock));
if (direxists) {
/* just try; don't care about these non-vital files */
if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
@@ -1410,10 +1395,6 @@ BBPdir_first(bool subcommit, lng logno,
int n = 0;
lng ologno, otransid;
-#ifndef NDEBUG
- assert(islocked(&GDKtmLock));
-#endif
-
if (obbpfp)
*obbpfp = NULL;
*nbbpfp = NULL;
@@ -2874,7 +2855,6 @@ BBPprepare(bool subcommit)
str bakdirpath, subdirpath;
gdk_return ret = GDK_SUCCEED;
- assert(islocked(&GDKtmLock));
if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL)))
return GDK_FAIL;
if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) {
diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c
--- a/monetdb5/modules/atoms/json.c
+++ b/monetdb5/modules/atoms/json.c
@@ -57,7 +57,6 @@ typedef str json;
if (*(J) != ' ' &&
\
*(J) != '\n' &&
\
*(J) != '\t' &&
\
- *(J) != '\f' &&
\
*(J) != '\r')
\
break;
\
} while (0)
@@ -871,6 +870,8 @@ JSONstringParser(const char *j, const ch
*next = j;
return MAL_SUCCEED;
default:
+ if ((unsigned char)*j < ' ')
+ throw(MAL, "json.parser", "illegal control
char");
if (seensurrogate)
throw(MAL, "json.parser", "illegal escape
char");
break;
@@ -911,6 +912,9 @@ JSONfractionParser(const char *j, const
// skip the period character
j++;
+ // must be followed by more digits
+ if (!isdigit((unsigned char)*j))
+ return false;
for (; *j; j++)
if (!isdigit((unsigned char)*j))
break;
@@ -1022,11 +1026,16 @@ JSONtoken(JSON *jt, const char *j, const
skipblancs(j);
if (*j == '}')
break;
- if (*j != '}' && *j != ',') {
+ if (*j != ',') {
jt->error = createException(MAL, "json.parser",
"JSON syntax error: ',' or '}' expected at offset %td", j - string_start);
return idx;
}
j++;
+ skipblancs(j);
+ if (*j == '}') {
+ jt->error = createException(MAL, "json.parser",
"JSON syntax error: '}' not expected at offset %td", j - string_start);
+ return idx;
+ }
}
if (*j != '}') {
jt->error = createException(MAL, "json.parser", "JSON
syntax error: '}' expected at offset %td", j - string_start);
@@ -1083,12 +1092,16 @@ JSONtoken(JSON *jt, const char *j, const
jt->error = createException(MAL, "json.parser",
"JSON syntax error: Array value expected at offset %td", j - string_start);
return idx;
}
- if (*j != ']' && *j != ',') {
+ if (*j != ',') {
jt->error = createException(MAL, "json.parser",
"JSON syntax error: ',' or ']' expected at offset %td (context: %c%c%c)", j -
string_start, *(j - 1), *j, *(j + 1));
return idx;
}
j++;
skipblancs(j);
+ if (*j == ']') {
+ jt->error = createException(MAL, "json.parser",
"JSON syntax error: '}' not expected at offset %td", j - string_start);
+ return idx;
+ }
}
if (*j != ']') {
jt->error = createException(MAL, "json.parser", "JSON
syntax error: ']' expected at offset %td", j - string_start);
diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c
--- a/sql/backends/monet5/sql_bincopyfrom.c
+++ b/sql/backends/monet5/sql_bincopyfrom.c
@@ -326,29 +326,61 @@ convert_timestamp(void *dst_start, void
}
-static void
-convert_line_endings(char *text)
+static str
+convert_and_validate(char *text)
{
- // Read- and write positions.
- // We always have w <= r, or it wouldn't be safe.
- const char *r = text;
- char *w = text;
- while (*r) {
- if (r[0] == '\r' && r[1] == '\n')
- r++;
- *w++ = *r++;
+ unsigned char *r = (unsigned char*)text;
+ unsigned char *w = r;
+
+ if (*r == 0x80 && *(r+1) == 0) {
+ // Technically a utf-8 violation, but we treat it as the NULL marker
+ // GDK does so as well so we can just pass it on.
+ // load_zero_terminated_text() below contains an assert to ensure
+ // this remains the case.
+ return MAL_SUCCEED;
+ }
+
+ while (*r != 0) {
+ unsigned char c = *w++ = *r++;
+
+ if (c == '\r' && *r == '\n') {
+ w--;
+ continue;
+ }
+ if ((c & 0x80) == 0x00) // 0xxx_xxxx: standalone byte
+ continue;
+ if ((c & 0xF8) == 0xF0) // 1111_0xxx
+ goto expect3;
+ if ((c & 0xF0) == 0xE0) // 1110_xxxx
+ goto expect2;
+ if ((c & 0xE0) == 0xC0) // 110x_xxxx
+ goto expect1;
+ goto bad_utf8;
+
+expect3:
+ if (((*w++ = *r++) & 0x80) != 0x80)
+ goto bad_utf8;
+expect2:
+ if (((*w++ = *r++) & 0x80) != 0x80)
+ goto bad_utf8;
+expect1:
+ if (((*w++ = *r++) & 0x80) != 0x80)
+ goto bad_utf8;
+
}
*w = '\0';
+ return MAL_SUCCEED;
+
+bad_utf8:
+ return createException(SQL, "BATattach_stream", SQLSTATE(42000)
"malformed utf-8 byte sequence");
}
static str
-append_text(BAT *bat, char *start, char *end)
+append_text(BAT *bat, char *start)
{
- (void)bat;
-
- char *cr = memchr(start, '\r', end - start);
- if (cr)
- convert_line_endings(cr);
+ str msg = convert_and_validate(start);
+ if (msg != MAL_SUCCEED)
+ return msg;
if (BUNappend(bat, start, false) != GDK_SUCCEED)
return createException(SQL, "sql.importColumn", GDK_EXCEPTION);
@@ -365,6 +397,9 @@ load_zero_terminated_text(BAT *bat, stre
str msg = MAL_SUCCEED;
bstream *bs = NULL;
+ // convert_and_validate() above counts on the following property to hold:
+ assert(strNil((const char[2]){ 0x80, 0 }));
+
bs = bstream_create(s, 1 << 20);
if (bs == NULL) {
msg = createException(SQL, "sql", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
@@ -384,7 +419,7 @@ load_zero_terminated_text(BAT *bat, stre
char *buf_end = &bs->buf[bs->len];
char *start, *end;
for (start = buf_start; (end = memchr(start, '\0', buf_end -
start)) != NULL; start = end + 1) {
- msg = append_text(bat, start, end);
+ msg = append_text(bat, start);
if (msg != NULL)
goto end;
}
diff --git a/sql/test/bincopy/Tests/bincopy_support.py b/sql/test/bincopy/Tests/bincopy_support.py
--- a/sql/test/bincopy/Tests/bincopy_support.py
+++ b/sql/test/bincopy/Tests/bincopy_support.py
@@ -127,14 +127,14 @@ OR (id % 10000 = 0 AND LENGTH(s) = 28
BROKEN_STRINGS = ("""
CREATE TABLE foo(id INT NOT NULL, s TEXT);
COPY BINARY INTO foo(id, s) FROM @ints@, @broken_strings@ @ON@;
-""", (None, "!GDK reported error: strPut: incorrectly encoded UTF-8"))
+""", (None, "!malformed utf-8 byte sequence"))
# note that the \r\n has been normalized to \n but the lone \r has been
# left alone.
NEWLINE_STRINGS = (r"""
CREATE TABLE foo(id INT NOT NULL, s TEXT);
COPY BINARY INTO foo(id, s) FROM @ints@, @newline_strings@ @ON@;
-SELECT COUNT(id) FROM foo WHERE s = (E'rn\nr\r' || id);
+SELECT COUNT(id) FROM foo WHERE s = (E'RN\nR\r' || id);
""", [f"{NRECS} affected rows", f"{NRECS}"])
NULL_STRINGS = ("""
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list