Changeset: fbd54c1cc9e5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fbd54c1cc9e5
Modified Files:
	clients/ChangeLog.Jul2015
	clients/Tests/MAL-signatures.stable.out
	clients/Tests/MAL-signatures.stable.out.int128
	clients/Tests/exports.stable.out
	clients/mapiclient/mclient.c
	gdk/gdk_heap.c
	monetdb5/modules/atoms/str.c
	monetdb5/modules/atoms/str.h
	monetdb5/modules/atoms/str.mal
	sql/backends/monet5/sql_result.c
Branch: default
Log Message:

Merge with Jul2015 branch. diffs (truncated from 414 to 300 lines): diff --git a/clients/ChangeLog.Jul2015 b/clients/ChangeLog.Jul2015 --- a/clients/ChangeLog.Jul2015 +++ b/clients/ChangeLog.Jul2015 @@ -1,3 +1,7 @@ # ChangeLog file for clients # This file is updated with Maddlog +* Sun Aug 30 2015 Sjoerd Mullender <[email protected]> +- In the SQL formatter of mclient (the default) we now properly align + East Asian wide characters. + diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -41137,7 +41137,7 @@ address STRPrefix; comment Prefix check. command str.stringlength(s:str):int -address STRstringLength; +address STRSQLLength; comment Return the length of a right trimed string (SQL semantics). command str.string(s:str,offset:int,count:int):str diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -52080,7 +52080,7 @@ address STRPrefix; comment Prefix check. command str.stringlength(s:str):int -address STRstringLength; +address STRSQLLength; comment Return the length of a right trimed string (SQL semantics). 
command str.string(s:str,offset:int,count:int):str diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -1938,7 +1938,6 @@ str STRreplace(str *ret, const str *s1, str STRspace(str *ret, const int *l); str STRsplitpart(str *res, str *haystack, str *needle, int *field); str STRstrSearch(int *res, const str *arg1, const str *arg2); -str STRstringLength(int *res, const str *s); str STRsubstring(str *ret, const str *s, const int *start, const int *l); str STRsubstringTail(str *ret, const str *s, const int *start); str STRsuffix(str *ret, const str *s, const int *l); diff --git a/clients/mapiclient/mclient.c b/clients/mapiclient/mclient.c --- a/clients/mapiclient/mclient.c +++ b/clients/mapiclient/mclient.c @@ -359,36 +359,127 @@ SQLsetSpecial(const char *command) /* return the display length of a UTF-8 string if e is not NULL, return length up to e */ static size_t -utf8strlen(const char *s, const char *e) +utf8strlenmax(char *s, char *e, size_t max, char **t) { - size_t len = 0; + size_t len = 0, len0 = 0; + int c; + int n; + char *t0 = s; + assert(max == 0 || t != NULL); if (s == NULL) return 0; - while (*s && (e == NULL || s < e)) { - /* only count first byte of a sequence */ - if ((*s & 0xC0) != 0x80) + c = 0; + n = 0; + while (*s != 0 && (e == NULL || s < e)) { + if (*s == '\n') { + assert(n == 0); + if (max) { + *t = s; + return len; + } len++; + n = 0; + } else if ((*s & 0x80) == 0) { + assert(n == 0); + len++; + n = 0; + } else if ((*s & 0xC0) == 0x80) { + c = (c << 6) | (*s & 0x3F); + if (--n == 0) { + /* last byte of a multi-byte character */ + len++; + /* the following code points are all East + * Asian Fullwidth and East Asian Wide + * characters as defined in Unicode 8.0 */ + if ((0x1100 <= c && c <= 0x115F) || + c == 0x2329 || + c == 0x232A || + (0x2E80 <= c && c <= 0x2E99) || + (0x2E9B <= c && c <= 0x2EF3) || + (0x2F00 <= c && c <= 0x2FD5) || + 
(0x2FF0 <= c && c <= 0x2FFB) || + (0x3000 <= c && c <= 0x303E) || + (0x3041 <= c && c <= 0x3096) || + (0x3099 <= c && c <= 0x30FF) || + (0x3105 <= c && c <= 0x312D) || + (0x3131 <= c && c <= 0x318E) || + (0x3190 <= c && c <= 0x31BA) || + (0x31C0 <= c && c <= 0x31E3) || + (0x31F0 <= c && c <= 0x321E) || + (0x3220 <= c && c <= 0x3247) || + (0x3250 <= c && c <= 0x4DBF) || + (0x4E00 <= c && c <= 0xA48C) || + (0xA490 <= c && c <= 0xA4C6) || + (0xA960 <= c && c <= 0xA97C) || + (0xAC00 <= c && c <= 0xD7A3) || + (0xF900 <= c && c <= 0xFAFF) || + (0xFE10 <= c && c <= 0xFE19) || + (0xFE30 <= c && c <= 0xFE52) || + (0xFE54 <= c && c <= 0xFE66) || + (0xFE68 <= c && c <= 0xFE6B) || + (0xFF01 <= c && c <= 0xFFE6) || + (0x1B000 <= c && c <= 0x1B001) || + (0x1F200 <= c && c <= 0x1F202) || + (0x1F210 <= c && c <= 0x1F23A) || + (0x1F240 <= c && c <= 0x1F248) || + (0x1F250 <= c && c <= 0x1F251) || + (0x20000 <= c && c <= 0x2FFFD) || + (0x30000 <= c && c <= 0x3FFFD)) + len++; + } + } else if ((*s & 0xE0) == 0xC0) { + assert(n == 0); + n = 1; + c = *s & 0x1F; + } else if ((*s & 0xF0) == 0xE0) { + assert(n == 0); + n = 2; + c = *s & 0x0F; + } else if ((*s & 0xF8) == 0xF0) { + assert(n == 0); + n = 3; + c = *s & 0x07; + } else if ((*s & 0xFC) == 0xF8) { + assert(n == 0); + n = 4; + c = *s & 0x03; + } else { + assert(0); + n = 0; + } s++; + if (n == 0) { + if (max != 0) { + if (len > max) { + *t = t0; + return len0; + } + if (len == max) { + *t = s; + return len; + } + } + t0 = s; + len0 = len; + } } + if (max != 0) + *t = s; return len; } +static size_t +utf8strlen(char *s, char *e) +{ + return utf8strlenmax(s, e, 0, NULL); +} + /* skip the specified number of UTF-8 characters, but stop at a newline */ static char * utf8skip(char *s, size_t i) { - while (*s && i > 0) { - if ((*s & 0xC0) == 0xC0) { - s++; - while ((*s & 0xC0) == 0x80) - s++; - } else if (*s == '\n') - return s; - else - s++; - i--; - } + utf8strlenmax(s, NULL, i, &s); return s; } @@ -434,8 +525,11 @@ SQLrow(int *len, int 
*numeric, char **re * correction for a terminal * screen (1.62 * 2 -> 3 : * 9.72~10) */ - if (ulen > (size_t) len[i]) + if (ulen > (size_t) len[i]) { cutafter[i] = 3 * len[i] / 10; + if (cutafter[i] == 1) + cutafter[i]++; + } } /* on each cycle we get closer to the limit */ diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c --- a/gdk/gdk_heap.c +++ b/gdk/gdk_heap.c @@ -412,12 +412,15 @@ GDKupgradevarheap(COLrec *c, var_t v, in * indicated by the "free" pointer */ n = (copyall ? c->heap.size : c->heap.free) >> c->shift; - /* for memory mapped files, create a backup copy before widening + /* Create a backup copy before widening. * - * this solves a problem that we don't control what's in the - * actual file until the next commit happens, so a crash might - * otherwise leave the file (and the database) in an - * inconsistent state + * If the file is memory-mapped, this solves a problem that we + * don't control what's in the actual file until the next + * commit happens, so a crash might otherwise leave the file + * (and the database) in an inconsistent state. If, on the + * other hand, the heap is allocated, it may happen that later + * on the heap is extended and converted into a memory-mapped + * file. Then the same problem arises. 
* * also see do_backup in gdk_bbp.c */ filename = strrchr(c->heap.filename, DIR_SEP); @@ -426,9 +429,11 @@ GDKupgradevarheap(COLrec *c, var_t v, in else filename++; bid = strtol(filename, NULL, 8); - if (c->heap.storage == STORE_MMAP && - (BBP_status(bid) & (BBPEXISTING|BBPDELETED)) && - !file_exists(c->heap.farmid, BAKDIR, filename, NULL)) { + if ((BBP_status(bid) & (BBPEXISTING|BBPDELETED)) && + !file_exists(c->heap.farmid, BAKDIR, filename, NULL) && + (c->heap.storage != STORE_MEM || + GDKmove(c->heap.farmid, BATDIR, c->heap.filename, NULL, + BAKDIR, filename, NULL) != GDK_SUCCEED)) { int fd; ssize_t ret = 0; size_t size = n << c->shift; diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -2537,15 +2537,3 @@ STRspace(str *ret, const int *l){ char buf[]= " ", *s= buf; return STRrepeat(ret,&s,l); } - -str -STRstringLength(int *res, const str *s) -{ - str r = NULL; - STRRtrim(&r, s); - STRLength(res, &r); - GDKfree(r); - return MAL_SUCCEED; -} - - diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h --- a/monetdb5/modules/atoms/str.h +++ b/monetdb5/modules/atoms/str.h @@ -28,8 +28,6 @@ str_export str strEpilogue(void *ret); str_export str STRtostr(str *res, const str *src); str_export str STRConcat(str *res, const str *val1, const str *val2); str_export str STRLength(int *res, const str *arg1); -/* length of rtrimed string, needed for sql */ -str_export str STRstringLength(int *res, const str *s); str_export str STRBytes(int *res, const str *arg1); str_export str STRTail(str *res, const str *arg1, const int *offset); str_export str STRSubString(str *res, const str *arg1, const int *offset, const int *length); diff --git a/monetdb5/modules/atoms/str.mal b/monetdb5/modules/atoms/str.mal --- a/monetdb5/modules/atoms/str.mal +++ b/monetdb5/modules/atoms/str.mal @@ -25,7 +25,7 @@ address STRLength comment "Return the length of a string."; command 
stringlength( s:str ) :int -address STRstringLength +address STRSQLLength comment "Return the length of a right trimed string (SQL semantics)."; command nbytes( s:str ) :int diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c --- a/sql/backends/monet5/sql_result.c +++ b/sql/backends/monet5/sql_result.c @@ -230,6 +230,91 @@ sql_timestamp_tostr(void *TS_RES, char * } static int +STRwidth(const char *s) +{ + int len = 0; + int c; + int n; _______________________________________________ checkin-list mailing list [email protected] https://www.monetdb.org/mailman/listinfo/checkin-list
