Changeset: fbd54c1cc9e5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fbd54c1cc9e5
Modified Files:
        clients/ChangeLog.Jul2015
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        clients/mapiclient/mclient.c
        gdk/gdk_heap.c
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/atoms/str.mal
        sql/backends/monet5/sql_result.c
Branch: default
Log Message:

Merge with Jul2015 branch.


diffs (truncated from 414 to 300 lines):

diff --git a/clients/ChangeLog.Jul2015 b/clients/ChangeLog.Jul2015
--- a/clients/ChangeLog.Jul2015
+++ b/clients/ChangeLog.Jul2015
@@ -1,3 +1,7 @@
 # ChangeLog file for clients
 # This file is updated with Maddlog
 
+* Sun Aug 30 2015 Sjoerd Mullender <[email protected]>
+- In the SQL formatter of mclient (the default) we now properly align
+  East Asian wide characters.
+
diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -41137,7 +41137,7 @@ address STRPrefix;
 comment Prefix check.
 
 command str.stringlength(s:str):int 
-address STRstringLength;
+address STRSQLLength;
 comment Return the length of a right trimed string (SQL semantics).
 
 command str.string(s:str,offset:int,count:int):str 
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -52080,7 +52080,7 @@ address STRPrefix;
 comment Prefix check.
 
 command str.stringlength(s:str):int 
-address STRstringLength;
+address STRSQLLength;
 comment Return the length of a right trimed string (SQL semantics).
 
 command str.string(s:str,offset:int,count:int):str 
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1938,7 +1938,6 @@ str STRreplace(str *ret, const str *s1, 
 str STRspace(str *ret, const int *l);
 str STRsplitpart(str *res, str *haystack, str *needle, int *field);
 str STRstrSearch(int *res, const str *arg1, const str *arg2);
-str STRstringLength(int *res, const str *s);
 str STRsubstring(str *ret, const str *s, const int *start, const int *l);
 str STRsubstringTail(str *ret, const str *s, const int *start);
 str STRsuffix(str *ret, const str *s, const int *l);
diff --git a/clients/mapiclient/mclient.c b/clients/mapiclient/mclient.c
--- a/clients/mapiclient/mclient.c
+++ b/clients/mapiclient/mclient.c
@@ -359,36 +359,127 @@ SQLsetSpecial(const char *command)
 /* return the display length of a UTF-8 string
    if e is not NULL, return length up to e */
 static size_t
-utf8strlen(const char *s, const char *e)
+utf8strlenmax(char *s, char *e, size_t max, char **t)
 {
-       size_t len = 0;
+       size_t len = 0, len0 = 0;
+       int c;
+       int n;
+       char *t0 = s;
 
+       assert(max == 0 || t != NULL);
        if (s == NULL)
                return 0;
-       while (*s && (e == NULL || s < e)) {
-               /* only count first byte of a sequence */
-               if ((*s & 0xC0) != 0x80)
+       c = 0;
+       n = 0;
+       while (*s != 0 && (e == NULL || s < e)) {
+               if (*s == '\n') {
+                       assert(n == 0);
+                       if (max) {
+                               *t = s;
+                               return len;
+                       }
                        len++;
+                       n = 0;
+               } else if ((*s & 0x80) == 0) {
+                       assert(n == 0);
+                       len++;
+                       n = 0;
+               } else if ((*s & 0xC0) == 0x80) {
+                       c = (c << 6) | (*s & 0x3F);
+                       if (--n == 0) {
+                               /* last byte of a multi-byte character */
+                               len++;
+                               /* the following code points are all East
+                                * Asian Fullwidth and East Asian Wide
+                                * characters as defined in Unicode 8.0 */
+                               if ((0x1100 <= c && c <= 0x115F) ||
+                                   c == 0x2329 ||
+                                   c == 0x232A ||
+                                   (0x2E80 <= c && c <= 0x2E99) ||
+                                   (0x2E9B <= c && c <= 0x2EF3) ||
+                                   (0x2F00 <= c && c <= 0x2FD5) ||
+                                   (0x2FF0 <= c && c <= 0x2FFB) ||
+                                   (0x3000 <= c && c <= 0x303E) ||
+                                   (0x3041 <= c && c <= 0x3096) ||
+                                   (0x3099 <= c && c <= 0x30FF) ||
+                                   (0x3105 <= c && c <= 0x312D) ||
+                                   (0x3131 <= c && c <= 0x318E) ||
+                                   (0x3190 <= c && c <= 0x31BA) ||
+                                   (0x31C0 <= c && c <= 0x31E3) ||
+                                   (0x31F0 <= c && c <= 0x321E) ||
+                                   (0x3220 <= c && c <= 0x3247) ||
+                                   (0x3250 <= c && c <= 0x4DBF) ||
+                                   (0x4E00 <= c && c <= 0xA48C) ||
+                                   (0xA490 <= c && c <= 0xA4C6) ||
+                                   (0xA960 <= c && c <= 0xA97C) ||
+                                   (0xAC00 <= c && c <= 0xD7A3) ||
+                                   (0xF900 <= c && c <= 0xFAFF) ||
+                                   (0xFE10 <= c && c <= 0xFE19) ||
+                                   (0xFE30 <= c && c <= 0xFE52) ||
+                                   (0xFE54 <= c && c <= 0xFE66) ||
+                                   (0xFE68 <= c && c <= 0xFE6B) ||
+                                   (0xFF01 <= c && c <= 0xFFE6) ||
+                                   (0x1B000 <= c && c <= 0x1B001) ||
+                                   (0x1F200 <= c && c <= 0x1F202) ||
+                                   (0x1F210 <= c && c <= 0x1F23A) ||
+                                   (0x1F240 <= c && c <= 0x1F248) ||
+                                   (0x1F250 <= c && c <= 0x1F251) ||
+                                   (0x20000 <= c && c <= 0x2FFFD) ||
+                                   (0x30000 <= c && c <= 0x3FFFD))
+                                       len++;
+                       }
+               } else if ((*s & 0xE0) == 0xC0) {
+                       assert(n == 0);
+                       n = 1;
+                       c = *s & 0x1F;
+               } else if ((*s & 0xF0) == 0xE0) {
+                       assert(n == 0);
+                       n = 2;
+                       c = *s & 0x0F;
+               } else if ((*s & 0xF8) == 0xF0) {
+                       assert(n == 0);
+                       n = 3;
+                       c = *s & 0x07;
+               } else if ((*s & 0xFC) == 0xF8) {
+                       assert(n == 0);
+                       n = 4;
+                       c = *s & 0x03;
+               } else {
+                       assert(0);
+                       n = 0;
+               }
                s++;
+               if (n == 0) {
+                       if (max != 0) {
+                               if (len > max) {
+                                       *t = t0;
+                                       return len0;
+                               }
+                               if (len == max) {
+                                       *t = s;
+                                       return len;
+                               }
+                       }
+                       t0 = s;
+                       len0 = len;
+               }
        }
+       if (max != 0)
+               *t = s;
        return len;
 }
 
+static size_t
+utf8strlen(char *s, char *e)
+{
+       return utf8strlenmax(s, e, 0, NULL);
+}
+
 /* skip the specified number of UTF-8 characters, but stop at a newline */
 static char *
 utf8skip(char *s, size_t i)
 {
-       while (*s && i > 0) {
-               if ((*s & 0xC0) == 0xC0) {
-                       s++;
-                       while ((*s & 0xC0) == 0x80)
-                               s++;
-               } else if (*s == '\n')
-                       return s;
-               else
-                       s++;
-               i--;
-       }
+       utf8strlenmax(s, NULL, i, &s);
        return s;
 }
 
@@ -434,8 +525,11 @@ SQLrow(int *len, int *numeric, char **re
                                         * correction for a terminal
                                         * screen (1.62 * 2 -> 3 :
                                         * 9.72~10) */
-                                       if (ulen > (size_t) len[i])
+                                       if (ulen > (size_t) len[i]) {
                                                cutafter[i] = 3 * len[i] / 10;
+                                               if (cutafter[i] == 1)
+                                                       cutafter[i]++;
+                                       }
                                }
 
                                /* on each cycle we get closer to the limit */
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -412,12 +412,15 @@ GDKupgradevarheap(COLrec *c, var_t v, in
         * indicated by the "free" pointer */
        n = (copyall ? c->heap.size : c->heap.free) >> c->shift;
 
-       /* for memory mapped files, create a backup copy before widening
+       /* Create a backup copy before widening.
         *
-        * this solves a problem that we don't control what's in the
-        * actual file until the next commit happens, so a crash might
-        * otherwise leave the file (and the database) in an
-        * inconsistent state
+        * If the file is memory-mapped, this solves a problem that we
+        * don't control what's in the actual file until the next
+        * commit happens, so a crash might otherwise leave the file
+        * (and the database) in an inconsistent state.  If, on the
+        * other hand, the heap is allocated, it may happen that later
+        * on the heap is extended and converted into a memory-mapped
+        * file.  Then the same problem arises.
         *
         * also see do_backup in gdk_bbp.c */
        filename = strrchr(c->heap.filename, DIR_SEP);
@@ -426,9 +429,11 @@ GDKupgradevarheap(COLrec *c, var_t v, in
        else
                filename++;
        bid = strtol(filename, NULL, 8);
-       if (c->heap.storage == STORE_MMAP &&
-           (BBP_status(bid) & (BBPEXISTING|BBPDELETED)) &&
-           !file_exists(c->heap.farmid, BAKDIR, filename, NULL)) {
+       if ((BBP_status(bid) & (BBPEXISTING|BBPDELETED)) &&
+           !file_exists(c->heap.farmid, BAKDIR, filename, NULL) &&
+           (c->heap.storage != STORE_MEM ||
+            GDKmove(c->heap.farmid, BATDIR, c->heap.filename, NULL,
+                    BAKDIR, filename, NULL) != GDK_SUCCEED)) {
                int fd;
                ssize_t ret = 0;
                size_t size = n << c->shift;
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -2537,15 +2537,3 @@ STRspace(str *ret, const int *l){
        char buf[]= " ", *s= buf;
        return STRrepeat(ret,&s,l);
 }
-
-str
-STRstringLength(int *res, const str *s)
-{
-       str r = NULL;
-       STRRtrim(&r, s);
-       STRLength(res, &r);
-       GDKfree(r);
-       return MAL_SUCCEED;
-}
-
-
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -28,8 +28,6 @@ str_export str strEpilogue(void *ret);
 str_export str STRtostr(str *res, const str *src);
 str_export str STRConcat(str *res, const str *val1, const str *val2);
 str_export str STRLength(int *res, const str *arg1);
-/* length of rtrimed string, needed for sql */
-str_export str STRstringLength(int *res, const str *s);
 str_export str STRBytes(int *res, const str *arg1);
 str_export str STRTail(str *res, const str *arg1, const int *offset);
 str_export str STRSubString(str *res, const str *arg1, const int *offset, const int *length);
diff --git a/monetdb5/modules/atoms/str.mal b/monetdb5/modules/atoms/str.mal
--- a/monetdb5/modules/atoms/str.mal
+++ b/monetdb5/modules/atoms/str.mal
@@ -25,7 +25,7 @@ address STRLength
 comment "Return the length of a string.";
 
 command stringlength( s:str ) :int
-address STRstringLength
+address STRSQLLength
 comment "Return the length of a right trimed string (SQL semantics).";
 
 command nbytes( s:str ) :int
diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -230,6 +230,91 @@ sql_timestamp_tostr(void *TS_RES, char *
 }
 
 static int
+STRwidth(const char *s)
+{
+       int len = 0;
+       int c;
+       int n;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to