Changeset: ae96662f93e6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ae96662f93e6
Added Files:
        sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.sql
        sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.stable.err
        sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.stable.out
Modified Files:
        gdk/gdk_string.c
        monetdb5/mal/mal_client.c
        monetdb5/modules/mal/tablet.c
        sql/server/rel_optimizer.c
        sql/test/BugTracker-2019/Tests/All
Branch: Nov2019
Log Message:

Merge with Apr2019 branch.


diffs (truncated from 537 to 300 lines):

diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -258,6 +258,41 @@ strPut(Heap *h, var_t *dst, const char *
        }
        /* the string was not found in the heap, we need to enter it */
 
+       if (v[0] != '\200' || v[1] != '\0') {
+               /* check that string is correctly encoded UTF-8; there
+                * was no need to do this earlier: if the string was
+                * found above, it must have gone through here in the
+                * past */
+               int nutf8 = 0;
+               int m = 0;
+               for (size_t i = 0; v[i]; i++) {
+                       if (nutf8 > 0) {
+                               if ((v[i] & 0xC0) != 0x80 ||
+                                   (m != 0 && (v[i] & m) == 0)) {
+                                 badutf8:
+                                       GDKerror("strPut: incorrectly encoded UTF-8");
+                                       return 0;
+                               }
+                               m = 0;
+                               nutf8--;
+                       } else if ((v[i] & 0xE0) == 0xC0) {
+                               nutf8 = 1;
+                               if ((v[i] & 0x1E) == 0)
+                                       goto badutf8;
+                       } else if ((v[i] & 0xF0) == 0xE0) {
+                               nutf8 = 2;
+                               if ((v[i] & 0x0F) == 0)
+                                       m = 0x20;
+                       } else if ((v[i] & 0xF8) == 0xF0) {
+                               nutf8 = 3;
+                               if ((v[i] & 0x07) == 0)
+                                       m = 0x30;
+                       } else if ((v[i] & 0x80) != 0) {
+                               goto badutf8;
+                       }
+               }
+       }
+
        pad = GDK_VARALIGN - (h->free & (GDK_VARALIGN - 1));
        if (elimbase == 0) {    /* i.e. h->free < GDK_ELIMLIMIT */
                if (pad < sizeof(stridx_t)) {
@@ -310,56 +345,6 @@ strPut(Heap *h, var_t *dst, const char *
        /* insert string */
        pos = h->free + pad + extralen;
        *dst = (var_t) pos;
-#ifndef NDEBUG
-       /* just before inserting into the heap, make sure that the
-        * string is actually UTF-8 (if we encountered a return
-        * statement before this, the string was already in the heap,
-        * and hence already checked) */
-       if (v[0] != '\200' || v[1] != '\0') {
-               /* not str_nil, must be UTF-8 */
-               size_t i;
-
-               for (i = 0; v[i] != '\0'; i++) {
-                       /* check that v[i] is the start of a validly
-                        * coded UTF-8 sequence: this involves
-                        * checking that the first byte is a valid
-                        * start byte and is followed by the correct
-                        * number of follow-up bytes, but also that
-                        * the sequence cannot be shorter */
-                       if ((v[i] & 0x80) == 0) {
-                               /* 0aaaaaaa */
-                               continue;
-                       } else if ((v[i] & 0xE0) == 0xC0) {
-                               /* 110bbbba 10aaaaaa
-                                * one of the b's must be set*/
-                               assert(v[i] & 0x4D);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                       } else if ((v[i] & 0xF0) == 0xE0) {
-                               /* 1110cccc 10cbbbba 10aaaaaa
-                                * one of the c's must be set*/
-                               assert(v[i] & 0x0F || v[i + 1] & 0x20);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                       } else if ((v[i] & 0xF8) == 0xF0) {
-                               /* 11110ddd 10ddcccc 10cbbbba 10aaaaaa
-                                * one of the d's must be set */
-                               assert(v[i] & 0x07 || v[i + 1] & 0x30);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                               i++;
-                               assert((v[i] & 0xC0) == 0x80);
-                       } else {
-                               /* this will fail */
-                               assert((v[i] & 0x80) == 0);
-                       }
-               }
-       }
-#endif
        memcpy(h->base + pos, v, len);
        if (h->hashash) {
                ((BUN *) (h->base + pos))[-1] = strhash;
diff --git a/monetdb5/mal/mal_client.c b/monetdb5/mal/mal_client.c
--- a/monetdb5/mal/mal_client.c
+++ b/monetdb5/mal/mal_client.c
@@ -421,10 +421,10 @@ MCfreeClient(Client c)
                c->glb = NULL;
        }
        if( c->error_row){
-               BBPrelease(c->error_row->batCacheid);
-               BBPrelease(c->error_fld->batCacheid);
-               BBPrelease(c->error_msg->batCacheid);
-               BBPrelease(c->error_input->batCacheid);
+               BBPunfix(c->error_row->batCacheid);
+               BBPunfix(c->error_fld->batCacheid);
+               BBPunfix(c->error_msg->batCacheid);
+               BBPunfix(c->error_input->batCacheid);
                c->error_row = c->error_fld = c->error_msg = c->error_input = NULL;
        }
        if( c->wlc)
diff --git a/monetdb5/modules/mal/tablet.c b/monetdb5/modules/mal/tablet.c
--- a/monetdb5/modules/mal/tablet.c
+++ b/monetdb5/modules/mal/tablet.c
@@ -864,7 +864,8 @@ SQLinsert_val(READERtask *task, int col,
                                        task->rowerror[idx]++;
                                        task->errorcnt++;
                                        task->besteffort = 0; /* no longer best effort */
-                                       if (BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
+                                       if (task->cntxt->error_row == NULL ||
+                                               BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
                                                BUNappend(task->cntxt->error_fld, &col, false) != GDK_SUCCEED ||
                                                BUNappend(task->cntxt->error_msg, SQLSTATE(HY001) MAL_MALLOC_FAIL, false) != GDK_SUCCEED ||
                                                BUNappend(task->cntxt->error_input, err, false) != GDK_SUCCEED) {
@@ -886,7 +887,8 @@ SQLinsert_val(READERtask *task, int col,
                                task->as->error = createException(MAL, "sql.copy_from", SQLSTATE(HY001) MAL_MALLOC_FAIL);
                        task->rowerror[idx]++;
                        task->errorcnt++;
-                       if (BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
+                       if (task->cntxt->error_row == NULL ||
+                               BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
                                BUNappend(task->cntxt->error_fld, &col, false) != GDK_SUCCEED ||
                                BUNappend(task->cntxt->error_msg, buf, false) != GDK_SUCCEED ||
                                BUNappend(task->cntxt->error_input, err, false) != GDK_SUCCEED) {
@@ -909,7 +911,8 @@ SQLinsert_val(READERtask *task, int col,
        if (task->rowerror) {
                lng row = BATcount(fmt->c);
                MT_lock_set(&errorlock);
-               if (BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
+               if (task->cntxt->error_row == NULL ||
+                       BUNappend(task->cntxt->error_row, &row, false) != GDK_SUCCEED ||
                        BUNappend(task->cntxt->error_fld, &col, false) != GDK_SUCCEED ||
                        BUNappend(task->cntxt->error_msg, "insert failed", false) != GDK_SUCCEED ||
                        (err = SQLload_error(task, idx,task->as->nr_attrs)) == NULL ||
@@ -1577,11 +1580,7 @@ create_rejects_table(Client cntxt)
                                BBPunfix(cntxt->error_msg->batCacheid);
                        if (cntxt->error_input)
                                BBPunfix(cntxt->error_input->batCacheid);
-               } else {
-                       BBPkeepref(cntxt->error_row->batCacheid);
-                       BBPkeepref(cntxt->error_fld->batCacheid);
-                       BBPkeepref(cntxt->error_msg->batCacheid);
-                       BBPkeepref(cntxt->error_input->batCacheid);
+                       cntxt->error_row = cntxt->error_fld = cntxt->error_msg = cntxt->error_input = NULL;
                }
        }
        MT_lock_unset(&mal_contextLock);
@@ -1609,6 +1608,7 @@ SQLload_file(Client cntxt, Tablet *as, b
        task = (READERtask) {
                .cntxt = cntxt,
                .from_stdin = from_stdin,
+               .as = as,
        };
 
        /* create the reject tables */
@@ -1651,7 +1651,6 @@ SQLload_file(Client cntxt, Tablet *as, b
                goto bailout;
        }
 
-       task.as = as;
        task.skip = skip;
        task.quote = quote;
        task.csep = csep;
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -2157,23 +2157,38 @@ rel_push_topn_down(int *changes, mvc *sq
                        ul = rel_project(sql->sa, ul, NULL);
                        ul->exps = exps_copy(sql->sa, r->exps);
                        /* possibly add order by column */
-                       if (add_r)
-                               ul->exps = list_merge(ul->exps, exps_copy(sql->sa, r->r), NULL);
+                       if (add_r) {
+                               for (node *n = ((list*)r->r)->h ; n ; n = n->next) {
+                                       sql_exp *exp = (sql_exp*) n->data;
+                                       if (rel_has_exp(ul, exp))
+                                               list_append(ul->exps, exp_copy(sql->sa, exp));
+                               }
+                       }
                        ul->r = exps_copy(sql->sa, r->r);
                        ul = rel_topn(sql->sa, ul, sum_limit_offset(sql, rel->exps));
                        ur = rel_project(sql->sa, ur, NULL);
                        ur->exps = exps_copy(sql->sa, r->exps);
                        /* possibly add order by column */
-                       if (add_r)
-                               ur->exps = list_merge(ur->exps, exps_copy(sql->sa, r->r), NULL);
+                       if (add_r) {
+                               for (node *n = ((list*)r->r)->h ; n ; n = n->next) {
+                                       sql_exp *exp = (sql_exp*) n->data;
+                                       if (rel_has_exp(ur, exp))
+                                               list_append(ur->exps, exp_copy(sql->sa, exp));
+                               }
+                       }
                        ur->r = exps_copy(sql->sa, r->r);
                        ur = rel_topn(sql->sa, ur, sum_limit_offset(sql, rel->exps));
                        u = rel_setop(sql->sa, ul, ur, op_union);
                        u->exps = exps_alias(sql->sa, r->exps); 
                        set_processed(u);
                        /* possibly add order by column */
-                       if (add_r)
-                               u->exps = list_merge(u->exps, exps_copy(sql->sa, r->r), NULL);
+                       if (add_r) {
+                               for (node *n = ((list*)r->r)->h ; n ; n = n->next) {
+                                       sql_exp *exp = (sql_exp*) n->data;
+                                       if (rel_has_exp(u, exp))
+                                               list_append(u->exps, exp_copy(sql->sa, exp));
+                               }
+                       }
 
                        if (need_distinct(r)) {
                                set_distinct(ul);
diff --git a/sql/test/BugTracker-2019/Tests/All b/sql/test/BugTracker-2019/Tests/All
--- a/sql/test/BugTracker-2019/Tests/All
+++ b/sql/test/BugTracker-2019/Tests/All
@@ -32,3 +32,4 @@ sequences-types.Bug-6745
 alter_table_drop_column.Bug-6749
 HAVE_PYMONETDB?remote-table-non-existent-column.Bug-6750
 cte-union.Bug-6755
+merge-table-limit.Bug-6756
diff --git a/sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.sql b/sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.sql
new file mode 100644
--- /dev/null
+++ b/sql/test/BugTracker-2019/Tests/merge-table-limit.Bug-6756.sql
@@ -0,0 +1,105 @@
+START TRANSACTION;
+
+CREATE SCHEMA logs;
+
+CREATE MERGE TABLE logs.test_message (
+    logentry_no BIGINT,
+    logentry_id STRING,
+    processed_timestamp TIMESTAMP,
+    timestamp TIMESTAMP,
+    logsource STRING,
+    logsource_environment STRING,
+    logsource_service STRING,
+    logsource_location STRING,
+    logsource_subsystem STRING,
+    program STRING,
+    program_type STRING,
+    program_name STRING,
+    program_log STRING,
+    program_source STRING,
+    program_thread STRING,
+    log_level STRING,
+    tags JSON,
+    syslog_severity STRING,
+    syslog_facility STRING,
+    syslog_tag STRING,
+    message STRING,
+    structured_data JSON
+);
+
+CREATE TABLE logs.test_message_20190909 (
+    logentry_no BIGINT GENERATED ALWAYS AS
+        IDENTITY (
+           START WITH 2019090900000000000 INCREMENT BY 1
+           MINVALUE 2019090900000000000 MAXVALUE 2019090999999999999
+           CACHE 50 CYCLE
+    ),
+    logentry_id STRING,
+    processed_timestamp TIMESTAMP,
+    timestamp TIMESTAMP,
+    logsource STRING,
+    logsource_environment STRING,
+    logsource_service STRING,
+    logsource_location STRING,
+    logsource_subsystem STRING,
+    program STRING,
+    program_type STRING,
+    program_name STRING,
+    program_log STRING,
+    program_source STRING,
+    program_thread STRING,
+    log_level STRING,
+    tags JSON,
+    syslog_severity STRING,
+    syslog_facility STRING,
+    syslog_tag STRING,
+    message STRING,
+    structured_data JSON
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to