Changeset: e3e0cbbf067a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e3e0cbbf067a
Modified Files:
        MonetDB5/src/modules/mal/tablet_sql.mx
        MonetDB5/src/optimizer/opt_mergetable.mx
Branch: default
Log Message:

Merge with Jun2010 branch.


diffs (truncated from 337 to 300 lines):

diff -r d36c48c97c35 -r e3e0cbbf067a MonetDB5/src/modules/mal/tablet_sql.mx
--- a/MonetDB5/src/modules/mal/tablet_sql.mx    Tue Aug 17 15:16:43 2010 +0200
+++ b/MonetDB5/src/modules/mal/tablet_sql.mx    Tue Aug 17 23:42:13 2010 +0200
@@ -29,7 +29,7 @@
 that should preferable be maintained.
 
 The code below consists of a file reader, which breaks up the
-file into chunks of distinct lines. Then multiple parallel threads 
+file into chunks of distinct lines. Then multiple parallel threads
 grab them, and break them on the field boundaries.
 After all fields are identified this way, the columns are converted
 and stored in the BATs.
@@ -39,7 +39,7 @@
 to distributed cheap and expensive columns over threads.
 
 The file reader overlaps IO with updates of the BAT.
-Also the buffer size of the block stream might be a little small for 
+Also the buffer size of the block stream might be a little small for
 this task (1MB). It has been increased to 8MB, which indeed improved.
 
 The work divider allocates subtasks to threads based on the
@@ -67,7 +67,7 @@
        int next;
        int limit;
        lng *time, wtime;       /* time per col + time per thread */
-       int rounds;     /* how often did we divide the work */
+       int rounds;     /* how often did we divide the work */
        MT_Id tid;
        MT_Sema producer;       /* reader waits for call */
        MT_Sema consumer;       /* data available */
@@ -89,7 +89,7 @@
 
 @-
 The line is broken into pieces directly on their field separators. It assumes that we have
-the record in the cache already, so we can do most work quickly. 
+the record in the cache already, so we can do most work quickly.
 Furthermore, it assume a uniform (SQL) pattern, without whitespace skipping, but with quote and separator.
 @c
 
@@ -122,7 +122,7 @@
 
 @-
 The parsing of the individual values is straightforward. If the value represents
-the null-replacement string then we grab the underlying nil. 
+the null-replacement string then we grab the underlying nil.
 If the string starts with the quote identified from SQL, we locate the tail
 and interpret the body.
 @c
@@ -144,14 +144,14 @@
        } else
        if ( quote && *s == quote ) {
                /* strip the quotes when present */
-               s++;    
+               s++;
                for ( t = e = s; *t ; t++)
                        if ( *t == quote) e = t;
                *e = 0;
                adt = fmt->frstr(fmt, fmt->adt, s, e, 0);
                /* The user might have specified a null string escape
                  e.g. NULL as '', which should be tested */
-               if (adt == NULL && s == e && fmt->nullstr && 
+               if (adt == NULL && s == e && fmt->nullstr &&
                        strncasecmp(s, fmt->nullstr, fmt->null_length+1) == 0){
                        adt = fmt->nildata;
                        fmt->c[0]->T->nonil = 0;
@@ -274,8 +274,8 @@
                                        GDKerror(errmsg);
                                GDKfree(errline);
                                goto errors;
-                       } 
-               } 
+                       }
+               }
 
                /* eat away the column separator */
                for ( ; *line ; line++)
@@ -292,7 +292,7 @@
                        }
                /* not enough fields */
                if ( i < as->nr_attrs-1)  {
-                       snprintf(errmsg,BUFSIZ, "missing separator '%s' line " 
BUNFMT " field " BUNFMT "\n", 
+                       snprintf(errmsg,BUFSIZ, "missing separator '%s' line " BUNFMT " field " BUNFMT "\n",
                                         fmt->sep, BATcount(fmt->c[0]) + 1 + task->next, i);
                errors:
                        /* we save all errors detected */
@@ -306,7 +306,7 @@
                        }
                        as->error = GDKstrdup(errmsg);
                        mal_unset_lock(mal_copyLock,"tablet line break");
-                       for (i = 0; i < as->nr_attrs ; i++) 
+                       for (i = 0; i < as->nr_attrs ; i++)
                                task->fields[i][idx] = NULL ;
                        break;
                }
@@ -435,7 +435,7 @@
 #ifdef _DEBUG_TABLET_
        mnstr_printf(GDKout,"SQLloader started\n");
 #endif
-       while( task->ateof == 0) 
+       while( task->ateof == 0)
        {
                MT_down_sema(task->producer, "tablet loader");
 #ifdef _DEBUG_TABLET_
@@ -461,7 +461,10 @@
        READERtask ptask[16];
        int threads= (!maxrow || maxrow > (1<<16)) ? (GDKnr_threads < 16? GDKnr_threads: 16):1;
        lng lio =0,  tio, t1 = 0, total=0, iototal=0;
-       int vmtrim = GDK_vm_trim; 
+       int vmtrim = GDK_vm_trim;
+
+       for (i = 0; i < 16; i++)
+               ptask[i].cols = 0;
 
        /* trimming process should not be active during this process. */
        /* on sf10 experiments it showed a slowdown of a factor 2 on */
@@ -479,6 +482,16 @@
        task->fields =  (char***) GDKzalloc(as->nr_attrs * sizeof(char*));
        task->cols =  (int*) GDKzalloc(as->nr_attrs * sizeof(int));
        task->time =  (lng*) GDKzalloc(as->nr_attrs * sizeof(lng));
+       task->base = GDKzalloc(b->size+2);
+
+       if ( task->fields == 0 ||
+                task->cols == 0 ||
+                task->time == 0 ||
+                task->base == 0){
+               as->error = M5OutOfMemory;
+               goto bailout;
+       }
+
        task->as= as;
        task->quote = quote;
        task->csep = csep;
@@ -486,7 +499,6 @@
        task->rsep = rsep;
        task->rseplen = strlen(rsep);
        task->errbuf =  cntxt->errbuf;
-       task->base = GDKzalloc(b->size+2);      
        task->basesize = b->size+2;
        task->input = task->base + 1; /* wrap the buffer with null bytes */
        task->base[b->size+1]= 0;
@@ -497,12 +509,6 @@
        task->b = b;
        task->out = out;
 
-       if ( task->fields == 0 ||
-                task->cols == 0 ||
-                task->time == 0 ){
-               as->error = M5OutOfMemory;
-               return BUN_NONE;
-       }
 #ifdef MLOCK_TST
        mlock( task->fields, as->nr_attrs * sizeof(char *));
        mlock( task->cols, as->nr_attrs * sizeof(int));
@@ -522,16 +528,7 @@
                task->fields[i] = GDKzalloc(sizeof(char*) * task->limit);
                if ( task->fields[i] == 0){
                        as->error = M5OutOfMemory;
-                       while( i != 0)
-                               GDKfree(task->fields[--i]);
-                       GDKfree(task->base);
-                       GDKfree(task->fields);
-                       GDKfree(task->cols);
-                       GDKfree(task->time);
-#ifdef MLOCK_TST
-                       munlockall();
-#endif
-                       return BUN_NONE;
+                       goto bailout;
                }
 #ifdef MLOCK_TST
                mlock( task->fields[i], sizeof(char*) * task->limit);
@@ -551,7 +548,7 @@
                ptask[j].cols =  (int*) GDKzalloc(as->nr_attrs * sizeof(int));
                if ( ptask[j].cols == 0){
                        as->error = M5OutOfMemory;
-                       return BUN_NONE;
+                       goto bailout;
                }
 #ifdef MLOCK_TST
                mlock( ptask[j].cols, sizeof(char*) * task->limit);
@@ -584,7 +581,7 @@
                memcpy(task->input, task->b->buf, task->b->size);
 
 #ifdef _DEBUG_TABLET_
-               mnstr_printf(GDKout,"read pos=" SZFMT " len=" SZFMT " size=" 
SZFMT " eof=%d \n", 
+               mnstr_printf(GDKout,"read pos=" SZFMT " len=" SZFMT " size=" SZFMT " eof=%d \n",
                        task->b->pos, task->b->len, task->b->size, task->b->eof);
 #endif
 
@@ -619,7 +616,7 @@
                           character within the quoted fields a
                           character may be escaped with a backslash
                           The user should supply the correct number
-                          of fields.  
+                          of fields.
                           In the first phase we simply break the lines
                                at the record boundary.
                        */
@@ -630,7 +627,7 @@
                                                        e++;
                                                        continue;
                                                }
-                                               if (*e == *rsep ) 
+                                               if (*e == *rsep )
                                                        break;
                                        }
                                else
@@ -642,36 +639,36 @@
                                                if (*e == *rsep && strncmp(e, 
rsep, rseplen) == 0)
                                                        break;
                                        }
-                               if ( *e == 0) 
+                               if ( *e == 0)
                                        e = 0;  /* nonterminated record, we 
need more */
                        } else if ( rseplen == 1 ) {
                                for ( ; *e ; e++) {
                                        if (*e == q)
-                                               q = 0;
-                                       else if (*e == quote)
-                                               q = *e;
-                                       else if (*e == '\\') {
-                                               if (e[1])
-                                                       e++;
+                                               q = 0;
+                                       else if (*e == quote)
+                                               q = *e;
+                                       else if (*e == '\\') {
+                                               if (e[1])
+                                                       e++;
                                        } else if (!q && *e == *rsep )
                                                break;
                                }
-                               if ( *e == 0) 
-                                       e = 0;  /* nonterminated record, we 
need more */
+                               if ( *e == 0)
+                                       e = 0;    /* nonterminated record, we 
need more */
                        } else {
                                for ( ; *e ; e++) {
                                        if (*e == q)
-                                               q = 0;
-                                       else if (*e == quote)
-                                               q = *e;
-                                       else if (*e == '\\') {
-                                               if (e[1])
-                                                       e++;
-                                       } else if (!q && *e == *rsep && 
strncmp(e, rsep, rseplen) == 0 ) 
+                                               q = 0;
+                                       else if (*e == quote)
+                                               q = *e;
+                                       else if (*e == '\\') {
+                                               if (e[1])
+                                                       e++;
+                                       } else if (!q && *e == *rsep && 
strncmp(e, rsep, rseplen) == 0 )
                                                break;
                                }
                                if (*e == 0 )
-                                       e = 0;  /* nonterminated record, we 
need more */
+                                       e = 0;    /* nonterminated record, we 
need more */
                        }
 
                        /* check for incomplete line and end of buffer condition*/
@@ -723,7 +720,7 @@
                if ( task->next ){
                        if ( res == 0){
                                SQLworkdivider(task, ptask, (int) as->nr_attrs, threads);
-                               
+
                                /* activate the workers to update the BATs */
                                for( j= 0; j < threads ; j++) {
                                        /* stage two, update the BATs */
@@ -751,12 +748,12 @@
        }
 
        if (GDKdebug & GRPalgorithms) {
-               if (cnt < (BUN) maxrow   && maxrow > 0) 
+               if (cnt < (BUN) maxrow   && maxrow > 0)
                        /* providing a precise count is not always easy, instead consider maxrow as an upper bound */
                        mnstr_printf(GDKout,"#SQLload_file: read error, tuples missing (after loading " BUNFMT " records)\n", BATcount(as->format[0].c[0]));
                mnstr_printf(GDKout,"# COPY reader time "LLFMT " line break " LLFMT " io " LLFMT "\n", total, lio, iototal);
 #ifdef _DEBUG_TABLET_
-               for( i=0; i< as->nr_attrs; i++) 
+               for( i=0; i< as->nr_attrs; i++)
                                mnstr_printf(GDKout,LLFMT " ", task->time[i]);
                mnstr_printf(GDKout,"\n");
 #endif
@@ -785,7 +782,7 @@
 #ifdef _DEBUG_TABLET_
                mnstr_printf(GDKout,"Found " BUNFMT" tuples\n",cnt);
 #endif
-       for( i=0; i< as->nr_attrs; i++) 
+       for( i=0; i< as->nr_attrs; i++)
                GDKfree(task->fields[i]);
        GDKfree(task->fields);
        GDKfree(task->cols);
@@ -799,6 +796,29 @@
        /* restore system setting */
        GDK_vm_trim = vmtrim;
        return res < 0 ? BUN_NONE : cnt;
+
+  bailout:
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to