Changeset: e758b9cc48c5 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e758b9cc48c5 Removed Files: monetdb5/modules/mal/tablet_mk.c monetdb5/modules/mal/tablet_si.c Modified Files: clients/Tests/exports.stable.out clients/mapiclient/Tests/stethoscope--help.stable.err Branch: default Log Message:
Merge with latest changes diffs (truncated from 2404 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -3648,6 +3648,7 @@ void setPolymorphic(InstrPtr p, int tpe, void setReturnArgument(InstrPtr p, int varid); str setScenario(Client c, str nme); str setStartPoint(Module cntxt, str mod, str fcn); +void setVarName(MalBlkPtr mb, int i, str nme); void setVarType(MalBlkPtr mb, int i, int tpe); str setWriteModeRef; void showAllScenarios(stream *f); diff --git a/clients/mapiclient/Tests/stethoscope--help.stable.err b/clients/mapiclient/Tests/stethoscope--help.stable.err --- a/clients/mapiclient/Tests/stethoscope--help.stable.err +++ b/clients/mapiclient/Tests/stethoscope--help.stable.err @@ -21,7 +21,7 @@ The trace options (default 'ISTest'): S = monitor start of instruction profiling a = aggregate clock ticks per instruction e = event counter - f = module.function name + f = enclosing module.function name i = instruction counter I = interpreter thread number T = wall clock time diff --git a/monetdb5/modules/mal/tablet_mk.c b/monetdb5/modules/mal/tablet_mk.c deleted file mode 100644 --- a/monetdb5/modules/mal/tablet_mk.c +++ /dev/null @@ -1,798 +0,0 @@ -/* - * The contents of this file are subject to the MonetDB Public License - * Version 1.1 (the "License"); you may not use this file except in - * compliance with the License. You may obtain a copy of the License at - * http://www.monetdb.org/Legal/MonetDBLicense - * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the - * License for the specific language governing rights and limitations - * under the License. - * - * The Original Code is the MonetDB Database System. - * - * The Initial Developer of the Original Code is CWI. - * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. - * Copyright August 2008-2012 MonetDB B.V. - * All Rights Reserved. - */ - -/* - * @f tablet_si - * @a Niels Nes, Martin Kersten, Stratos Idreos - * - * @- Parallel bulk load for SQL - * The COPY INTO command for SQL is heavily CPU bound, which means - * that ideally we would like to exploit the multi-cores to do that - * work in parallel. - * Complicating factors are the initial record offset, the - * possible variable length of the input, and the original sort order - * that should preferrable be maintained. - * - * The code below consists of a file reader, which breaks up the - * file into distinct lines/fields. Then multiple parallel threads - * can grab them, convert the value and update the underlying tables. - * - * The threads get a reference to a private copy of the READERtask. - * It includes a list of columns they should handle. This is a basis - * to distributed cheap and expensive columns over threads. - * - * A double buffering scheme might improve CPU and IO concurrent work. - * Readers and writers now overlap. - * Also the buffer size of the block stream might be a little small for - * this task (1MB). It has been increased to 8MB, which indeed improved. - * - * The work divider allocates subtasks to threads based on the - * observed time spending so far. - */ -#include "monetdb_config.h" -#include "tablet.h" -#include "algebra.h" -#include "histogram.h" - -#include <string.h> -#include <ctype.h> -#include <gdk_posix.h> - -#define _SLICE_TABLET_ MK - -/* All the params below should eventually be dynamically defined per column - For simplicity in the initial stages keep them global and fixed */ -#ifdef _SLICE_TABLET_ -oid SmallSliceSize = 2; /* Assume this is known */ -BAT *BigSlice = NULL; /*toy BAT to play with the algos without worying for SQL for now*/ -#endif - -typedef struct{ - int next; - int limit; - lng *time, wtime; /* time per col + time per thread */ - int rounds; /* how often did we divide the work */ - MT_Id tid; - MT_Sema sema; /* threads wait for work , negative next implies exit*/ - MT_Sema reply; /* let reader continue */ - Tablet *as; - char *errbuf; - char *separator; - size_t seplen; - char quote; - int *cols; /* columns to handle */ - char ***fields; -} READERtask; - -/* - * @- - * The parsing of the individual values is straightforward. If the value represents - * the null-replacement string then we grab the underlying nil. - * If the string starts with the quote identified from SQL, we locate the tail - * and interpret the body. - */ -#define _SLICE_TABLET_MK -#ifdef _SLICE_TABLET_MK -Histogram h1 = NULL; -Histogram h2 = NULL; - - -Tablet *SLICEinit(Tablet *as, int slices, Histogram h[]) -{ - Tablet *body; - int len= sizeof(Tablet) + as->nr_attrs * sizeof(Column); - BUN i; - - assert(SLICES >= slices); - for ( i=0; i< as->nr_attrs; i++) - body->columns[i].c[1] = BATnew(TYPE_void, as->columns[i].adt,0); - for ( i = 0; i< slices; i++) - /* init histogram */; - return body; -} -#endif - -static inline ptr* -get_val(Column * fmt, char *s, char quote, str *err, int c) -{ - char buf[BUFSIZ]; - char *e, *t; - ptr *adt;; - - /* include testing on the terminating null byte !! */ - if (fmt->nullstr && strncasecmp(s, fmt->nullstr, fmt->null_length+1) == 0){ -#ifdef _DEBUG_TABLET_ - mnstr_printf(GDKout,"nil value '%s' (%d) found in :%s\n",fmt->nullstr,fmt->nillen,(s?s:"")); -#endif - adt = fmt->nildata; - fmt->c[0]->T->nonil = 0; - } else if ( quote && *s == quote ) { - s++; /* find the last quote */ - for ( t = e = s; *t ; t++) - if ( *t == quote) e = t; - *e = 0; - adt = fmt->frstr(fmt, fmt->adt, s, e, 0); - } else { - for( e=s; *e; e++) - ; - adt = fmt->frstr(fmt, fmt->adt, s, e, 0); - } - - if (!adt) { - char *val; - val = *s ? GDKstrdup(s) : GDKstrdup(""); - if ( *err == NULL){ - snprintf(buf,BUFSIZ, "value '%s' from line " BUNFMT - " field %d not inserted, expecting type %s\n", val, BATcount(fmt->c[0])+1, c+1, fmt->type); - *err= GDKstrdup(buf); - } - GDKfree(val); - /* replace it with a nil */ - adt = fmt->nildata; - fmt->c[0]->T->nonil = 0; - } - /* key maybe NULL but thats not a problem, as long as we have void */ - if (fmt->raw){ - mnstr_write(fmt->raw,adt,ATOMsize(fmt->adt),1); - } - - return adt; -} - -void -replaceVal(BAT * b, int position, ValPtr val) -{ - switch( val->vtype){ - case TYPE_bte: *(bte*)Tloc(b,position) = val->val.btval; - case TYPE_sht: *(sht*)Tloc(b,position) = val->val.shval; - case TYPE_int: *(int*)Tloc(b,position) = val->val.ival; - case TYPE_lng: *(lng*)Tloc(b,position) = val->val.lval; - case TYPE_dbl: *(dbl*)Tloc(b,position) = val->val.dval; - case TYPE_flt: *(flt*)Tloc(b,position) = val->val.fval; - } -} - -#ifdef _SLICE_TABLET_ -static int -Slice(READERtask *task, Column * fmt, ptr key, str *err, int col) -{ - /*naive : fill big, fill small, then rearrange with every new value */ - char buf[BUFSIZ]; - int i; - ptr *adt; - BATiter bsi; - BAT *sample, *sortedSample; - ValPtr vmin, vmax; - /*toy values*/ - int sampleSize=2; - int bins=2; - int distance =0; - - /*sample to get min-max*/ - sample = BATnew(fmt->c[0]->htype, fmt->c[0]->ttype,sampleSize); - for ( i = 0; i< sampleSize ; i++){ - adt=get_val(&fmt[col], task->fields[col][i], task->quote, err, col); - bunfastins(sample, key, adt); - } - sortedSample = BATtsort(sample); - bsi = bat_iterator(sortedSample); - BATprint(sample); - BATprint(sortedSample); - vmin= (ValPtr) GDKzalloc(sizeof(ValRecord)); - vmax= (ValPtr) GDKzalloc(sizeof(ValRecord)); - VALset(vmin, fmt->c[0]->ttype, BUNtail(bsi,0)); - VALset(vmax, fmt->c[0]->ttype, BUNtail(bsi,sampleSize-1)); - - printf("min %d \n", vmin->val.ival); - printf("max %d \n", vmax->val.ival); - - for ( i = 0; i< task->next ; i++){ - ValPtr newValue = (ValPtr) GDKzalloc(sizeof(ValRecord)); - adt=get_val(&fmt[col], task->fields[col][i], task->quote, err, col); - VALset(newValue, fmt->c[0]->ttype, adt); - - /*fill small first*/ - if (BATcount(fmt->c[0]) < SmallSliceSize){ - if (h1 == NULL) - h1 = HSTnew(bins, vmin, vmax); - bunfastins(fmt->c[0], key, adt); - HSTincrement(h1, newValue); - continue; - } - - /*equally fill big*/ - if (BigSlice == NULL) - BigSlice = BATnew(fmt->c[0]->htype, fmt->c[0]->ttype,2*SmallSliceSize); - - if (BATcount(BigSlice) < SmallSliceSize){ - if (h2 == NULL) - h2 = HSTnew(bins, vmin, vmax); - bunfastins(BigSlice, key, adt); - HSTincrement(h2, newValue); - continue; - } - - /*choose target BAT in order to balance the histogram distance*/ - distance = HSTeuclidianWhatIf(h1,h2,newValue); - if (distance==0){ - printf(" add to big\n"); - bunfastins(BigSlice, key, adt); - HSTincrement(h2, newValue); - }else{ - int j; - bit moved = FALSE; - BATiter bi; - ValPtr curVal; - int sizeSmall = BATcount(fmt->c[0]); - printf("add to small\n"); - //bunfastins(fmt->c[0], key, adt); - HSTincrement(h1, newValue); - - /* pick a tuple to move to the big one */ - bi = bat_iterator(fmt->c[0]); - curVal= (ValPtr) GDKzalloc(sizeof(ValRecord)); - for (j=0; j<sizeSmall; j++){ - VALset(curVal, fmt->c[0]->ttype, BUNtail(bi,j)); - distance = HSTeuclidianWhatIfMove(h1,h2,curVal); - if (distance==0){ - moved = TRUE; - printf("found val to replace %d %d with %d\n",j,curVal->val.ival, newValue->val.ival); - replaceVal(fmt->c[0], j, newValue); - HSTprintf(h1); - HSTdecrement(h1,curVal); - HSTprintf(h1); - bunfastins(BigSlice, key,&curVal->val.ival); - HSTincrement(h2,curVal); - BATprint(fmt->c[0]); - BATprint(BigSlice); - break; - } - } - /*could not find a tuple to move*/ - if (!moved){ - printf(" could not find a tuple to move: add to big\n"); - bunfastins(BigSlice, key, adt); - HSTincrement(h2, newValue); - HSTdecrement(h1, newValue); _______________________________________________ Checkin-list mailing list [email protected] http://mail.monetdb.org/mailman/listinfo/checkin-list
