Changeset: cc2f920e521e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cc2f920e521e
Added Files:
        monetdb5/modules/mal/mal_weld.c
        monetdb5/modules/mal/mal_weld.h
        monetdb5/modules/mal/mal_weld.mal
        sql/backends/monet5/rel_weld.c
        sql/backends/monet5/rel_weld.h
Modified Files:
        monetdb5/modules/mal/Makefile.ag
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        sql/backends/monet5/Makefile.ag
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/rel_bin.h
        sql/backends/monet5/sql_gencode.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
Branch: rel-weld
Log Message:

generate weld code from the relational algebra

Weld code generation currently works for simple queries involving the 
Select and Project operators.


diffs (truncated from 1117 to 300 lines):

diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -11,7 +11,7 @@ INCLUDES = ../../mal ../atoms ../kernel 
        ../../../common/utils \
        ../../../gdk \
        $(pcre_CFLAGS) $(zlib_CFLAGS) $(BZIP_INCS) $(MSGCONTROL_FLAGS) \
-       $(openssl_CFLAGS)
+       $(openssl_CFLAGS) $(WELD_INCS)
 
 MTSAFE
 
@@ -30,6 +30,7 @@ lib_mal = {
                language.c language.h \
                mal_io.c mal_io.h \
                mal_mapi.c mal_mapi.h \
+               mal_weld.c mal_weld.h \
                manual.c manual.h \
                mat.c mat.h \
                mdb.c mdb.h \
@@ -50,6 +51,7 @@ lib_mal = {
                sample.c sample.h \
                json_util.c json_util.h \
                calc.c batcalc.c
+       LIBS = $(WELD_LIBS)
 }
 
 headers_mal = {
@@ -61,7 +63,7 @@ headers_mal = {
                iterator.mal clients.mal \
                factories.mal groupby.mal mdb.mal pcre.mal mat.mal \
                transaction.mal oltp.mal wlc.mal \
-               mal_mapi.mal sabaoth.mal remote.mal  \
+               mal_mapi.mal mal_weld.mal sabaoth.mal remote.mal  \
                txtsim.mal \
                tokenizer.mal sample.mal json_util.mal \
                batmtime.mal querylog.mal sysmon.mal
@@ -70,7 +72,7 @@ headers_mal = {
 headers_auto = {
        HEADERS = mal
        DIR = libdir/monetdb5/autoload
-       SOURCES = 01_calc.mal 01_batcalc.mal
+       SOURCES = 01_calc.mal 01_batcalc.mal mal_weld.mal
 }
 
 headers_hge = {
diff --git a/monetdb5/modules/mal/mal_weld.c b/monetdb5/modules/mal/mal_weld.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/mal_weld.c
@@ -0,0 +1,288 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
+ */
+
+#include "monetdb_config.h"
+#include "gdk.h"
+#include "mal_exception.h"
+#include "mal_interpreter.h"
+#include "mal_instruction.h"
+#include "mal_weld.h"
+#include "weld.h"
+
+#define STR_BUF_SIZE 4096
+#define WELD_DEBUG 1
+
+#define getOrSetStructMemberImpl(ADDR, TYPE, VALUE, OP)                 \
+       if ((long)*ADDR % sizeof(TYPE) != 0)                                \
+               *ADDR += sizeof(TYPE) - (long)*ADDR % sizeof(TYPE); /* aling */ 
\
+       if (OP == OP_GET)                                                   \
+               *(TYPE *)VALUE = *(TYPE *)(*ADDR); /* get */                    
\
+       else                                                                \
+               *(TYPE *)(*ADDR) = *(TYPE *)VALUE; /* set */                    
\
+       *ADDR += sizeof(TYPE);                             /* increase */
+
+
+/*
+ * Get or set a single struct member at the cursor *addr and advance the
+ * cursor past it (including any alignment padding).
+ *
+ * addr  - in/out cursor into the buffer that has the layout of a struct
+ * type  - MAL atom type of the member; types that are not handled directly
+ *         are retried with their storage type (ATOMstorage)
+ * value - with OP_GET: destination the member is copied to (it is written
+ *         through despite the const qualifier); otherwise: source of the
+ *         value that is stored at the cursor
+ * op    - OP_GET to read the member, anything else writes it
+ *
+ * A type that is neither handled here nor has a different storage type is
+ * silently ignored: nothing is copied and the cursor is left unchanged.
+ */
+void getOrSetStructMember(char **addr, int type, const void *value, int op) {
+       if (type == TYPE_bte) {
+               getOrSetStructMemberImpl(addr, char, value, op);
+       } else if (type == TYPE_int) {
+               getOrSetStructMemberImpl(addr, int, value, op);
+       } else if (type == TYPE_lng) {
+               getOrSetStructMemberImpl(addr, long, value, op);
+       } else if (type == TYPE_flt) {
+               getOrSetStructMemberImpl(addr, float, value, op);
+       } else if (type == TYPE_dbl) {
+               getOrSetStructMemberImpl(addr, double, value, op);
+       } else if (type == TYPE_str || type == TYPE_ptr) {
+               /* TODO - will assume that all pointers have the same size */
+               getOrSetStructMemberImpl(addr, char*, value, op);
+       } else if (ATOMstorage(type) != type) {
+               /* Plain call: a void function must not "return" an expression. */
+               getOrSetStructMember(addr, ATOMstorage(type), value, op);
+       }
+}
+
+/*
+ * Map a MAL atom type to the name of the matching Weld type.  A type that
+ * is not listed is replaced by its storage type and looked up again; when
+ * the storage type is the type itself there is no mapping and NULL is
+ * returned.
+ */
+str getWeldType(int type) {
+       for (;;) {
+               int storage;
+
+               if (type == TYPE_bte)
+                       return "i8";
+               if (type == TYPE_int)
+                       return "i32";
+               if (type == TYPE_lng)
+                       return "i64";
+               if (type == TYPE_flt)
+                       return "f32";
+               if (type == TYPE_dbl)
+                       return "f64";
+               if (type == TYPE_str)
+                       return "vec[i8]";
+
+               storage = ATOMstorage(type);
+               if (storage == type)
+                       return NULL;
+               type = storage;
+       }
+}
+
+/*
+ * Name of the unsigned Weld integer type that is `width` bytes wide.
+ * Any width other than 1, 2 or 4 yields "u64".
+ */
+static str getWeldUTypeFromWidth(int width) {
+       switch (width) {
+       case 1:
+               return "u8";
+       case 2:
+               return "u16";
+       case 4:
+               return "u32";
+       default:
+               return "u64";
+       }
+}
+
+/*
+ * MAL integer atom type that is `width` bytes wide.
+ * Any width other than 1, 2 or 4 yields TYPE_lng.
+ */
+static int getMalTypeFromWidth(int width) {
+       switch (width) {
+       case 1:
+               return TYPE_bte;
+       case 2:
+               return TYPE_sht;
+       case 4:
+               return TYPE_int;
+       default:
+               return TYPE_lng;
+       }
+}
+
+/*
+ * Pretty-print a Weld program to `f`, one nesting level per tab:
+ *  - after '(' and after a '|' that is not preceded by a space, the
+ *    indentation level is increased and a new line is started;
+ *  - after ';' a new line is started at the current level;
+ *  - before ')' the level is decreased and a new line is started, so the
+ *    closing parenthesis is the first character on its line.
+ * All other characters are copied unchanged.
+ *
+ * Nothing is written when `program` or `f` is NULL.  The program length is
+ * computed once up front instead of on every loop iteration.
+ */
+void dumpWeldProgram(str program, FILE *f) {
+       size_t i, len;
+       int j, tabs = 0;
+
+       if (program == NULL || f == NULL)
+               return;
+
+       len = strlen(program);
+       for (i = 0; i < len; i++) {
+               char curr = program[i];
+               char prev = i > 0 ? program[i - 1] : '\0';
+               int opens = curr == '(' || (curr == '|' && prev != ' ');
+               int closes = curr == ')';
+
+               if (opens)
+                       ++tabs;
+               else if (closes)
+                       --tabs;
+
+               /* ')' goes after the line break, everything else before it */
+               if (!closes)
+                       fputc(curr, f);
+               if (opens || closes || curr == ';') {
+                       fputc('\n', f);
+                       for (j = 0; j < tabs; j++)
+                               fputc('\t', f);
+               }
+               if (closes)
+                       fputc(curr, f);
+       }
+}
+
+
+/*
+ * Debugging aid: write every instruction of the MAL block `mb`, followed
+ * by the pretty-printed Weld `program`, to a freshly named temporary file.
+ *
+ * The dump is best effort: if no temporary name can be generated or the
+ * file cannot be opened, nothing is written and the function returns.
+ *
+ * NOTE(review): tmpnam() is subject to a race between generating the name
+ * and opening the file; consider mkstemp() where it is available.
+ */
+static void dumpProgram(MalBlkPtr mb, str program) {
+       const char *path = tmpnam(NULL);
+       FILE *f;
+       int i;
+
+       if (path == NULL)
+               return;
+       f = fopen(path, "w");
+       if (f == NULL)
+               return;
+
+       for (i = 0; i < mb->stop; i++) {
+               fprintInstruction(f, mb, NULL, mb->stmt[i], LIST_MAL_ALL);
+       }
+       fprintf(f, "\n\n\n\n");
+       dumpWeldProgram(program, f);
+       fclose(f);
+}
+
+/*
+ * Current time as reported by gettimeofday(), converted to milliseconds.
+ */
+static long getTimeNowMs(void) {
+       struct timeval now;
+       long seconds_ms, micros_ms;
+
+       gettimeofday(&now, NULL);
+       seconds_ms = (long)now.tv_sec * 1000;
+       micros_ms = (long)now.tv_usec / 1000;
+       return seconds_ms + micros_ms;
+}
+
+str
+WeldRun(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void)cntxt;
+
+       str programBody = *getArgReference_str(stk, pci, pci->retc);
+       str program = malloc(strlen(programBody) + 2 * STR_BUF_SIZE);
+       int *names = (int*)*getArgReference_ptr(stk, pci, pci->retc + 1);
+       int i, j, headerLen = 0;
+
+       /* Build the input stmt, e.g.: |in13:i32, in50:vec[i8]| */
+       for (i = pci->retc + 2; i < pci->argc; i++) { /* skip wstate and names 
*/
+               int type = getArgType(mb, pci, i);
+               int namesIdx = i - (pci->retc + 2);
+               if (isaBatType(type) && getBatType(type) != TYPE_str) {
+                       headerLen += sprintf(program + headerLen, " 
in%d:vec[%s],", names[namesIdx],
+                                                                
getWeldType(getBatType(type)));
+               } else if (isaBatType(type) && getBatType(type) == TYPE_str) {
+                       bat bid = *getArgReference_bat(stk, pci, i);
+                       BAT *b = BATdescriptor(bid);
+                       if (b == NULL) throw(MAL, "weld.run", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING": %d", getArg(pci, i));
+                       headerLen += sprintf(
+                               program + headerLen, " in%d:vec[%s], 
in%dstr:vec[i8], in%dstroffset:i64,",
+                               names[namesIdx], 
getWeldUTypeFromWidth(b->twidth), names[namesIdx], names[namesIdx]);
+               } else {
+                       headerLen +=
+                               sprintf(program + headerLen, " in%d:%s,", 
names[namesIdx], getWeldType(type));
+               }
+       }
+       program[0] = '|';
+       program[headerLen - 1] = '|';
+       program = strcat(program, programBody);
+
+       weld_error_t e = weld_error_new();
+       weld_conf_t conf = weld_conf_new();
+       (void)dumpProgram; /* supress the unused warning */
+#ifdef WELD_DEBUG
+       dumpProgram(mb, program);
+       weld_conf_set(conf, "weld.compile.dumpCode", "true");
+       weld_conf_set(conf, "weld.compile.dumpCodeDir", "/tmp");
+#endif
+       char nrThreads[8], memLimit[64];
+       sprintf(nrThreads, "%d", GDKnr_threads);
+       sprintf(memLimit, "%ld", 256L * 1L << 30); /* 256 GB */
+       weld_conf_set(conf, "weld.threads", nrThreads);
+       weld_conf_set(conf, "weld.memory.limit", memLimit);
+       long start = getTimeNowMs();
+       weld_module_t m = weld_module_compile(program, conf, e);
+       if (weld_error_code(e)) {
+               throw(MAL, "weld.run", PROGRAM_GENERAL ": %s", 
weld_error_message(e));
+       }
+       long elapsed = getTimeNowMs() - start;
+       fprintf(stderr, "%ld,", elapsed);
+
+       /* Prepare the input for Weld. We're building an array that has the 
layout of a struct */
+       /* Max possible size is when we only have string bats: 2 ptrs for theap 
and tvheap and 4 lngs
+        * for batCount, hseqbase, stroffset and tvheap->size. */
+       char *inputStruct = malloc((pci->argc - pci->retc) * (2 * sizeof(void 
*) + 3 * sizeof(lng)));
+       char *inputPtr = inputStruct;
+       for (i = pci->retc + 2; i < pci->argc; i++) { /* skip wstate and names 
*/
+               int type = getArgType(mb, pci, i);
+               if (isaBatType(type)) {
+                       bat bid = *getArgReference_bat(stk, pci, i);
+                       BAT *b = BATdescriptor(bid);
+                       if (b == NULL) throw(MAL, "weld.run", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING": %d", getArg(pci, i));
+                       if (BATtdense(b)) {
+                               fprintf(stderr, "bat is dense: %d\n", 
getArg(pci, i));
+                       }
+                       getOrSetStructMember(&inputPtr, TYPE_ptr, 
&b->theap.base, OP_SET);
+                       getOrSetStructMember(&inputPtr, TYPE_lng, &b->batCount, 
OP_SET);
+                       if (getBatType(type) == TYPE_str) {
+                               getOrSetStructMember(&inputPtr, TYPE_str, 
&b->tvheap->base, OP_SET);
+                               getOrSetStructMember(&inputPtr, TYPE_lng, 
&b->tvheap->size, OP_SET);
+                               lng offset = b->twidth <= 2 ? GDK_VAROFFSET : 0;
+                               getOrSetStructMember(&inputPtr, TYPE_lng, 
&offset, OP_SET);
+                       }
+               } else {
+                       getOrSetStructMember(&inputPtr, type, 
getArgReference(stk, pci, i), OP_SET);
+                       if (type == TYPE_str) {
+                               long len = strlen(*getArgReference_str(stk, 
pci, i));
+                               getOrSetStructMember(&inputPtr, TYPE_lng, &len, 
OP_SET);
+                       }
+               }
+       }
+
+       /* Run the weld program */
+       weld_value_t arg = weld_value_new(inputStruct);
+       weld_value_t result = weld_module_run(m, conf, arg, e);
+
+       /* Retrieve the output */
+       char *outputStruct = weld_value_data(result);
+       for (i = 0; i < pci->retc; i++) {
+               int type = getArgType(mb, pci, i);
+               if (isaBatType(type)) {
+                       BAT *b = NULL;
+                       char *base = NULL;
+                       long size = 0;
+                       getOrSetStructMember(&outputStruct, TYPE_ptr, &base, 
OP_GET);
+                       getOrSetStructMember(&outputStruct, TYPE_lng, &size, 
OP_GET);
+                       if (getBatType(type) == TYPE_str) {
+                               char *strbase = NULL;
+                               char *strsize = 0;
+                               getOrSetStructMember(&outputStruct, TYPE_str, 
&strbase, OP_GET);
+                               getOrSetStructMember(&outputStruct, TYPE_lng, 
&strsize, OP_GET);
+                               /* Find the matching vheap from the input bats 
*/
+                               for (j = pci->retc; j < pci->argc; j++) {
+                                       int inputType = getArgType(mb, pci, j);
+                                       if (isaBatType(inputType) && 
getBatType(inputType) == TYPE_str) {
+                                               bat inid = 
*getArgReference_bat(stk, pci, j);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to