Changeset: cc2f920e521e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cc2f920e521e
Added Files:
monetdb5/modules/mal/mal_weld.c
monetdb5/modules/mal/mal_weld.h
monetdb5/modules/mal/mal_weld.mal
sql/backends/monet5/rel_weld.c
sql/backends/monet5/rel_weld.h
Modified Files:
monetdb5/modules/mal/Makefile.ag
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
sql/backends/monet5/Makefile.ag
sql/backends/monet5/rel_bin.c
sql/backends/monet5/rel_bin.h
sql/backends/monet5/sql_gencode.c
sql/backends/monet5/sql_statement.c
sql/backends/monet5/sql_statement.h
Branch: rel-weld
Log Message:
Generate Weld code from the relational algebra.
Code generation currently works for simple queries involving the
Select and Project operators.
diffs (truncated from 1117 to 300 lines):
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -11,7 +11,7 @@ INCLUDES = ../../mal ../atoms ../kernel
../../../common/utils \
../../../gdk \
$(pcre_CFLAGS) $(zlib_CFLAGS) $(BZIP_INCS) $(MSGCONTROL_FLAGS) \
- $(openssl_CFLAGS)
+ $(openssl_CFLAGS) $(WELD_INCS)
MTSAFE
@@ -30,6 +30,7 @@ lib_mal = {
language.c language.h \
mal_io.c mal_io.h \
mal_mapi.c mal_mapi.h \
+ mal_weld.c mal_weld.h \
manual.c manual.h \
mat.c mat.h \
mdb.c mdb.h \
@@ -50,6 +51,7 @@ lib_mal = {
sample.c sample.h \
json_util.c json_util.h \
calc.c batcalc.c
+ LIBS = $(WELD_LIBS)
}
headers_mal = {
@@ -61,7 +63,7 @@ headers_mal = {
iterator.mal clients.mal \
factories.mal groupby.mal mdb.mal pcre.mal mat.mal \
transaction.mal oltp.mal wlc.mal \
- mal_mapi.mal sabaoth.mal remote.mal \
+ mal_mapi.mal mal_weld.mal sabaoth.mal remote.mal \
txtsim.mal \
tokenizer.mal sample.mal json_util.mal \
batmtime.mal querylog.mal sysmon.mal
@@ -70,7 +72,7 @@ headers_mal = {
headers_auto = {
HEADERS = mal
DIR = libdir/monetdb5/autoload
- SOURCES = 01_calc.mal 01_batcalc.mal
+ SOURCES = 01_calc.mal 01_batcalc.mal mal_weld.mal
}
headers_hge = {
diff --git a/monetdb5/modules/mal/mal_weld.c b/monetdb5/modules/mal/mal_weld.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/mal_weld.c
@@ -0,0 +1,288 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
+ */
+
+#include "monetdb_config.h"
+#include "gdk.h"
+#include "mal_exception.h"
+#include "mal_interpreter.h"
+#include "mal_instruction.h"
+#include "mal_weld.h"
+#include "weld.h"
+
+#define STR_BUF_SIZE 4096
+#define WELD_DEBUG 1
+
+/*
+ * Copy one TYPE-sized member between the object VALUE points to and the
+ * buffer position *ADDR, then advance *ADDR past that member.
+ *
+ *  ADDR  - char ** cursor; *ADDR is first rounded up to a multiple of
+ *          sizeof(TYPE) and afterwards advanced by sizeof(TYPE)
+ *  TYPE  - C type of the member being transferred
+ *  VALUE - pointer to the caller-side object of type TYPE
+ *  OP    - OP_GET copies buffer -> *VALUE; any other value copies
+ *          *VALUE -> buffer
+ *
+ * The body is wrapped in do { } while (0) so the macro expands to exactly one
+ * statement, and every argument is parenthesized.
+ */
+#define getOrSetStructMemberImpl(ADDR, TYPE, VALUE, OP) \
+	do { \
+		size_t misalign_ = (size_t)*(ADDR) % sizeof(TYPE); \
+		if (misalign_ != 0) \
+			*(ADDR) += sizeof(TYPE) - misalign_; /* align */ \
+		if ((OP) == OP_GET) \
+			*(TYPE *)(VALUE) = *(TYPE *)(*(ADDR)); /* get */ \
+		else \
+			*(TYPE *)(*(ADDR)) = *(TYPE *)(VALUE); /* set */ \
+		*(ADDR) += sizeof(TYPE); /* increase */ \
+	} while (0)
+
+
+/*
+ * Transfer one struct member of MAL type `type` between `value` and the
+ * buffer cursor `*addr`, aligning and advancing the cursor.
+ *
+ *  addr  - in/out cursor into the struct-layout buffer
+ *  type  - MAL atom type of the member
+ *  value - pointer to the caller's object; it is written through when
+ *          op == OP_GET even though the parameter is declared const
+ *  op    - OP_GET to read from the buffer, otherwise write to it
+ *
+ * A type that is not listed is retried with its ATOMstorage() type; if that
+ * is the same type, the call does nothing and the cursor is left untouched.
+ *
+ * NOTE(review): TYPE_lng is transferred as a C `long`; that is only 64 bits
+ * wide on LP64 platforms - confirm before building elsewhere.
+ */
+void getOrSetStructMember(char **addr, int type, const void *value, int op) {
+	if (type == TYPE_bte) {
+		getOrSetStructMemberImpl(addr, char, value, op);
+	} else if (type == TYPE_sht) {
+		/* getMalTypeFromWidth() can yield TYPE_sht, so handle it here */
+		getOrSetStructMemberImpl(addr, short, value, op);
+	} else if (type == TYPE_int) {
+		getOrSetStructMemberImpl(addr, int, value, op);
+	} else if (type == TYPE_lng) {
+		getOrSetStructMemberImpl(addr, long, value, op);
+	} else if (type == TYPE_flt) {
+		getOrSetStructMemberImpl(addr, float, value, op);
+	} else if (type == TYPE_dbl) {
+		getOrSetStructMemberImpl(addr, double, value, op);
+	} else if (type == TYPE_str) {
+		getOrSetStructMemberImpl(addr, char*, value, op);
+	} else if (type == TYPE_ptr) {
+		/* TODO - will assume that all pointers have the same size */
+		getOrSetStructMemberImpl(addr, char*, value, op);
+	} else if (ATOMstorage(type) != type) {
+		/* plain call: a void function must not `return` an expression */
+		getOrSetStructMember(addr, ATOMstorage(type), value, op);
+	}
+}
+
+/*
+ * Map a MAL atom type to the name of the corresponding Weld type.
+ *
+ * Strings map to "vec[i8]". A type without a direct mapping is looked up
+ * again through its ATOMstorage() type; NULL is returned when that yields
+ * the same type, i.e. when no mapping exists.
+ */
+str getWeldType(int type) {
+	int storage;
+
+	if (type == TYPE_bte) {
+		return "i8";
+	}
+	if (type == TYPE_int) {
+		return "i32";
+	}
+	if (type == TYPE_lng) {
+		return "i64";
+	}
+	if (type == TYPE_flt) {
+		return "f32";
+	}
+	if (type == TYPE_dbl) {
+		return "f64";
+	}
+	if (type == TYPE_str) {
+		return "vec[i8]";
+	}
+
+	/* fall back to the underlying storage type, if it differs */
+	storage = ATOMstorage(type);
+	if (storage == type) {
+		return NULL;
+	}
+	return getWeldType(storage);
+}
+
+/*
+ * Return the name of the unsigned Weld integer type for a given width
+ * (presumably a width in bytes - confirm against callers).
+ * Widths other than 1, 2 and 4 yield "u64".
+ */
+static str getWeldUTypeFromWidth(int width) {
+	switch (width) {
+	case 1:
+		return "u8";
+	case 2:
+		return "u16";
+	case 4:
+		return "u32";
+	default:
+		return "u64";
+	}
+}
+
+/*
+ * Return the MAL integer atom type for a given width (presumably a width
+ * in bytes - confirm against callers).
+ * Widths other than 1, 2 and 4 yield TYPE_lng.
+ */
+static int getMalTypeFromWidth(int width) {
+	switch (width) {
+	case 1:
+		return TYPE_bte;
+	case 2:
+		return TYPE_sht;
+	case 4:
+		return TYPE_int;
+	default:
+		return TYPE_lng;
+	}
+}
+
+/*
+ * Pretty-print a Weld program to `f`, inserting line breaks and tab
+ * indentation around structural characters:
+ *   '('  and a '|' not preceded by a space: emit the character, indent one
+ *        level deeper, start a new line
+ *   ';'  emit the character, start a new line at the current depth
+ *   ')'  drop one level, start a new line, then emit the character
+ * All other characters are copied unchanged.
+ *
+ * The string is walked up to its terminating NUL instead of calling
+ * strlen() in the loop condition, which rescanned the program on every
+ * iteration.
+ */
+void dumpWeldProgram(str program, FILE *f) {
+	int tabs = 0;
+	char prev = '\0'; /* character before the current one; NUL at the start */
+	const char *p;
+
+	for (p = program; *p != '\0'; p++) {
+		char curr = *p;
+		int print_tabs = 0, print_before = 1, j;
+
+		if (curr == '(' || (curr == '|' && prev != ' ')) {
+			++tabs;
+			print_tabs = 1;
+		} else if (curr == ';') {
+			print_tabs = 1;
+		} else if (curr == ')') {
+			--tabs;
+			print_before = 0; /* the ')' goes after the line break */
+			print_tabs = 1;
+		}
+
+		if (print_before)
+			fputc(curr, f);
+		if (print_tabs) {
+			fputc('\n', f);
+			for (j = 0; j < tabs; j++) {
+				fputc('\t', f);
+			}
+		}
+		if (!print_before)
+			fputc(curr, f);
+
+		prev = curr;
+	}
+}
+
+
+/*
+ * Debug helper: write every instruction of the MAL block `mb`, four blank
+ * lines, and the pretty-printed Weld `program` to a freshly named temporary
+ * file.
+ *
+ * The dump is best-effort: if no temporary name can be generated or the
+ * file cannot be opened, nothing is written.
+ *
+ * NOTE(review): tmpnam() is open to a race between generating the name and
+ * opening the file, and the chosen path is never reported to the caller or
+ * logged - consider mkstemp() and printing the path.
+ */
+static void dumpProgram(MalBlkPtr mb, str program) {
+	char *path = tmpnam(NULL);
+	FILE *f;
+	int i;
+
+	if (path == NULL)
+		return;
+	f = fopen(path, "w");
+	if (f == NULL)
+		return; /* avoid handing a NULL stream to the print routines */
+
+	for (i = 0; i < mb->stop; i++) {
+		fprintInstruction(f, mb, NULL, mb->stmt[i], LIST_MAL_ALL);
+	}
+	fprintf(f, "\n\n\n\n");
+	dumpWeldProgram(program, f);
+	fclose(f);
+}
+
+/*
+ * Return the time reported by gettimeofday(), converted to milliseconds.
+ */
+static long getTimeNowMs(void) {
+	struct timeval now;
+	long wholeSecondsMs, fractionMs;
+
+	gettimeofday(&now, NULL);
+	wholeSecondsMs = (long)now.tv_sec * 1000;
+	fractionMs = (long)now.tv_usec / 1000;
+	return wholeSecondsMs + fractionMs;
+}
+
+str
+WeldRun(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void)cntxt;
+
+ str programBody = *getArgReference_str(stk, pci, pci->retc);
+ str program = malloc(strlen(programBody) + 2 * STR_BUF_SIZE);
+ int *names = (int*)*getArgReference_ptr(stk, pci, pci->retc + 1);
+ int i, j, headerLen = 0;
+
+ /* Build the input stmt, e.g.: |in13:i32, in50:vec[i8]| */
+ for (i = pci->retc + 2; i < pci->argc; i++) { /* skip wstate and names
*/
+ int type = getArgType(mb, pci, i);
+ int namesIdx = i - (pci->retc + 2);
+ if (isaBatType(type) && getBatType(type) != TYPE_str) {
+ headerLen += sprintf(program + headerLen, "
in%d:vec[%s],", names[namesIdx],
+
getWeldType(getBatType(type)));
+ } else if (isaBatType(type) && getBatType(type) == TYPE_str) {
+ bat bid = *getArgReference_bat(stk, pci, i);
+ BAT *b = BATdescriptor(bid);
+ if (b == NULL) throw(MAL, "weld.run", SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING": %d", getArg(pci, i));
+ headerLen += sprintf(
+ program + headerLen, " in%d:vec[%s],
in%dstr:vec[i8], in%dstroffset:i64,",
+ names[namesIdx],
getWeldUTypeFromWidth(b->twidth), names[namesIdx], names[namesIdx]);
+ } else {
+ headerLen +=
+ sprintf(program + headerLen, " in%d:%s,",
names[namesIdx], getWeldType(type));
+ }
+ }
+ program[0] = '|';
+ program[headerLen - 1] = '|';
+ program = strcat(program, programBody);
+
+ weld_error_t e = weld_error_new();
+ weld_conf_t conf = weld_conf_new();
+ (void)dumpProgram; /* supress the unused warning */
+#ifdef WELD_DEBUG
+ dumpProgram(mb, program);
+ weld_conf_set(conf, "weld.compile.dumpCode", "true");
+ weld_conf_set(conf, "weld.compile.dumpCodeDir", "/tmp");
+#endif
+ char nrThreads[8], memLimit[64];
+ sprintf(nrThreads, "%d", GDKnr_threads);
+ sprintf(memLimit, "%ld", 256L * 1L << 30); /* 256 GB */
+ weld_conf_set(conf, "weld.threads", nrThreads);
+ weld_conf_set(conf, "weld.memory.limit", memLimit);
+ long start = getTimeNowMs();
+ weld_module_t m = weld_module_compile(program, conf, e);
+ if (weld_error_code(e)) {
+ throw(MAL, "weld.run", PROGRAM_GENERAL ": %s",
weld_error_message(e));
+ }
+ long elapsed = getTimeNowMs() - start;
+ fprintf(stderr, "%ld,", elapsed);
+
+ /* Prepare the input for Weld. We're building an array that has the
layout of a struct */
+ /* Max possible size is when we only have string bats: 2 ptrs for theap
and tvheap and 4 lngs
+ * for batCount, hseqbase, stroffset and tvheap->size. */
+ char *inputStruct = malloc((pci->argc - pci->retc) * (2 * sizeof(void
*) + 3 * sizeof(lng)));
+ char *inputPtr = inputStruct;
+ for (i = pci->retc + 2; i < pci->argc; i++) { /* skip wstate and names
*/
+ int type = getArgType(mb, pci, i);
+ if (isaBatType(type)) {
+ bat bid = *getArgReference_bat(stk, pci, i);
+ BAT *b = BATdescriptor(bid);
+ if (b == NULL) throw(MAL, "weld.run", SQLSTATE(HY002)
RUNTIME_OBJECT_MISSING": %d", getArg(pci, i));
+ if (BATtdense(b)) {
+ fprintf(stderr, "bat is dense: %d\n",
getArg(pci, i));
+ }
+ getOrSetStructMember(&inputPtr, TYPE_ptr,
&b->theap.base, OP_SET);
+ getOrSetStructMember(&inputPtr, TYPE_lng, &b->batCount,
OP_SET);
+ if (getBatType(type) == TYPE_str) {
+ getOrSetStructMember(&inputPtr, TYPE_str,
&b->tvheap->base, OP_SET);
+ getOrSetStructMember(&inputPtr, TYPE_lng,
&b->tvheap->size, OP_SET);
+ lng offset = b->twidth <= 2 ? GDK_VAROFFSET : 0;
+ getOrSetStructMember(&inputPtr, TYPE_lng,
&offset, OP_SET);
+ }
+ } else {
+ getOrSetStructMember(&inputPtr, type,
getArgReference(stk, pci, i), OP_SET);
+ if (type == TYPE_str) {
+ long len = strlen(*getArgReference_str(stk,
pci, i));
+ getOrSetStructMember(&inputPtr, TYPE_lng, &len,
OP_SET);
+ }
+ }
+ }
+
+ /* Run the weld program */
+ weld_value_t arg = weld_value_new(inputStruct);
+ weld_value_t result = weld_module_run(m, conf, arg, e);
+
+ /* Retrieve the output */
+ char *outputStruct = weld_value_data(result);
+ for (i = 0; i < pci->retc; i++) {
+ int type = getArgType(mb, pci, i);
+ if (isaBatType(type)) {
+ BAT *b = NULL;
+ char *base = NULL;
+ long size = 0;
+ getOrSetStructMember(&outputStruct, TYPE_ptr, &base,
OP_GET);
+ getOrSetStructMember(&outputStruct, TYPE_lng, &size,
OP_GET);
+ if (getBatType(type) == TYPE_str) {
+ char *strbase = NULL;
+ char *strsize = 0;
+ getOrSetStructMember(&outputStruct, TYPE_str,
&strbase, OP_GET);
+ getOrSetStructMember(&outputStruct, TYPE_lng,
&strsize, OP_GET);
+ /* Find the matching vheap from the input bats
*/
+ for (j = pci->retc; j < pci->argc; j++) {
+ int inputType = getArgType(mb, pci, j);
+ if (isaBatType(inputType) &&
getBatType(inputType) == TYPE_str) {
+ bat inid =
*getArgReference_bat(stk, pci, j);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list