Changeset: 735c252d2dff for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=735c252d2dff
Modified Files:
monetdb5/modules/mal/mal_weld.c
monetdb5/modules/mal/mal_weld.h
monetdb5/modules/mal/mal_weld.mal
monetdb5/modules/mal/mal_weld.mal.sh
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
monetdb5/optimizer/opt_weld.c
Branch: mal-weld
Log Message:
weld impl for group.group and group.groupdone
diffs (250 lines):
diff --git a/monetdb5/modules/mal/mal_weld.c b/monetdb5/modules/mal/mal_weld.c
--- a/monetdb5/modules/mal/mal_weld.c
+++ b/monetdb5/modules/mal/mal_weld.c
@@ -117,12 +117,14 @@ static void dumpWeldProgram(weldState *w
}
str
-WeldInitState(ptr *retval)
+WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
+ (void)cntxt;
weldState *wstate = malloc(sizeof(weldState));
wstate->programMaxLen = 1;
wstate->program = calloc(wstate->programMaxLen, sizeof(char));
- *retval = wstate;
+ wstate->groupDeps = calloc(mb->vtop, sizeof(InstrPtr));
+ *getArgReference_ptr(stk, pci, 0) = wstate;;
return MAL_SUCCEED;
}
@@ -178,6 +180,7 @@ WeldRun(Client cntxt, MalBlkPtr mb, MalS
weld_module_t m = weld_module_compile(wstate->program, conf, e);
weld_conf_free(conf);
free(wstate->program);
+ free(wstate->groupDeps);
free(wstate);
if (weld_error_code(e)) {
throw(MAL, "weld.run", PROGRAM_GENERAL ": %s",
weld_error_message(e));
@@ -531,6 +534,90 @@ WeldBatcalcMULsignal(Client cntxt, MalBl
return WeldBatcalcBinary(mb, stk, pci, "*", "weld.batcalcmul");
}
+/* Ignore the existing groups and instead use all the columns up to this point
to
+ * generate the new group ids. Weld will remove the unnecessary computations.
e.g.:
+ * g1, e1, h1 = group.group(col1) -> for(zip(col1), dictmerger[ty1, i64,
min]...
+ * g2, e2, h2 = group.group(col2, g1) -> for(zip(col2, col1), dictmerger[{ty2,
ty1}, i64, min]...
+ */
+str
+WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void)cntxt;
+ int groups = getArg(pci, 0); /* bat[:oid] */
+ int extents = getArg(pci, 1); /* bat[:oid] */
+ int histo = getArg(pci, 2); /* bat[:lng] */
+ weldState *wstate;
+ if (pci->argc == 6) {
+ wstate = *getArgReference_ptr(stk, pci, 5); /* has value */
+ } else {
+ wstate = *getArgReference_ptr(stk, pci, 4); /* has value */
+ }
+
+ /* Build zip(col1, col2, ...) */
+ wstate->groupDeps[groups] = pci;
+ InstrPtr dep = pci;
+ char zipStmt[STR_SIZE_INC] = {'\0'};
+ char dictTypeStmt[STR_SIZE_INC] = {'\0'};
+ int count = 0;
+ while (dep != NULL) {
+ ++count;
+ int col = getArg(dep, 3);
+ int colType = getBatType(getArgType(mb, dep, 3));
+ sprintf(zipStmt + strlen(zipStmt), "v%d,", col);
+ sprintf(dictTypeStmt + strlen(dictTypeStmt), " %s,",
getWeldType(colType));
+ if (dep->argc == 6) {
+ int oldGrps = getArg(dep, 4);
+ dep = wstate->groupDeps[oldGrps];
+ } else {
+ dep = NULL;
+ }
+ }
+ /* Replace the last comma */
+ zipStmt[strlen(zipStmt) - 1] = '\0';
+ if (count == 1) {
+ dictTypeStmt[strlen(dictTypeStmt) - 1] = '\0';
+ } else {
+ dictTypeStmt[0] = '{';
+ dictTypeStmt[strlen(dictTypeStmt) - 1] = '}';
+ }
+
+ char weldStmt[STR_SIZE_INC * 2];
+ sprintf(weldStmt, "\
+ let groupHash = result( \
+ for(zip(%s), dictmerger[%s, i64, min], |b, i, n| \
+ merge(b, {n, i}) \
+ ) \
+ ); \
+ let groupHashVec = tovec(groupHash); \
+ let groupIdsDict = result( \
+ for(groupHashVec, dictmerger[%s, i64, min], |b, i, n| \
+ merge(b, {n.$0, i}) \
+ ) \
+ ); \
+ let empty = result( \
+ for(rangeiter(0L, len(groupHashVec), 1L), appender[i64], |b, i,
n| \
+ merge(b, 0L) \
+ ) \
+ ); \
+ let idsAndCounts = for(zip(%s), {appender[i64], vecmerger[i64,
+](empty)}, |b, i, n| \
+ let groupId = lookup(groupIdsDict, n); \
+ {merge(b.$0, groupId), merge(b.$1, {groupId, 1L})} \
+ ); \
+ let v%d = result(idsAndCounts.$0); \
+ let v%dhseqbase = 0; \
+ let v%d = result(idsAndCounts.$1); \
+ let v%dhseqbase = 0; \
+ let v%d = result( \
+ for(groupHashVec, vecmerger[i64, +](empty), |b, i, n| \
+ merge(b, {i, lookup(groupHash, n.$0)}) \
+ ) \
+ ); \
+ let v%dhseqbase = 0;",
+ zipStmt, dictTypeStmt, dictTypeStmt, zipStmt, groups, groups, histo,
histo, extents, extents);
+ appendWeldStmt(wstate, weldStmt);
+ return MAL_SUCCEED;
+}
+
str
WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
diff --git a/monetdb5/modules/mal/mal_weld.h b/monetdb5/modules/mal/mal_weld.h
--- a/monetdb5/modules/mal/mal_weld.h
+++ b/monetdb5/modules/mal/mal_weld.h
@@ -13,10 +13,11 @@
typedef struct {
char *program;
+ InstrPtr *groupDeps;
size_t programMaxLen;
} weldState;
-mal_export str WeldInitState(ptr *retval);
+mal_export str WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
mal_export str WeldRun(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
mal_export str WeldAggrSum(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
mal_export str WeldAlgebraProjection(Client cntxt, MalBlkPtr mb, MalStkPtr
stk, InstrPtr pci);
@@ -27,6 +28,7 @@ mal_export str WeldAlgebraThetaselect2(C
mal_export str WeldBatcalcADDsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
mal_export str WeldBatcalcSUBsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
mal_export str WeldBatcalcMULsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
+mal_export str WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
mal_export str WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
#endif
diff --git a/monetdb5/modules/mal/mal_weld.mal
b/monetdb5/modules/mal/mal_weld.mal
--- a/monetdb5/modules/mal/mal_weld.mal
+++ b/monetdb5/modules/mal/mal_weld.mal
@@ -9,7 +9,7 @@
module weld;
-command initstate():ptr
+pattern initstate():ptr
address WeldInitState
comment "Initialize the state structure that is used to build a weld program";
@@ -37,6 +37,14 @@ pattern algebrathetaselect(b:bat[:any_1]
address WeldAlgebraThetaselect2
comment "algebra.thetaselect";
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid],
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid],
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
pattern aggrsum(b:bat[:bte], wstate:ptr):bte
address WeldAggrSum
comment "aggr.sum";
diff --git a/monetdb5/modules/mal/mal_weld.mal.sh
b/monetdb5/modules/mal/mal_weld.mal.sh
--- a/monetdb5/modules/mal/mal_weld.mal.sh
+++ b/monetdb5/modules/mal/mal_weld.mal.sh
@@ -19,7 +19,7 @@ alltypes=(bit ${numeric[@]} oid str)
cat <<EOF
-command initstate():ptr
+pattern initstate():ptr
address WeldInitState
comment "Initialize the state structure that is used to build a weld program";
@@ -47,6 +47,14 @@ pattern algebrathetaselect(b:bat[:any_1]
address WeldAlgebraThetaselect2
comment "algebra.thetaselect";
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid],
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid],
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
EOF
for tp in ${numeric[@]}; do
diff --git a/monetdb5/optimizer/opt_prelude.c b/monetdb5/optimizer/opt_prelude.c
--- a/monetdb5/optimizer/opt_prelude.c
+++ b/monetdb5/optimizer/opt_prelude.c
@@ -307,6 +307,7 @@ str weldBatcalcAddRef;
str weldBatcalcSubRef;
str weldBatcalcMulRef;
str weldGetResultRef;
+str weldGroupRef;
str weldInitStateRef;
str weldLanguagePassRef;
str weldRef;
@@ -604,6 +605,7 @@ void optimizerInit(void)
weldBatcalcSubRef = putName("batcalcsub");
weldBatcalcMulRef = putName("batcalcmul");
weldGetResultRef = putName("getresult");
+ weldGroupRef = putName("groupgroup");
weldInitStateRef = putName("initstate");
weldLanguagePassRef = putName("languagepass");
weldRef = putName("weld");
diff --git a/monetdb5/optimizer/opt_prelude.h b/monetdb5/optimizer/opt_prelude.h
--- a/monetdb5/optimizer/opt_prelude.h
+++ b/monetdb5/optimizer/opt_prelude.h
@@ -315,6 +315,7 @@ mal_export str weldBatcalcAddRef;
mal_export str weldBatcalcSubRef;
mal_export str weldBatcalcMulRef;
mal_export str weldGetResultRef;
+mal_export str weldGroupRef;
mal_export str weldInitStateRef;
mal_export str weldLanguagePassRef;
mal_export str weldRef;
diff --git a/monetdb5/optimizer/opt_weld.c b/monetdb5/optimizer/opt_weld.c
--- a/monetdb5/optimizer/opt_weld.c
+++ b/monetdb5/optimizer/opt_weld.c
@@ -18,7 +18,7 @@
#include "mal_instruction.h"
#include "opt_weld.h"
-#define NUM_WELD_INSTR 8
+#define NUM_WELD_INSTR 12
#define UNMARKED 0
#define TEMP_MARK 1
#define PERM_MARK 2
@@ -52,6 +52,10 @@ static void initWeldInstrs(void) {
addWeldInstr(batcalcRef, minusRef, weldBatcalcSubRef);
/* batcalc.- */
addWeldInstr(batcalcRef, mulRef, weldBatcalcMulRef);
/* batcalc.* */
addWeldInstr(languageRef, passRef, weldLanguagePassRef);
/* language.pass */
+ addWeldInstr(groupRef, groupRef, weldGroupRef);
/* group.group */
+ addWeldInstr(groupRef, subgroupRef, weldGroupRef);
/* group.subgroup */
+ addWeldInstr(groupRef, groupdoneRef, weldGroupRef);
/* group.groupdone */
+ addWeldInstr(groupRef, subgroupdoneRef, weldGroupRef);
/* group.subgroupdone */
}
static str getWeldRef(InstrPtr instr) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list