Hi,
On 2019-09-25 22:11:51 -0700, Soumyadeep Chakraborty wrote:
> Thank you very much for reviewing my patch!
>
> On Wed, Sep 25, 2019 at 1:02 PM Andres Freund <[email protected]> wrote:
> > IOW, wherever ExecComputeSlotInfo() is called, we should only actually
> > push the expression step, when ExecComputeSlotInfo does not determine
> > that a) the slot is virtual, b) and fixed (i.e. guaranteed to always be
> > the same type of slot).
> >
> > Does that make sense?
>
> That is a great suggestion and I totally agree. I have attached a patch
> that achieves this.
I think as done, this has the slight disadvantage of removing the
fast-path for small interpreted expressions, because that explicitly
matches for seeing the EEOP_*_FETCHSOME ops. Look at execExprInterp.c,
around:
/*
* Select fast-path evalfuncs for very simple expressions. "Starting
up"
* the full interpreter is a measurable overhead for these, and these
* patterns occur often enough to be worth optimizing.
*/
if (state->steps_len == 3)
{
So I think we'd have to add a separate fastpath for virtual slots for
that.
What do you think about the attached?
Greetings,
Andres Freund
>From 26b963f2989cdeec963f30c5d9b6c0e9b942741f Mon Sep 17 00:00:00 2001
From: Andres Freund <[email protected]>
Date: Thu, 26 Sep 2019 11:14:46 -0700
Subject: [PATCH v1 1/2] Reduce code duplication for ExecJust*Var operations.
This is mainly in preparation for introducing a few additional
fastpath implementations.
Also reorder ExecJust*Var functions to be consistent with the order in
which they're used.
Author: Andres Freund
Discussion: https://postgr.es/m/cae-ml+9oksn71+mhtfmd-l24odp8dgtfavjdu6u+j+fnaw5...@mail.gmail.com
---
src/backend/executor/execExprInterp.c | 94 ++++++++++-----------------
1 file changed, 35 insertions(+), 59 deletions(-)
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 293bfb61c36..e876160a0e7 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -155,11 +155,11 @@ static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
static Datum ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull);
-static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull);
/*
@@ -1966,13 +1966,12 @@ ShutdownTupleDescRef(Datum arg)
* Fast-path functions, for very simple expressions
*/
-/* Simple reference to inner Var */
-static Datum
-ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
+/* implementation of ExecJust(Inner|Outer|Scan)Var */
+static pg_attribute_always_inline Datum
+ExecJustVarImpl(ExprState *state, TupleTableSlot *slot, bool *isnull)
{
ExprEvalStep *op = &state->steps[1];
int attnum = op->d.var.attnum + 1;
- TupleTableSlot *slot = econtext->ecxt_innertuple;
CheckOpSlotCompatibility(&state->steps[0], slot);
@@ -1984,52 +1983,34 @@ ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
return slot_getattr(slot, attnum, isnull);
}
-/* Simple reference to outer Var */
+/* Simple reference to inner Var */
static Datum
-ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull)
+ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
{
- ExprEvalStep *op = &state->steps[1];
- int attnum = op->d.var.attnum + 1;
- TupleTableSlot *slot = econtext->ecxt_outertuple;
-
- CheckOpSlotCompatibility(&state->steps[0], slot);
-
- /* See comments in ExecJustInnerVar */
- return slot_getattr(slot, attnum, isnull);
+ return ExecJustVarImpl(state, econtext->ecxt_innertuple, isnull);
}
-/* Simple reference to scan Var */
+/* Simple reference to outer Var */
static Datum
-ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull)
+ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull)
{
- ExprEvalStep *op = &state->steps[1];
- int attnum = op->d.var.attnum + 1;
- TupleTableSlot *slot = econtext->ecxt_scantuple;
-
- CheckOpSlotCompatibility(&state->steps[0], slot);
-
- /* See comments in ExecJustInnerVar */
- return slot_getattr(slot, attnum, isnull);
+ return ExecJustVarImpl(state, econtext->ecxt_outertuple, isnull);
}
-/* Simple Const expression */
+/* Simple reference to scan Var */
static Datum
-ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull)
+ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull)
{
- ExprEvalStep *op = &state->steps[0];
-
- *isnull = op->d.constval.isnull;
- return op->d.constval.value;
+ return ExecJustVarImpl(state, econtext->ecxt_scantuple, isnull);
}
-/* Evaluate inner Var and assign to appropriate column of result tuple */
-static Datum
-ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
+/* implementation of ExecJustAssign(Inner|Outer|Scan)Var */
+static pg_attribute_always_inline Datum
+ExecJustAssignVarImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull)
{
ExprEvalStep *op = &state->steps[1];
int attnum = op->d.assign_var.attnum + 1;
int resultnum = op->d.assign_var.resultnum;
- TupleTableSlot *inslot = econtext->ecxt_innertuple;
TupleTableSlot *outslot = state->resultslot;
CheckOpSlotCompatibility(&state->steps[0], inslot);
@@ -2047,40 +2028,25 @@ ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
return 0;
}
+/* Evaluate inner Var and assign to appropriate column of result tuple */
+static Datum
+ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustAssignVarImpl(state, econtext->ecxt_innertuple, isnull);
+}
+
/* Evaluate outer Var and assign to appropriate column of result tuple */
static Datum
ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull)
{
- ExprEvalStep *op = &state->steps[1];
- int attnum = op->d.assign_var.attnum + 1;
- int resultnum = op->d.assign_var.resultnum;
- TupleTableSlot *inslot = econtext->ecxt_outertuple;
- TupleTableSlot *outslot = state->resultslot;
-
- CheckOpSlotCompatibility(&state->steps[0], inslot);
-
- /* See comments in ExecJustAssignInnerVar */
- outslot->tts_values[resultnum] =
- slot_getattr(inslot, attnum, &outslot->tts_isnull[resultnum]);
- return 0;
+ return ExecJustAssignVarImpl(state, econtext->ecxt_outertuple, isnull);
}
/* Evaluate scan Var and assign to appropriate column of result tuple */
static Datum
ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull)
{
- ExprEvalStep *op = &state->steps[1];
- int attnum = op->d.assign_var.attnum + 1;
- int resultnum = op->d.assign_var.resultnum;
- TupleTableSlot *inslot = econtext->ecxt_scantuple;
- TupleTableSlot *outslot = state->resultslot;
-
- CheckOpSlotCompatibility(&state->steps[0], inslot);
-
- /* See comments in ExecJustAssignInnerVar */
- outslot->tts_values[resultnum] =
- slot_getattr(inslot, attnum, &outslot->tts_isnull[resultnum]);
- return 0;
+ return ExecJustAssignVarImpl(state, econtext->ecxt_scantuple, isnull);
}
/* Evaluate CASE_TESTVAL and apply a strict function to it */
@@ -2120,6 +2086,16 @@ ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull)
return d;
}
+/* Simple Const expression */
+static Datum
+ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ ExprEvalStep *op = &state->steps[0];
+
+ *isnull = op->d.constval.isnull;
+ return op->d.constval.value;
+}
+
#if defined(EEO_USE_COMPUTED_GOTO)
/*
* Comparator used when building address->opcode lookup table for
--
2.23.0.162.gf1d4a28250
>From 09b8c12569671525de5741e77710a905b1bbd1a0 Mon Sep 17 00:00:00 2001
From: Andres Freund <[email protected]>
Date: Thu, 26 Sep 2019 11:23:43 -0700
Subject: [PATCH v1 2/2] Don't generate EEOP_*_FETCHSOME operations for slots
know to be virtual.
That avoids unnecessary work during both interpreted and JIT compiled
expression evaluation.
Author: Soumyadeep Chakraborty, Andres Freund
Discussion: https://postgr.es/m/cae-ml+9oksn71+mhtfmd-l24odp8dgtfavjdu6u+j+fnaw5...@mail.gmail.com
---
src/backend/executor/execExpr.c | 43 ++++++---
src/backend/executor/execExprInterp.c | 133 +++++++++++++++++++++++++-
src/backend/jit/llvm/llvmjit_expr.c | 6 +-
3 files changed, 160 insertions(+), 22 deletions(-)
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 39442f8866f..1ff13d1079a 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -65,7 +65,7 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args,
static void ExecInitExprSlots(ExprState *state, Node *node);
static void ExecPushExprSlots(ExprState *state, LastAttnumInfo *info);
static bool get_last_attnums_walker(Node *node, LastAttnumInfo *info);
-static void ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
+static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
static void ExecInitWholeRowVar(ExprEvalStep *scratch, Var *variable,
ExprState *state);
static void ExecInitSubscriptingRef(ExprEvalStep *scratch,
@@ -2285,8 +2285,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
scratch.d.fetch.fixed = false;
scratch.d.fetch.kind = NULL;
scratch.d.fetch.known_desc = NULL;
- ExecComputeSlotInfo(state, &scratch);
- ExprEvalPushStep(state, &scratch);
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
}
if (info->last_outer > 0)
{
@@ -2295,8 +2295,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
scratch.d.fetch.fixed = false;
scratch.d.fetch.kind = NULL;
scratch.d.fetch.known_desc = NULL;
- ExecComputeSlotInfo(state, &scratch);
- ExprEvalPushStep(state, &scratch);
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
}
if (info->last_scan > 0)
{
@@ -2305,8 +2305,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
scratch.d.fetch.fixed = false;
scratch.d.fetch.kind = NULL;
scratch.d.fetch.known_desc = NULL;
- ExecComputeSlotInfo(state, &scratch);
- ExprEvalPushStep(state, &scratch);
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
}
}
@@ -2364,14 +2364,21 @@ get_last_attnums_walker(Node *node, LastAttnumInfo *info)
* The goal is to determine whether a slot is 'fixed', that is, every
* evaluation of the expression will have the same type of slot, with an
* equivalent descriptor.
+ *
+ * Returns true if the the deforming step is required, false otherwise.
*/
-static void
+static bool
ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
{
PlanState *parent = state->parent;
TupleDesc desc = NULL;
const TupleTableSlotOps *tts_ops = NULL;
bool isfixed = false;
+ ExprEvalOp opcode = op->opcode;
+
+ Assert(opcode == EEOP_INNER_FETCHSOME ||
+ opcode == EEOP_OUTER_FETCHSOME ||
+ opcode == EEOP_SCAN_FETCHSOME);
if (op->d.fetch.known_desc != NULL)
{
@@ -2383,7 +2390,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
{
isfixed = false;
}
- else if (op->opcode == EEOP_INNER_FETCHSOME)
+ else if (opcode == EEOP_INNER_FETCHSOME)
{
PlanState *is = innerPlanState(parent);
@@ -2402,7 +2409,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
desc = ExecGetResultType(is);
}
}
- else if (op->opcode == EEOP_OUTER_FETCHSOME)
+ else if (opcode == EEOP_OUTER_FETCHSOME)
{
PlanState *os = outerPlanState(parent);
@@ -2421,7 +2428,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
desc = ExecGetResultType(os);
}
}
- else if (op->opcode == EEOP_SCAN_FETCHSOME)
+ else if (opcode == EEOP_SCAN_FETCHSOME)
{
desc = parent->scandesc;
@@ -2444,6 +2451,12 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
op->d.fetch.kind = NULL;
op->d.fetch.known_desc = NULL;
}
+
+ /* if the slot is known to always virtual we never need to deform */
+ if (op->d.fetch.fixed && op->d.fetch.kind == &TTSOpsVirtual)
+ return false;
+
+ return true;
}
/*
@@ -3358,16 +3371,16 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
scratch.d.fetch.fixed = false;
scratch.d.fetch.known_desc = ldesc;
scratch.d.fetch.kind = lops;
- ExecComputeSlotInfo(state, &scratch);
- ExprEvalPushStep(state, &scratch);
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
scratch.opcode = EEOP_OUTER_FETCHSOME;
scratch.d.fetch.last_var = maxatt;
scratch.d.fetch.fixed = false;
scratch.d.fetch.known_desc = rdesc;
scratch.d.fetch.kind = rops;
- ExecComputeSlotInfo(state, &scratch);
- ExprEvalPushStep(state, &scratch);
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
/*
* Start comparing at the last field (least significant sort key). That's
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index e876160a0e7..ccea030cd70 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -160,6 +160,12 @@ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, boo
static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull);
static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
/*
@@ -255,11 +261,45 @@ ExecReadyInterpretedExpr(ExprState *state)
return;
}
}
- else if (state->steps_len == 2 &&
- state->steps[0].opcode == EEOP_CONST)
+ else if (state->steps_len == 2)
{
- state->evalfunc_private = (void *) ExecJustConst;
- return;
+ ExprEvalOp step0 = state->steps[0].opcode;
+
+ if (step0 == EEOP_CONST)
+ {
+ state->evalfunc_private = (void *) ExecJustConst;
+ return;
+ }
+ else if (step0 == EEOP_INNER_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustInnerVarVirt;
+ return;
+ }
+ else if (step0 == EEOP_OUTER_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustOuterVarVirt;
+ return;
+ }
+ else if (step0 == EEOP_SCAN_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustScanVarVirt;
+ return;
+ }
+ else if (step0 == EEOP_ASSIGN_INNER_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt;
+ return;
+ }
+ else if (step0 == EEOP_ASSIGN_OUTER_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustAssignOuterVarVirt;
+ return;
+ }
+ else if (step0 == EEOP_ASSIGN_SCAN_VAR)
+ {
+ state->evalfunc_private = (void *) ExecJustAssignScanVarVirt;
+ return;
+ }
}
#if defined(EEO_USE_COMPUTED_GOTO)
@@ -2096,6 +2136,91 @@ ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull)
return op->d.constval.value;
}
+/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */
+static pg_attribute_always_inline Datum
+ExecJustVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull)
+{
+ ExprEvalStep *op = &state->steps[0];
+ int attnum = op->d.var.attnum;
+
+ /*
+ * As it is guaranteed that a virtual slot is used, there never is a need
+ * to perform tuple deforming (nor would it be possible). Therefore
+ * execExpr.c has not emitted a EEOP_*_FETCHSOME step. Verify, as much as
+ * possible, that that determination was accurate.
+ */
+ Assert(slot->tts_ops == &TTSOpsVirtual);
+ Assert(TTS_FIXED(slot));
+ Assert(attnum >= 0 && attnum < slot->tts_nvalid);
+
+ *isnull = slot->tts_isnull[attnum];
+
+ return slot->tts_values[attnum];
+}
+
+/* Like ExecJustInnerVar, optimized for virtual slots */
+static Datum
+ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
+}
+
+/* Like ExecJustOuterVar, optimized for virtual slots */
+static Datum
+ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
+}
+
+/* Like ExecJustScanVar, optimized for virtual slots */
+static Datum
+ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
+}
+
+/* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */
+static pg_attribute_always_inline Datum
+ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull)
+{
+ ExprEvalStep *op = &state->steps[0];
+ int attnum = op->d.assign_var.attnum;
+ int resultnum = op->d.assign_var.resultnum;
+ TupleTableSlot *outslot = state->resultslot;
+
+ /* see ExecJustVarVirtImpl for comments */
+
+ Assert(inslot->tts_ops == &TTSOpsVirtual);
+ Assert(TTS_FIXED(inslot));
+ Assert(attnum >= 0 && attnum < inslot->tts_nvalid);
+
+ outslot->tts_values[resultnum] = inslot->tts_values[attnum];
+ outslot->tts_isnull[resultnum] = inslot->tts_isnull[attnum];
+
+ return 0;
+}
+
+/* Like ExecJustAssignInnerVar, optimized for virtual slots */
+static Datum
+ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustAssignVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
+}
+
+/* Like ExecJustAssignOuterVar, optimized for virtual slots */
+static Datum
+ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustAssignVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
+}
+
+/* Like ExecJustAssignScanVar, optimized for virtual slots */
+static Datum
+ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+ return ExecJustAssignVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
+}
+
#if defined(EEO_USE_COMPUTED_GOTO)
/*
* Comparator used when building address->opcode lookup table for
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 30133634c70..d09324637b9 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -287,6 +287,9 @@ llvm_compile_expr(ExprState *state)
if (op->d.fetch.fixed)
tts_ops = op->d.fetch.kind;
+ /* step should not have been generated */
+ Assert(tts_ops != &TTSOpsVirtual);
+
if (opcode == EEOP_INNER_FETCHSOME)
v_slot = v_innerslot;
else if (opcode == EEOP_OUTER_FETCHSOME)
@@ -297,9 +300,6 @@ llvm_compile_expr(ExprState *state)
/*
* Check if all required attributes are available, or
* whether deforming is required.
- *
- * TODO: skip nvalid check if slot is fixed and known to
- * be a virtual slot.
*/
v_nvalid =
l_load_struct_gep(b, v_slot,
--
2.23.0.162.gf1d4a28250