Hello, this is a new version of the identity projection patch.
I reverted projectionInfo and ExecBuildProjectionInfo. Identity
projection is recognized directly in ExecGroup, ExecResult, and
ExecWindowAgg. nodeAgg is reverted because I couldn't make it
sane.
The following is the result of the performance test posted before,
in order to show the source of the gain.
regards,
--
--
Kyotaro Horiguchi
NTT Open Source Software Center
At Fri, 05 Oct 2012 16:04:16 +0900, Kyotaro HORIGUCHI wrote in
<[email protected]>
> > Although I said as following, the gain seems a bit larger... I'll
> > recheck the testing conditions...
>
> I inspected this more precisely from two aspects, magnifying the
> effect of this patch by putting 300 columns into the table.
>
>
> First, explain analyze says the difference caused by this patch
> is only in the actual time of the Result node.
>
> orig$ psql -c 'explain analyze select * from parenta'
> QUERY PLAN
> --------------------------------------------------------------------------
> Result (cost=0.00..176667.00 rows=1000001 width=1202)
> (actual time=0.013.. *2406.792* rows=1000000 loops=1)
> -> Append (cost=0.00..176667.00 rows=1000001 width=1202)
> (actual time=0.011..412.749 rows=1000000 loops=1)
> -> Seq Scan on parenta (cost=0.00..0.00 rows=1 width=1228)
> (actual time=0.001..0.001 rows=0 loops=1)
> -> Seq Scan on childa000 parenta
> (cost=0.00..176667.00 rows=1000000 width=1202)
> (actual time=0.009..334.633 rows=1000000 loops=1)
> Total runtime: 2446.474 ms
> (5 rows)
>
> patched$ psql -c 'explain analyze select * from parenta'
> QUERY PLAN
> --------------------------------------------------------------------------
> Result (cost=0.00..176667.00 rows=1000001 width=1202)
> (actual time=0.011.. *507.239* rows=1000000 loops=1)
> -> Append (cost=0.00..176667.00 rows=1000001 width=1202)
> (actual time=0.011..419.420 rows=1000000 loops=1)
> -> Seq Scan on parenta (cost=0.00..0.00 rows=1 width=1228)
> (actual time=0.000..0.000 rows=0 loops=1)
> -> Seq Scan on childa000 parenta
> (cost=0.00..176667.00 rows=1000000 width=1202)
> (actual time=0.010..335.721 rows=1000000 loops=1)
> Total runtime: 545.879 ms
> (5 rows)
>
>
> Second, the results of configure --enable-profiling show that
> the execution time of ExecProject changes greatly. This is consistent
> with what explain showed.
>
> orig:
> > % cumulative self self total
> > time seconds seconds calls s/call s/call name
> > 60.29 1.26 1.26 1000005 0.00 0.00 slot_deform_tuple
> !> 30.14 1.89 0.63 1000000 0.00 0.00 ExecProject
> > 3.35 1.96 0.07 3000004 0.00 0.00 ExecProcNode
> > 0.96 1.98 0.02 1000002 0.00 0.00 ExecScan
> > 0.96 2.00 0.02 166379 0.00 0.00 TerminateBufferIO
> > 0.48 2.01 0.01 3000004 0.00 0.00 InstrStartNode
> > 0.48 2.02 0.01 3000004 0.00 0.00 InstrStopNode
> !> 0.48 2.03 0.01 1000001 0.00 0.00 ExecResult
> > 0.48 2.04 0.01 830718 0.00 0.00 LWLockAcquire
> > 0.48 2.05 0.01 506834 0.00 0.00
> hash_search_with_hash_value
> > 0.48 2.06 0.01 341656 0.00 0.00 LockBuffer
> > 0.48 2.07 0.01 168383 0.00 0.00 ReadBuffer_common
> > 0.48 2.08 0.01 4 0.00 0.00 InstrEndLoop
> > 0.48 2.09 0.01
> ExecCleanTargetListLength
> > 0.00 2.09 0.00 2000005 0.00 0.00 MemoryContextReset
>
> patched:
> > % cumulative self self total
> > time seconds seconds calls ms/call ms/call name
> > 23.08 0.03 0.03 3000004 0.00 0.00 ExecProcNode
> > 15.38 0.05 0.02 1000002 0.00 0.00 heapgettup_pagemode
> > 15.38 0.07 0.02 830718 0.00 0.00 LWLockAcquire
> > 7.69 0.08 0.01 2000005 0.00 0.00 MemoryContextReset
> > 7.69 0.09 0.01 1000002 0.00 0.00 ExecScan
> > 7.69 0.10 0.01 1000000 0.00 0.00 ExecStoreTuple
> > 7.69 0.11 0.01 841135 0.00 0.00 LWLockRelease
> > 7.69 0.12 0.01 168383 0.00 0.00 ReadBuffer_common
> > 7.69 0.13 0.01 168383 0.00 0.00 UnpinBuffer
> > 0.00 0.13 0.00 3000004 0.00 0.00 InstrStartNode
> ...
> !> 0.00 0.13 0.00 1000001 0.00 0.00 ExecResult
> !> 0.00 0.13 0.00 1000000 0.00 0.00 ExecProject
==============================
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c
index a8a1fe6..38037f9 100644
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -110,7 +110,10 @@ ExecGroup(GroupState *node)
TupleTableSlot *result;
ExprDoneCond isDone;
- result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone);
+ if (node->ss.ps.ps_ProjInfo)
+ result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone);
+ else /* Assign outertuple for identity projection */
+ result = econtext->ecxt_outertuple;
if (isDone != ExprEndResult)
{
@@ -173,7 +176,10 @@ ExecGroup(GroupState *node)
TupleTableSlot *result;
ExprDoneCond isDone;
- result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone);
+ if (node->ss.ps.ps_ProjInfo)
+ result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone);
+ else /* Assign outertuple for identity projection */
+ result = econtext->ecxt_outertuple;
if (isDone != ExprEndResult)
{
@@ -244,7 +250,10 @@ ExecInitGroup(Group *node, EState *estate, int eflags)
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&grpstate->ss.ps);
- ExecAssignProjectionInfo(&grpstate->ss.ps, NULL);
+ if (node->plan.tlist_lower_congruent)
+ grpstate->ss.ps.ps_ProjInfo = NULL;
+ else
+ ExecAssignProjectionInfo(&grpstate->ss.ps, NULL);
grpstate->ss.ps.ps_TupFromTlist = false;
diff --git a/src/backend/executor/nodeResult.c b/src/backend/executor/nodeResult.c
index b51efd8..4a129d6 100644
--- a/src/backend/executor/nodeResult.c
+++ b/src/backend/executor/nodeResult.c
@@ -152,7 +152,10 @@ ExecResult(ResultState *node)
* the projection produces an empty set, in which case we must loop
* back to see if there are more outerPlan tuples.
*/
- resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone);
+ if (node->ps.ps_ProjInfo)
+ resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone);
+ else /* Assign outertuple for identity projection */
+ resultSlot = econtext->ecxt_outertuple;
if (isDone != ExprEndResult)
{
@@ -261,7 +264,10 @@ ExecInitResult(Result *node, EState *estate, int eflags)
* initialize tuple type and projection info
*/
ExecAssignResultTypeFromTL(&resstate->ps);
- ExecAssignProjectionInfo(&resstate->ps, NULL);
+ if (node->plan.tlist_lower_congruent)
+ resstate->ps.ps_ProjInfo = NULL;
+ else
+ ExecAssignProjectionInfo(&resstate->ps, NULL);
return resstate;
}
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index ade9b57..d34c45a 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -1373,7 +1373,10 @@ restart:
* evaluated with respect to that row.
*/
econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
- result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
+ if (winstate->ss.ps.ps_ProjInfo)
+ result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
+ else /* Assign outertuple for identity projection */
+ result = econtext->ecxt_outertuple;
if (isDone == ExprEndResult)
{
@@ -1490,7 +1493,10 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&winstate->ss.ps);
- ExecAssignProjectionInfo(&winstate->ss.ps, NULL);
+ if (node->plan.tlist_lower_congruent)
+ winstate->ss.ps.ps_ProjInfo = NULL;
+ else
+ ExecAssignProjectionInfo(&winstate->ss.ps, NULL);
winstate->ss.ps.ps_TupFromTlist = false;
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index ccd69fc..39d54ce 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1184,6 +1184,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
indexed_tlist *subplan_itlist;
List *output_targetlist;
ListCell *l;
+ int nmatch = 0;
subplan_itlist = build_tlist_index(subplan->targetlist);
@@ -1214,12 +1215,25 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
subplan_itlist,
OUTER_VAR,
rtoffset);
+
+ if (IsA(newexpr, Var) && ((Var*)newexpr)->varattno == nmatch + 1)
+ nmatch++;
+
tle = flatCopyTargetEntry(tle);
tle->expr = (Expr *) newexpr;
output_targetlist = lappend(output_targetlist, tle);
}
- plan->targetlist = output_targetlist;
+ /*
+ * Directly refer to the lower tuple slot on projection if all elements
+ * in the target list exactly correspond to the ones in the lower tlist.
+ */
+ plan->tlist_lower_congruent =
+ (nmatch == list_length(plan->targetlist) &&
+ nmatch == list_length(subplan->targetlist));
+
+ plan->targetlist = output_targetlist;
+
plan->qual = (List *)
fix_upper_expr(root,
(Node *) plan->qual,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index fb9a863..9e7729c 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -106,6 +106,7 @@ typedef struct Plan
* Common structural data for all Plan types.
*/
List *targetlist; /* target list to be computed at this node */
+ bool tlist_lower_congruent; /* target list is lower-congruent */
List *qual; /* implicitly-ANDed qual conditions */
struct Plan *lefttree; /* input plan tree(s) */
struct Plan *righttree;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers