On Wed, Dec 04, 2019 at 10:57:51PM -0800, Jeff Davis wrote:
> > About the `TODO: project needed attributes only` in your patch, when
> > would the input tuple contain columns not needed? It seems like
> > anything
> > you can project has to be in the group or aggregates.
>
> If you have a table like:
>
> CREATE TABLE foo(i int, j int, x int, y int, z int);
>
> And do:
>
> SELECT i, SUM(j) FROM foo GROUP BY i;
>
> At least from a logical standpoint, you might expect that we project
> only the attributes we need from foo before feeding them into the
> HashAgg. But that's not quite how postgres works. Instead, it leaves
> the tuples intact (which, in this case, means they have 5 attributes)
> until after aggregation and lazily fetches whatever attributes are
> referenced. Tuples are spilled from the input, at which time they still
> have 5 attributes; so naively copying them is wasteful.
>
> I'm not sure how often this laziness is really a win in practice,
> especially after the expression evaluation has changed so much in
> recent releases. So it might be better to just project all the
> attributes eagerly, and then none of this would be a problem. If we
> still wanted to be lazy about attribute fetching, that should still be
> possible even if we did a kind of "logical" projection of the tuple so
> that the useless attributes would not be relevant. Regardless, that's
> outside the scope of the patch I'm currently working on.
>
> What I'd like to do is copy just the attributes needed into a new
> virtual slot, leave the unneeded ones NULL, and then write it out to
> the tuplestore as a MinimalTuple. I just need to be sure to get the
> right attributes.
>
> Regards,
> Jeff Davis
Melanie and I tried this and have a patch that passes installcheck. The
way we verified it was by composing a wide table with long, unnecessary
text columns, then checking the size written on every iteration.
Please check out the attachment; it's based on your 1204 version.
--
Adam Lee
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f509c8e8f5..fe4a520305 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1291,6 +1291,68 @@ project_aggregates(AggState *aggstate)
return NULL;
}
+/*
+ * find_aggregated_cols_walker
+ *	  Expression tree walker: add the attribute number of every Var
+ *	  found to the caller-supplied Bitmapset.
+ */
+static bool
+find_aggregated_cols_walker(Node *node, Bitmapset **colnos)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		*colnos = bms_add_member(*colnos, var->varattno);
+
+		/* no need to recurse; Vars have no children */
+		return false;
+	}
+	return expression_tree_walker(node, find_aggregated_cols_walker,
+								  (void *) colnos);
+}
+
+/*
+ * find_aggregated_cols
+ *	  Construct a bitmapset of the column numbers of aggregated Vars
+ *	  appearing in our targetlist and qual (HAVING clause).
+ *
+ * NOTE(review): only entries whose top-level expression is an Aggref or
+ * GroupingFunc are examined; an aggregate nested inside a larger
+ * expression (e.g. SUM(x) + 1) would be missed here — confirm that the
+ * planner guarantees aggregates appear at the top level of these TLEs.
+ */
+static Bitmapset *
+find_aggregated_cols(AggState *aggstate)
+{
+	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
+	Bitmapset  *colnos = NULL;
+	ListCell   *temp;
+
+	/*
+	 * We only want the columns used by aggregations in the targetlist or
+	 * qual.
+	 */
+	if (node->plan.targetlist != NULL)
+	{
+		foreach(temp, (List *) node->plan.targetlist)
+		{
+			if (IsA(lfirst(temp), TargetEntry))
+			{
+				/* renamed from "node" to avoid shadowing the Agg above */
+				Node	   *expr = (Node *) ((TargetEntry *) lfirst(temp))->expr;
+
+				if (IsA(expr, Aggref) || IsA(expr, GroupingFunc))
+					find_aggregated_cols_walker(expr, &colnos);
+			}
+		}
+	}
+
+	/*
+	 * NOTE(review): unlike the targetlist, plan->qual is normally a list
+	 * of bare boolean expressions rather than TargetEntrys, so the IsA
+	 * test below may never match; verify against the planner's output.
+	 */
+	if (node->plan.qual != NULL)
+	{
+		foreach(temp, (List *) node->plan.qual)
+		{
+			if (IsA(lfirst(temp), TargetEntry))
+			{
+				Node	   *expr = (Node *) ((TargetEntry *) lfirst(temp))->expr;
+
+				if (IsA(expr, Aggref) || IsA(expr, GroupingFunc))
+					find_aggregated_cols_walker(expr, &colnos);
+			}
+		}
+	}
+
+	return colnos;
+}
+
/*
* find_unaggregated_cols
* Construct a bitmapset of the column numbers of un-aggregated Vars
@@ -1520,6 +1582,23 @@ find_hash_columns(AggState *aggstate)
for (i = 0; i < perhash->numCols; i++)
colnos = bms_add_member(colnos, grpColIdx[i]);
+ /*
+ * Find the columns used by aggregations
+ *
+ * This is shared by the entire aggregation.
+ */
+ if (aggstate->aggregated_columns == NULL)
+ aggstate->aggregated_columns =
find_aggregated_cols(aggstate);
+
+	/*
+	 * The columns we must keep when spilling are either group keys or
+	 * columns used by aggregations.
+	 *
+	 * This is a convenient place to compute the necessary spill columns,
+	 * because the group keys differ per hash.
+	 */
+ perhash->necessarySpillCols = bms_union(colnos,
aggstate->aggregated_columns);
+
/*
* First build mapping for columns directly hashed. These are
the
* first, because they'll be accessed when computing hash
values and
@@ -1861,6 +1940,23 @@ lookup_hash_entries(AggState *aggstate)
hash_spill_init(spill, 0,
perhash->aggnode->numGroups,
aggstate->hashentrysize);
+ AggStatePerHash perhash =
&aggstate->perhash[aggstate->current_set];
+ for (int ttsno = 0; ttsno < slot->tts_nvalid; ttsno++)
+ {
+			/*
+			 * Null the column out if it's unnecessary; the following
+			 * tuple-forming functions will shrink it away.
+			 *
+			 * It must be a virtual tuple here: this function is only used
+			 * during the first pass, when tuples come from the input node
+			 * rather than from spill files.
+			 *
+			 * Note: ttsno is zero-indexed, columns are one-indexed.
+			 */
+ if (!bms_is_member(ttsno+1,
perhash->necessarySpillCols))
+ slot->tts_isnull[ttsno] = true;
+ }
+
aggstate->hash_disk_used += hash_spill_tuple(spill, 0,
slot, hash);
}
}
@@ -2623,7 +2719,15 @@ hash_spill_tuple(HashAggSpill *spill, int input_bits,
TupleTableSlot *slot,
Assert(spill->partitions != NULL);
- /*TODO: project needed attributes only */
+	/*
+	 * ExecFetchSlotMinimalTuple() uses heap_form_minimal_tuple() for a
+	 * virtual tuple and tts_minimal_get_minimal_tuple() for a minimal
+	 * tuple, which is exactly what we want.
+	 *
+	 * When we spill tuples coming from the input, they are virtual tuples
+	 * with some columns nulled out; when we re-spill tuples read back from
+	 * spill files, they are minimal tuples whose columns were already
+	 * nulled out earlier.
+	 */
tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree);
if (spill->partition_bits == 0)
@@ -3072,6 +3176,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
*/
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
+ aggstate->aggregated_columns = NULL;
aggstate->num_hashes = numHashes;
if (numHashes)
{
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 68c9e5f540..3b61109b52 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -302,6 +302,7 @@ typedef struct AggStatePerHashData
AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */
Agg *aggnode; /* original Agg node, for
numGroups etc. */
+ Bitmapset *necessarySpillCols; /* the necessary columns if spills */
} AggStatePerHashData;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b9803a28bd..0c034b5f67 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2084,6 +2084,7 @@ typedef struct AggState
uint64 hash_disk_used; /* bytes of disk space used */
int hash_batches_used; /* batches used during
entire execution */
List *hash_batches; /* hash batches remaining to be
processed */
+ Bitmapset *aggregated_columns; /* the columns used by aggregations */
AggStatePerHash perhash; /* array of per-hashtable data */
AggStatePerGroup *hash_pergroup; /* grouping set indexed array of