On 10/8/24 11:33, Andrei Lepikhov wrote:
On 9/23/24 20:02, Andrei Lepikhov wrote:
On 12/9/2024 12:12, David Rowley wrote:
On Thu, 12 Sept 2024 at 21:51, Andrei Lepikhov <lepi...@gmail.com>
Minor change to make compiler and cfbot happy
Now, this thread looks connected to the [1]. However, it still has independent profit, which can be discussed separately. After the introduction of the em->em_ndistinct cache, I played around with the idea of letting estimate_num_groups use this cache. In the process, I found one more instability case, like the following:

DROP TABLE IF EXISTS test;
CREATE TABLE test (x int, y int, z int);
INSERT INTO test (x,y,z) (SELECT random()*1E5, random()*2, random() FROM generate_series(1,1e4));
VACUUM ANALYZE test;

EXPLAIN SELECT count(*) FROM test WHERE x=y GROUP BY x,y;
EXPLAIN SELECT count(*) FROM test WHERE x=y GROUP BY y,x;

Here you can see that, depending on the initial order of the grouping clause, Postgres chooses different columns for grouping. That leads to different estimations - one of them is definitely wrong:

GroupAggregate  (cost=181.41..182.29 rows=50 width=16)
GroupAggregate  (cost=181.41..181.82 rows=3 width=16)

That happens because when estimating the number of groups, Postgres doesn't consider the EquivalenceClass, which could let it correct the group estimation at a low cost. It could be done inside make_pathkeys_for_sortclauses_extended by choosing the column with the lower number of distinct values, but IMO, it is better to do it at the moment the number of groups is estimated.

Thoughts? Is it a real issue or just a non-practical corner case?

The new version of the patch is attached.

[1] https://www.postgresql.org/message-id/flat/8742aaa8-9519-4a1f-91bd-364aec65f5cf%40gmail.com
--
regards, Andrei Lepikhov
From 5eb884cbbd9c2e356d5d855da46d7e62d101b8b9 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepi...@gmail.com>
Date: Tue, 29 Oct 2024 08:49:33 +0700
Subject: [PATCH v2 1/4] Stabilise incremental sort cost calculation.

Carefully identify a column/expression that can represent the path key in the cost
calculation of a specific sort operator. Columns may have different numbers of
distinct values. That's why the order of columns in the sort operation may
impact the number of comparison function calls.
Sorting has only pathkeys as input for the cost estimation. This patch, instead
of blindly choosing the first equivalence class member, attempts to find the
member whose expression has the smallest ndistinct value.

TODO: Filtering out EC members external to this sort operator would not be a big
deal. But in that case, it would be necessary to pass the underlying relids to
the cost calculation routine, which would cause an API change. So, for now, we
stay as simple as possible.

Add the number of distinct values - em_ndistinct - to EquivalenceMember.
It may additionally be used later in group-number estimation.
---
 src/backend/optimizer/path/costsize.c         | 72 +++++++++++++++++--
 src/backend/optimizer/path/equivclass.c       |  1 +
 src/include/nodes/pathnodes.h                 |  2 +
 .../regress/expected/incremental_sort.out     | 51 +++++++++++++
 src/test/regress/sql/incremental_sort.sql     | 31 ++++++++
 5 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 2bb6db1df7..686d5883d1 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -203,6 +203,8 @@ static int32 get_expr_width(PlannerInfo *root, const Node *expr);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
 static double get_parallel_divisor(Path *path);
+static EquivalenceMember *identify_sort_ecmember(PlannerInfo *root,
+												 EquivalenceClass *ec);
 
 
 /*
@@ -2052,22 +2054,21 @@ cost_incremental_sort(Path *path,
 	 */
 	foreach(l, pathkeys)
 	{
-		PathKey    *key = (PathKey *) lfirst(l);
-		EquivalenceMember *member = (EquivalenceMember *)
-			linitial(key->pk_eclass->ec_members);
+		PathKey			   *key = (PathKey *) lfirst(l);
+		EquivalenceMember  *em = identify_sort_ecmember(root, key->pk_eclass);
 
 		/*
 		 * Check if the expression contains Var with "varno 0" so that we
 		 * don't call estimate_num_groups in that case.
 		 */
-		if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
+		if (bms_is_member(0, pull_varnos(root, (Node *) em->em_expr)))
 		{
 			unknown_varno = true;
 			break;
 		}
 
 		/* expression not containing any Vars with "varno 0" */
-		presortedExprs = lappend(presortedExprs, member->em_expr);
+		presortedExprs = lappend(presortedExprs, em->em_expr);
 
 		if (foreach_current_index(l) + 1 >= presorted_keys)
 			break;
@@ -6604,3 +6605,64 @@ compute_gather_rows(Path *path)
 
 	return clamp_row_est(path->rows * get_parallel_divisor(path));
 }
+
+/*
+ * Find the most suitable member of the equivalence class for sort costing.
+ * Walk the list of EC members and pick the one with the minimum number of
+ * distinct values. Cache the estimated ndistinct in the em_ndistinct field
+ * of each member along the way.
+ */
+static EquivalenceMember *
+identify_sort_ecmember(PlannerInfo *root, EquivalenceClass *ec)
+{
+	EquivalenceMember  *candidate = linitial(ec->ec_members);
+
+	if (root == NULL)
+		/* Fast path */
+		return candidate;
+
+	foreach_node(EquivalenceMember, em, ec->ec_members)
+	{
+		VariableStatData	vardata;
+
+		if (em->em_ndistinct == 0.)
+			/* Nothing helpful */
+			continue;
+
+		if (em->em_is_child || em->em_is_const || bms_is_empty(em->em_relids) ||
+			bms_is_member(0, em->em_relids))
+		{
+			em->em_ndistinct = 0.;
+			continue;
+		}
+
+		if (em->em_ndistinct < 0.)
+		{
+			bool	isdefault = true;
+			double	ndist = 0.;
+
+			/* Let's check candidate's ndistinct value */
+			examine_variable(root, (Node *) em->em_expr, 0, &vardata);
+			if (HeapTupleIsValid(vardata.statsTuple))
+				ndist = get_variable_numdistinct(&vardata, &isdefault);
+			ReleaseVariableStats(vardata);
+
+			if (isdefault)
+			{
+				em->em_ndistinct = 0.;
+				continue;
+			}
+
+			em->em_ndistinct = ndist;
+		}
+
+		Assert(em->em_ndistinct > 0.);
+
+		if (candidate->em_ndistinct == 0. ||
+			em->em_ndistinct < candidate->em_ndistinct)
+			candidate = em;
+	}
+
+	Assert(candidate != NULL);
+	return candidate;
+}
\ No newline at end of file
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index fae137dd82..3552614502 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -525,6 +525,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 	em->em_datatype = datatype;
 	em->em_jdomain = jdomain;
 	em->em_parent = parent;
+	em->em_ndistinct = -1.0;
 
 	if (bms_is_empty(relids))
 	{
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index add0f9e45f..4ef5256a7f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1448,6 +1448,8 @@ typedef struct EquivalenceMember
 	JoinDomain *em_jdomain;		/* join domain containing the source clause */
 	/* if em_is_child is true, this links to corresponding EM for top parent */
 	struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore);
+
+	double em_ndistinct; /* cached ndistinct estimate: 0 - default value or 'unknown'; -1 - not computed yet */
 } EquivalenceMember;
 
 /*
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 2df7a5db12..409eb8a4b8 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,54 @@ order by t1.four, t1.two limit 1;
                ->  Seq Scan on tenk1 t2
 (12 rows)
 
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+SET enable_hashjoin = 'off';
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 98b20e17e1..af4d145395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,34 @@ explain (costs off)
 select * from
   (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
 order by t1.four, t1.two limit 1;
+
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+
+SET enable_hashjoin = 'off';
+
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;
-- 
2.39.5

Reply via email to