From 00e5314a357f80a54284b7b8fceed9b7eb9da7ce Mon Sep 17 00:00:00 2001
From: Zhenghua lyu <kainwen@gmail.com>
Date: Fri, 15 Jul 2022 13:51:20 +0000
Subject: [PATCH] Adjust ndistinct with nrows in the rel when estimating join
 selectivity.

Ndistinct is key to the accuracy of join selectivity estimation, which
greatly impacts the performance of complex SQL queries. The previous code
in eqjoinsel did not take the rel's restrictions into account. A good
mathematical model would use the dependency between the Vars in the rel's
restrictions and the join var to estimate Ndistinct. At the very least,
Ndistinct should never be greater than the number of rows of the rel.
---
 src/backend/utils/adt/selfuncs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fa1f589fad..bf6ef60b56 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2269,6 +2269,15 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
 	nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
 
+	/*
+	 * Adjust ndistinct to account for restriction clauses.
+	 * nd should not be greater than the number of rows in the relation.
+	 */
+	if (vardata1.rel)
+		nd1 = Min(nd1, vardata1.rel->rows);
+	if (vardata2.rel)
+		nd2 = Min(nd2, vardata2.rel->rows);
+
 	opfuncoid = get_opcode(operator);
 
 	memset(&sslot1, 0, sizeof(sslot1));
-- 
2.25.1

