On Mon, Nov 27, 2023 at 3:05 PM jian he <jian.universal...@gmail.com> wrote:
>
> Hi.
> Since both array_op_test, arrest both are not dropped at the end of
> src/test/regress/sql/arrays.sql.
> I found using table array_op_test test more convincing.
>
> select
>         reltuples * 10 as original,
>         reltuples * (select
> floor(elem_count_histogram[array_length(elem_count_histogram,1)])
>         from    pg_stats
>         where   tablename = 'array_op_test' and         attname = 'i')
> as with_patch
>         ,(select (elem_count_histogram[array_length(elem_count_histogram,1)])
>         from    pg_stats
>         where   tablename = 'array_op_test' and         attname = 'i')
> as elem_count_histogram_last_element
> from pg_class where relname = 'array_op_test';
>  original | with_patch | elem_count_histogram_last_element
> ----------+------------+-----------------------------------
>      1030 |        412 |                         4.7843137
> (1 row)
>
> without patch:
> explain select unnest(i)  from array_op_test;
>                               QUERY PLAN
> ----------------------------------------------------------------------
>  ProjectSet  (cost=0.00..9.95 rows=1030 width=4)
>    ->  Seq Scan on array_op_test  (cost=0.00..4.03 rows=103 width=40)
> (2 rows)
>
> with patch:
>  explain select unnest(i)  from array_op_test;
>                               QUERY PLAN
> ----------------------------------------------------------------------
>  ProjectSet  (cost=0.00..6.86 rows=412 width=4)
>    ->  Seq Scan on array_op_test  (cost=0.00..4.03 rows=103 width=40)
> (2 rows)
> --------

Hi.
I did a minor change. change estimate_array_length return type to
double,  cost_tidscan function inside `int ntuples` to `double
ntuples`.

 `clamp_row_est(get_function_rows(root, expr->funcid, clause));` will
round 4.7843137 to 5.
so with your patch and my refactor, the rows will be 103 * 5 = 515.

 explain select unnest(i)  from array_op_test;
                              QUERY PLAN
----------------------------------------------------------------------
 ProjectSet  (cost=0.00..7.38 rows=515 width=4)
   ->  Seq Scan on array_op_test  (cost=0.00..4.03 rows=103 width=40)
(2 rows)
From 448e0149d0dd1cfc0ab629a6fc164c6071169926 Mon Sep 17 00:00:00 2001
From: pgaddict <jian.universal...@gmail.com>
Date: Thu, 30 Nov 2023 12:15:01 +0800
Subject: [PATCH v1 1/1] minor refactor.

make estimate_array_length return double instead of int.
similarily change estimate_array_length's caller fucntion cost_tidscan's variable ntuple
from type int to double.

because in estimate_array_length, we may need "average count of distinct element values"
info, and that is a float4 data type.

---
 src/backend/optimizer/path/costsize.c |  6 +++---
 src/backend/utils/adt/selfuncs.c      | 18 +++++++++---------
 src/include/utils/selfuncs.h          |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8bcdcc7f..65854fbe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1227,7 +1227,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
 	QualCost	qpqual_cost;
 	Cost		cpu_per_tuple;
 	QualCost	tid_qual_cost;
-	int			ntuples;
+	double			ntuples;
 	ListCell   *l;
 	double		spc_random_page_cost;
 
@@ -1242,7 +1242,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
 		path->rows = baserel->rows;
 
 	/* Count how many tuples we expect to retrieve */
-	ntuples = 0;
+	ntuples = 0.0;
 	foreach(l, tidquals)
 	{
 		RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
@@ -4741,7 +4741,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 		Node	   *arraynode = (Node *) lsecond(saop->args);
 		QualCost	sacosts;
 		QualCost	hcosts;
-		int			estarraylen = estimate_array_length(context->root, arraynode);
+		double			estarraylen = estimate_array_length(context->root, arraynode);
 
 		set_sa_opfuncid(saop);
 		sacosts.startup = sacosts.per_tuple = 0;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6346b41d..f6519c7f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2130,7 +2130,7 @@ scalararraysel(PlannerInfo *root,
  *
  * It's important that this agree with scalararraysel.
  */
-int
+double
 estimate_array_length(PlannerInfo *root, Node *arrayexpr)
 {
 	/* look through any binary-compatible relabeling of arrayexpr */
@@ -2145,12 +2145,12 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
 		if (arrayisnull)
 			return 0;
 		arrayval = DatumGetArrayTypeP(arraydatum);
-		return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
+		return (double) ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
 	}
 	else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
 			 !((ArrayExpr *) arrayexpr)->multidims)
 	{
-		return list_length(((ArrayExpr *) arrayexpr)->elements);
+		return (double) list_length(((ArrayExpr *) arrayexpr)->elements);
 	}
 	else if (arrayexpr && IsA(arrayexpr, Var))
 	{
@@ -2162,7 +2162,7 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
 		 */
 		VariableStatData vardata;
 		AttStatsSlot sslot;
-		int nelem = -1;
+		double nelem = -1;
 
 		examine_variable(root, arrayexpr, 0, &vardata);
 
@@ -2170,18 +2170,18 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
 		{
 			if (get_attstatsslot(&sslot, vardata.statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, ATTSTATSSLOT_NUMBERS))
 			{
-				nelem = sslot.numbers[sslot.nnumbers - 1];
+				nelem = (double) sslot.numbers[sslot.nnumbers - 1];
 				free_attstatsslot(&sslot);
 			}
 		}
 		ReleaseVariableStats(vardata);
 
 		if (nelem >= 0)
-			return nelem;
+			return (double) nelem;
 	}
 
 	/* default guess --- see also scalararraysel */
-	return 10;
+	return 10.0;
 }
 
 /*
@@ -6565,7 +6565,7 @@ genericcostestimate(PlannerInfo *root,
 		if (IsA(rinfo->clause, ScalarArrayOpExpr))
 		{
 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
-			int			alength = estimate_array_length(root, lsecond(saop->args));
+			double			alength = estimate_array_length(root, lsecond(saop->args));
 
 			if (alength > 1)
 				num_sa_scans *= alength;
@@ -6845,7 +6845,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 			{
 				ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
 				Node	   *other_operand = (Node *) lsecond(saop->args);
-				int			alength = estimate_array_length(root, other_operand);
+				double			alength = estimate_array_length(root, other_operand);
 
 				clause_op = saop->opno;
 				found_saop = true;
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index bf967815..02a6224b 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -200,7 +200,7 @@ extern Selectivity scalararraysel(PlannerInfo *root,
 								  ScalarArrayOpExpr *clause,
 								  bool is_join_clause,
 								  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
-extern int	estimate_array_length(PlannerInfo *root, Node *arrayexpr);
+extern double	estimate_array_length(PlannerInfo *root, Node *arrayexpr);
 extern Selectivity rowcomparesel(PlannerInfo *root,
 								 RowCompareExpr *clause,
 								 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
-- 
2.34.1

Reply via email to