On Fri, Jul 18, 2025 at 12:23 AM David G. Johnston <david.g.johns...@gmail.com> wrote:
> Framing this differently, how about a patch that lets extension authors
> choose to implement alternative formulas or even provide GUC-driven
> constants into the planner at the existing spot instead of having to choose
> a best algorithm.  IOW, what would it take to make the proposed patch an
> extension that a DBA could choose to install and override the current log3
> algorithm?

I've added code to make this new GUC, with the default behavior being the
old behavior. I don't think I know enough PostgreSQL to code an extension.

As an example showing this works: where the default algorithm assigns 5
workers, the new one assigns 12.

CREATE TABLE Departments (code VARCHAR(5), UNIQUE (code));

CREATE TABLE Towns (
   id SERIAL UNIQUE NOT NULL,
   code VARCHAR(10) NOT NULL, -- not unique
   article TEXT,
   name TEXT NOT NULL, -- not unique
   department VARCHAR(5) NOT NULL,
   UNIQUE (code, department)
);

insert into towns (
    code, article, name, department
)
select
    left(md5(i::text), 10),
    md5(random()::text),
    md5(random()::text),
    left(md5(random()::text), 5)
from generate_series(1, 10000000) s(i);

insert into departments ( code )
select left(md5(i::text), 5)
from generate_series(1, 1000) s(i);

analyze departments;
analyze towns;

postgres@fedora:~$ /usr/local/pgsql/bin/psql test
psql (19devel)
Type "help" for help.

test=# show parallel_worker_algorithm ;
 parallel_worker_algorithm
---------------------------
 log3
(1 row)

test=# show max_parallel_workers ;
 max_parallel_workers
----------------------
 24
(1 row)

test=# explain (costs off) select count(*) from departments, towns where towns.department = departments.code;
                                       QUERY PLAN
--------------------------------------------------------------------------------------
 Finalize Aggregate
   ->  Gather
         Workers Planned: 5
         ->  Partial Aggregate
               ->  Hash Join
                     Hash Cond: ((towns.department)::text = (departments.code)::text)
                     ->  Parallel Seq Scan on towns
                     ->  Hash
                           ->  Seq Scan on departments
(9 rows)

test=# set parallel_worker_algorithm = sqrt;
SET
test=# explain (costs off) select count(*) from departments, towns where towns.department = departments.code;
                                       QUERY PLAN
--------------------------------------------------------------------------------------
 Finalize Aggregate
   ->  Gather
         Workers Planned: 12
         ->  Partial Aggregate
               ->  Hash Join
                     Hash Cond: ((towns.department)::text = (departments.code)::text)
                     ->  Parallel Seq Scan on towns
                     ->  Hash
                           ->  Seq Scan on departments
(9 rows)

test=#
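To make the 5-versus-12 difference concrete outside the planner, here is a minimal
standalone sketch of the two loops. The helper names (workers_log3, workers_sqrt)
are made up for this sketch, and the ~134,000 heap-page figure for the towns table
is an assumption for illustration, not something I measured; the GUC switch and the
max_parallel_workers_per_gather cap are left out.

/*
 * Illustrative sketch only: simplified copies of the log3 and sqrt loops
 * used in compute_parallel_worker(), applied to an assumed page count.
 */
#include <limits.h>
#include <stdio.h>

static int
workers_log3(double heap_pages, int min_scan_size)
{
    int     threshold = (min_scan_size > 1) ? min_scan_size : 1;
    int     workers = 1;

    /* one extra worker per power of 3 above the threshold */
    while (heap_pages >= (double) threshold * 3)
    {
        workers++;
        threshold *= 3;
        if (threshold > INT_MAX / 3)
            break;              /* avoid overflow */
    }
    return workers;
}

static int
workers_sqrt(double heap_pages, int min_scan_size)
{
    int     threshold = (min_scan_size > 1) ? min_scan_size : 1;
    int     workers = 1;

    /* keep adding workers while pages exceed threshold * workers^2 */
    while (heap_pages >= (double) threshold * workers * workers)
    {
        workers++;
        if ((double) threshold * workers * workers > INT_MAX)
            break;              /* avoid overflow */
    }
    return workers;
}

int
main(void)
{
    double  heap_pages = 134000;    /* assumed size of towns, not measured */
    int     min_scan_size = 1024;   /* default min_parallel_table_scan_size, in 8kB pages */

    printf("log3: %d workers, sqrt: %d workers\n",
           workers_log3(heap_pages, min_scan_size),
           workers_sqrt(heap_pages, min_scan_size));
    return 0;
}

With those assumed inputs this prints "log3: 5 workers, sqrt: 12 workers", matching
the two plans above.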
From a0ba58b001782bffeb8fbd715276c283cc3c1dd4 Mon Sep 17 00:00:00 2001
From: Gregory Hennessy <greg.henne...@gmail.com>
Date: Sun, 20 Jul 2025 14:18:17 -0400
Subject: [PATCH] Added a new GUC, parallel_worker_algorithm, to allow more
 workers

---
 doc/src/sgml/config.sgml                      | 43 +++++++---
 src/backend/optimizer/path/allpaths.c         | 79 ++++++++++++++-----
 src/backend/utils/misc/guc_tables.c           | 18 +++++
 src/backend/utils/misc/postgresql.conf.sample |  3 +-
 src/include/optimizer/paths.h                 |  9 +++
 5 files changed, 117 insertions(+), 35 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c7acc0f182f..16cf61ce713 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2943,25 +2943,42 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
-     <varlistentry id="guc-parallel-leader-participation" xreflabel="parallel_leader_participation">
+     <varlistentry id="guc-max-parallel-workers" xreflabel="max_parallel_workers">
+      <term><varname>max_parallel_workers</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>max_parallel_workers</varname> configuration parameter</primary>
+       </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the maximum number of workers that the cluster can support for
+        parallel operations.  The default value is 8.  When increasing or
+        decreasing this value, consider also adjusting
+        <xref linkend="guc-max-parallel-maintenance-workers"/> and
+        <xref linkend="guc-max-parallel-workers-per-gather"/>.
+        Also, note that a setting for this value which is higher than
+        <xref linkend="guc-max-worker-processes"/> will have no effect,
+        since parallel workers are taken from the pool of worker processes
+        established by that setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-parallel-worker-algorithm" xreflabel="parallel_worker_algorithm">
       <term>
-       <varname>parallel_leader_participation</varname> (<type>boolean</type>)
+       <varname>parallel_worker_algorithm</varname> (<type>enum</type>)
        <indexterm>
-        <primary><varname>parallel_leader_participation</varname> configuration parameter</primary>
+        <primary><varname>parallel_worker_algorithm</varname> configuration parameter</primary>
        </indexterm>
       </term>
       <listitem>
       <para>
-        Allows the leader process to execute the query plan under
-        <literal>Gather</literal> and <literal>Gather Merge</literal> nodes
-        instead of waiting for worker processes. The default is
-        <literal>on</literal>.  Setting this value to <literal>off</literal>
-        reduces the likelihood that workers will become blocked because the
-        leader is not reading tuples fast enough, but requires the leader
-        process to wait for worker processes to start up before the first
-        tuples can be produced.  The degree to which the leader can help or
-        hinder performance depends on the plan type, number of workers and
-        query duration.
+        Controls how the planner chooses the number of parallel workers.
+        The default algorithm <literal>log3</literal> uses the log base 3, rounded
+        up to the nearest integer, of the ratio of the table's heap pages to
+        min_parallel_table_scan_size.  A similar calculation is done for the
+        index pages.  Using a value of <literal>sqrt</literal> allocates more
+        workers, based on the rounded-up square root of the same ratio.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 6cc6966b060..cfed12e1d08 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -80,6 +80,7 @@ bool		enable_geqo = false;	/* just in case GUC doesn't set it */
 int			geqo_threshold;
 int			min_parallel_table_scan_size;
 int			min_parallel_index_scan_size;
+int			parallel_worker_algorithm;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
@@ -4272,23 +4273,45 @@ compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
 		int			heap_parallel_threshold;
 		int			heap_parallel_workers = 1;
 
-		/*
-		 * Select the number of workers based on the log of the size of
-		 * the relation.  This probably needs to be a good deal more
-		 * sophisticated, but we need something here for now.  Note that
-		 * the upper limit of the min_parallel_table_scan_size GUC is
-		 * chosen to prevent overflow here.
-		 */
-		heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
-		while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
+		switch (parallel_worker_algorithm)
 		{
-			heap_parallel_workers++;
-			heap_parallel_threshold *= 3;
-			if (heap_parallel_threshold > INT_MAX / 3)
-				break;			/* avoid overflow */
+			case PARALLEL_WORKER_ALGORITHM_LOG3:
+				/*
+				 * Select the number of workers based on the log of the size of
+				 * the relation.  This probably needs to be a good deal more
+				 * sophisticated, but we need something here for now.  Note that
+				 * the upper limit of the min_parallel_table_scan_size GUC is
+				 * chosen to prevent overflow here.
+				 */
+				heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
+				while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
+				{
+					heap_parallel_workers++;
+					heap_parallel_threshold *= 3;
+					if (heap_parallel_threshold > INT_MAX / 3)
+						break;	/* avoid overflow */
+				}
+				break;
+			case PARALLEL_WORKER_ALGORITHM_SQRT:
+				/*
+				 * Select the number of workers based on the sqrt of the size of
+				 * the relation.  This probably needs to be a good deal more
+				 * sophisticated, but we need something here for now.  Note that
+				 * the upper limit of the min_parallel_table_scan_size GUC is
+				 * chosen to prevent overflow here.
+				 */
+				heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
+				while (heap_pages >= (BlockNumber) (heap_parallel_threshold * heap_parallel_workers * heap_parallel_workers))
+				{
+					heap_parallel_workers++;
+					if (heap_parallel_threshold * heap_parallel_workers * heap_parallel_workers > INT_MAX)
+						break;	/* avoid overflow */
+				}
+				break;
 		}
 
 		parallel_workers = heap_parallel_workers;
+
 	}
 
 	if (index_pages >= 0)
@@ -4296,16 +4319,30 @@ compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
 		int			index_parallel_workers = 1;
 		int			index_parallel_threshold;
 
-		/* same calculation as for heap_pages above */
-		index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
-		while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
+		switch (parallel_worker_algorithm)
 		{
-			index_parallel_workers++;
-			index_parallel_threshold *= 3;
-			if (index_parallel_threshold > INT_MAX / 3)
-				break;			/* avoid overflow */
+			case PARALLEL_WORKER_ALGORITHM_LOG3:
+				/* same calculation as for heap_pages above */
+				index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
+				while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
+				{
+					index_parallel_workers++;
+					index_parallel_threshold *= 3;
+					if (index_parallel_threshold > INT_MAX / 3)
+						break;	/* avoid overflow */
+				}
+				break;
+			case PARALLEL_WORKER_ALGORITHM_SQRT:
+				/* same calculation as for heap_pages above */
+				index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
+				while (index_pages >= (BlockNumber) (index_parallel_threshold * index_parallel_workers * index_parallel_workers))
+				{
+					index_parallel_workers++;
+					if (index_parallel_threshold * index_parallel_workers * index_parallel_workers > INT_MAX)
+						break;	/* avoid overflow */
+				}
+				break;
 		}
-
 		if (parallel_workers > 0)
 			parallel_workers = Min(parallel_workers, index_parallel_workers);
 		else
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index d14b1678e7f..fd7ccfe6d07 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -491,6 +491,14 @@ static const struct config_enum_entry file_copy_method_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry parallel_worker_algorithm_options[] = {
+	{"log3", PARALLEL_WORKER_ALGORITHM_LOG3, false},
+	{"sqrt", PARALLEL_WORKER_ALGORITHM_SQRT, false},
+	{NULL, 0, false}
+};
+
+
+
 /*
  * Options for enum values stored in other modules
  */
@@ -5418,6 +5426,16 @@ struct config_enum ConfigureNamesEnum[] =
 		NULL, assign_io_method, NULL
 	},
 
+	{
+		{"parallel_worker_algorithm", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("selects the algorithm for creating parallel_workers"),
+			NULL
+		},
+		&parallel_worker_algorithm,
+		PARALLEL_WORKER_ALGORITHM_LOG3, parallel_worker_algorithm_options,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..984b473dda4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -220,7 +220,8 @@
 #max_parallel_workers = 8		# number of max_worker_processes that
 					# can be used in parallel operations
 #parallel_leader_participation = on
-
+#parallel_worker_algorithm = log3	# old default
+					# change log3 to sqrt to allow additional parallel_workers
 
 #------------------------------------------------------------------------------
 # WRITE-AHEAD LOG
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 8410531f2d6..4decb97a099 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -20,11 +20,20 @@
 /*
  * allpaths.c
  */
+
+typedef enum
+{
+	PARALLEL_WORKER_ALGORITHM_LOG3, /* use the long-term default algorithm */
+	PARALLEL_WORKER_ALGORITHM_SQRT	/* use a new algorithm to allow more workers */
+} ParallelWorkerAlgorithm;
+
+
 extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int	geqo_threshold;
 extern PGDLLIMPORT int	min_parallel_table_scan_size;
 extern PGDLLIMPORT int	min_parallel_index_scan_size;
 extern PGDLLIMPORT bool enable_group_by_reordering;
+extern PGDLLIMPORT int	parallel_worker_algorithm;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
-- 
2.50.1