From 9c9a5b0732a87d241a28fac12883c94c10089df1 Mon Sep 17 00:00:00 2001
From: "houzj.fnst" <houzj.fnst@fujitsu.com>
Date: Mon, 29 Nov 2021 16:05:03 +0800
Subject: [PATCH] Fix double publish of child table's data.

if publish_via_partition_root is true, then the child table's data will be
copied twice if adding both child and parent table to the publication. The
reason is that the subscriber will fetch the table list from publisher's
pg_publication_tables view to do the table synchronization. But the view always
show both child and parent table which cause the extra synchronization
for the child table.

Fix it by making pg_publication_tables only show parent table if both parent
and child exists in the publication.

---
 src/backend/catalog/pg_publication.c      | 18 ++++--------------
 src/test/regress/expected/publication.out |  8 ++++++++
 src/test/regress/sql/publication.sql      |  4 ++++
 src/test/subscription/t/013_partition.pl  | 14 +++++++++++---
 4 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 63579b2f82..e442d70cad 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -142,7 +142,7 @@ is_publishable_class(Oid relid, Form_pg_class reltuple)
  * the publication.
  */
 static List *
-filter_partitions(List *relids, List *schemarelids)
+filter_partitions(List *relids)
 {
 	List	   *result = NIL;
 	ListCell   *lc;
@@ -163,14 +163,8 @@ filter_partitions(List *relids, List *schemarelids)
 
 			/*
 			 * Check if the parent table exists in the published table list.
-			 *
-			 * XXX As of now, we do this if the partition relation or the
-			 * partition relation's ancestor is present in schema publication
-			 * relations.
 			 */
-			if (list_member_oid(relids, ancestor) &&
-				(list_member_oid(schemarelids, relid) ||
-				 list_member_oid(schemarelids, ancestor)))
+			if (list_member_oid(relids, ancestor))
 			{
 				skip = true;
 				break;
@@ -879,7 +873,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 															PUBLICATION_PART_ROOT :
 															PUBLICATION_PART_LEAF);
 			tables = list_concat_unique_oid(relids, schemarelids);
-			if (schemarelids && publication->pubviaroot)
+			if (publication->pubviaroot)
 			{
 				/*
 				 * If the publication publishes partition changes via their
@@ -888,12 +882,8 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 				 * tables. Otherwise, the function could return both the child
 				 * and parent tables which could cause data of the child table
 				 * to be double-published on the subscriber side.
-				 *
-				 * XXX As of now, we do this when a publication has associated
-				 * schema or for all tables publication. See
-				 * GetAllTablesPublicationRelations().
 				 */
-				tables = filter_partitions(tables, schemarelids);
+				tables = filter_partitions(tables);
 			}
 		}
 
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 1feb558968..bf1484804c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -814,6 +814,14 @@ SELECT * FROM pg_publication_tables;
  pub     | sch2       | tbl1_part1
 (1 row)
 
+-- Table publication that includes both the parent table and the child table
+ALTER PUBLICATION pub ADD TABLE sch1.tbl1;
+SELECT * FROM pg_publication_tables;
+ pubname | schemaname | tablename 
+---------+------------+-----------
+ pub     | sch1       | tbl1
+(1 row)
+
 DROP PUBLICATION pub;
 -- Schema publication that does not include the schema that has the parent table
 CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0);
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 8fa0435c32..3b18261dcf 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -459,6 +459,10 @@ DROP PUBLICATION pub;
 CREATE PUBLICATION pub FOR TABLE sch2.tbl1_part1 WITH (PUBLISH_VIA_PARTITION_ROOT=1);
 SELECT * FROM pg_publication_tables;
 
+-- Table publication that includes both the parent table and the child table
+ALTER PUBLICATION pub ADD TABLE sch1.tbl1;
+SELECT * FROM pg_publication_tables;
+
 DROP PUBLICATION pub;
 -- Schema publication that does not include the schema that has the parent table
 CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0);
diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl
index c75a07d6b3..89e6dcd1f2 100644
--- a/src/test/subscription/t/013_partition.pl
+++ b/src/test/subscription/t/013_partition.pl
@@ -6,7 +6,7 @@ use strict;
 use warnings;
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 62;
+use Test::More tests => 63;
 
 # setup
 
@@ -414,9 +414,12 @@ $node_publisher->safe_psql('postgres',
 # Note: tab3_1's parent is not in the publication, in which case its
 # changes are published using own identity.
 $node_publisher->safe_psql('postgres',
-	"CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab3_1 WITH (publish_via_partition_root = true)"
+	"CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab2_1, tab3_1 WITH (publish_via_partition_root = true)"
 );
 
+# prepare data for the initial sync
+$node_publisher->safe_psql('postgres', "INSERT INTO tab2 VALUES (1)");
+
 # subscriber 1
 $node_subscriber1->safe_psql('postgres', "DROP SUBSCRIPTION sub1");
 $node_subscriber1->safe_psql('postgres',
@@ -468,12 +471,17 @@ $node_subscriber1->poll_query_until('postgres', $synced_query)
 $node_subscriber2->poll_query_until('postgres', $synced_query)
   or die "Timed out while waiting for subscriber to synchronize data";
 
+# check that data is synced correctly
+$result = $node_subscriber1->safe_psql('postgres',
+	"SELECT c, a FROM tab2");
+is( $result, qq(sub1_tab2|1), 'initial data synced for pub_viaroot');
+
 # insert
 $node_publisher->safe_psql('postgres', "INSERT INTO tab1 VALUES (1), (0)");
 $node_publisher->safe_psql('postgres', "INSERT INTO tab1_1 (a) VALUES (3)");
 $node_publisher->safe_psql('postgres', "INSERT INTO tab1_2 VALUES (5)");
 $node_publisher->safe_psql('postgres',
-	"INSERT INTO tab2 VALUES (1), (0), (3), (5)");
+	"INSERT INTO tab2 VALUES (0), (3), (5)");
 $node_publisher->safe_psql('postgres',
 	"INSERT INTO tab3 VALUES (1), (0), (3), (5)");
 
-- 
2.18.4

