Re: [HACKERS] Logical Replication WIP

2017-03-03 Thread Peter Eisentraut
On 2/22/17 07:00, Petr Jelinek wrote:
> On 22/02/17 12:24, Petr Jelinek wrote:
>> Hi,
>>
>> I updated these patches for current HEAD and removed the string
>> initialization in walsender as Fuji Masao committed similar fix in meantime.
>>
>> I also found typo/thinko in the first patch which is now fixed.
>>
> 
> And of course I missed the xlog->wal rename, sigh. Fixed.

all three committed

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-02-22 Thread Petr Jelinek
On 22/02/17 12:24, Petr Jelinek wrote:
> Hi,
> 
> I updated these patches for current HEAD and removed the string
> initialization in walsender as Fuji Masao committed similar fix in meantime.
> 
> I also found typo/thinko in the first patch which is now fixed.
> 

And of course I missed the xlog->wal rename, sigh. Fixed.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From d8216ac470cb0722c536f1094791ce532dda2e4d Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Fri, 20 Jan 2017 21:20:56 +0100
Subject: [PATCH 1/3] Use asynchronous connect API in libpqwalreceiver

This makes the connection attempt from CREATE SUBSCRIPTION and from
WalReceiver interruptable by user in case the libpq connection is
hanging. The previous coding required immediate shutdown (SIGQUIT) of
PostgreSQL in that situation.
---
 src/backend/postmaster/pgstat.c|  4 +-
 .../libpqwalreceiver/libpqwalreceiver.c| 51 +-
 src/include/pgstat.h   |  2 +-
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index ada374c..2fb9a8b 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3340,8 +3340,8 @@ pgstat_get_wait_client(WaitEventClient w)
 		case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
 			event_name = "WalReceiverWaitStart";
 			break;
-		case WAIT_EVENT_LIBPQWALRECEIVER_READ:
-			event_name = "LibPQWalReceiverRead";
+		case WAIT_EVENT_LIBPQWALRECEIVER:
+			event_name = "LibPQWalReceiver";
 			break;
 		case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
 			event_name = "WalSenderWaitForWAL";
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 44a89c7..9366421 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -112,6 +112,7 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
  char **err)
 {
 	WalReceiverConn *conn;
+	PostgresPollingStatusType status;
 	const char *keys[5];
 	const char *vals[5];
 	int			i = 0;
@@ -138,7 +139,53 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
 	vals[i] = NULL;
 
 	conn = palloc0(sizeof(WalReceiverConn));
-	conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
+	conn->streamConn = PQconnectStartParams(keys, vals,
+			/* expand_dbname = */ true);
+	/* Check for conn status. */
+	if (PQstatus(conn->streamConn) == CONNECTION_BAD)
+	{
+		*err = pstrdup(PQerrorMessage(conn->streamConn));
+		return NULL;
+	}
+
+	/* Poll connection. */
+	do
+	{
+		int		rc;
+
+		/* Determine current state of the connection. */
+		status = PQconnectPoll(conn->streamConn);
+
+		/* Sleep a bit if waiting for socket. */
+		if (status == PGRES_POLLING_READING ||
+			status == PGRES_POLLING_WRITING)
+		{
+			int		extra_flag;
+
+			extra_flag = status == PGRES_POLLING_READING ? WL_SOCKET_READABLE :
+WL_SOCKET_WRITEABLE;
+
+			ResetLatch(>procLatch);
+			rc = WaitLatchOrSocket(>procLatch,
+   WL_POSTMASTER_DEATH |
+   WL_LATCH_SET | extra_flag,
+   PQsocket(conn->streamConn),
+   0,
+   WAIT_EVENT_LIBPQWALRECEIVER);
+
+			/* Emergency bailout. */
+			if (rc & WL_POSTMASTER_DEATH)
+exit(1);
+
+			/* Interrupted. */
+			if (rc & WL_LATCH_SET)
+CHECK_FOR_INTERRUPTS();
+		}
+
+		/* Otherwise loop until we have OK or FAILED status. */
+	} while (status != PGRES_POLLING_OK && status != PGRES_POLLING_FAILED);
+
+	/* Check the status. */
 	if (PQstatus(conn->streamConn) != CONNECTION_OK)
 	{
 		*err = pstrdup(PQerrorMessage(conn->streamConn));
@@ -521,7 +568,7 @@ libpqrcv_PQexec(PGconn *streamConn, const char *query)
    WL_LATCH_SET,
    PQsocket(streamConn),
    0,
-   WAIT_EVENT_LIBPQWALRECEIVER_READ);
+   WAIT_EVENT_LIBPQWALRECEIVER);
 			if (rc & WL_POSTMASTER_DEATH)
 exit(1);
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 8b710ec..0062fb8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -764,7 +764,7 @@ typedef enum
 	WAIT_EVENT_CLIENT_WRITE,
 	WAIT_EVENT_SSL_OPEN_SERVER,
 	WAIT_EVENT_WAL_RECEIVER_WAIT_START,
-	WAIT_EVENT_LIBPQWALRECEIVER_READ,
+	WAIT_EVENT_LIBPQWALRECEIVER,
 	WAIT_EVENT_WAL_SENDER_WAIT_WAL,
 	WAIT_EVENT_WAL_SENDER_WRITE_DATA
 } WaitEventClient;
-- 
2.7.4

From be69a9bc19089ec093448c10ea685ccc048ec42c Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Sun, 22 Jan 2017 23:16:57 +0100
Subject: [PATCH 2/3] Fix after trigger execution in logical replication

---
 src/backend/replication/logical/worker.c   |  15 
 src/test/subscription/t/003_constraints.pl | 113 +
 2 files changed, 128 insertions(+)
 create mode 100644 

Re: [HACKERS] Logical Replication WIP

2017-02-22 Thread Petr Jelinek
Hi,

I updated these patches for current HEAD and removed the string
initialization in walsender as Fuji Masao committed similar fix in meantime.

I also found typo/thinko in the first patch which is now fixed.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
>From 8e191350acc8da89e94b784806f4487e2e9c5970 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Fri, 20 Jan 2017 21:20:56 +0100
Subject: [PATCH 1/3] Use asynchronous connect API in libpqwalreceiver

This makes the connection attempt from CREATE SUBSCRIPTION and from
WalReceiver interruptable by user in case the libpq connection is
hanging. The previous coding required immediate shutdown (SIGQUIT) of
PostgreSQL in that situation.
---
 src/backend/postmaster/pgstat.c|  4 +-
 .../libpqwalreceiver/libpqwalreceiver.c| 51 +-
 src/include/pgstat.h   |  2 +-
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index ada374c..2fb9a8b 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3340,8 +3340,8 @@ pgstat_get_wait_client(WaitEventClient w)
 		case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
 			event_name = "WalReceiverWaitStart";
 			break;
-		case WAIT_EVENT_LIBPQWALRECEIVER_READ:
-			event_name = "LibPQWalReceiverRead";
+		case WAIT_EVENT_LIBPQWALRECEIVER:
+			event_name = "LibPQWalReceiver";
 			break;
 		case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
 			event_name = "WalSenderWaitForWAL";
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 44a89c7..9366421 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -112,6 +112,7 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
  char **err)
 {
 	WalReceiverConn *conn;
+	PostgresPollingStatusType status;
 	const char *keys[5];
 	const char *vals[5];
 	int			i = 0;
@@ -138,7 +139,53 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
 	vals[i] = NULL;
 
 	conn = palloc0(sizeof(WalReceiverConn));
-	conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
+	conn->streamConn = PQconnectStartParams(keys, vals,
+			/* expand_dbname = */ true);
+	/* Check for conn status. */
+	if (PQstatus(conn->streamConn) == CONNECTION_BAD)
+	{
+		*err = pstrdup(PQerrorMessage(conn->streamConn));
+		return NULL;
+	}
+
+	/* Poll connection. */
+	do
+	{
+		int		rc;
+
+		/* Determine current state of the connection. */
+		status = PQconnectPoll(conn->streamConn);
+
+		/* Sleep a bit if waiting for socket. */
+		if (status == PGRES_POLLING_READING ||
+			status == PGRES_POLLING_WRITING)
+		{
+			int		extra_flag;
+
+			extra_flag = status == PGRES_POLLING_READING ? WL_SOCKET_READABLE :
+WL_SOCKET_WRITEABLE;
+
+			ResetLatch(>procLatch);
+			rc = WaitLatchOrSocket(>procLatch,
+   WL_POSTMASTER_DEATH |
+   WL_LATCH_SET | extra_flag,
+   PQsocket(conn->streamConn),
+   0,
+   WAIT_EVENT_LIBPQWALRECEIVER);
+
+			/* Emergency bailout. */
+			if (rc & WL_POSTMASTER_DEATH)
+exit(1);
+
+			/* Interrupted. */
+			if (rc & WL_LATCH_SET)
+CHECK_FOR_INTERRUPTS();
+		}
+
+		/* Otherwise loop until we have OK or FAILED status. */
+	} while (status != PGRES_POLLING_OK && status != PGRES_POLLING_FAILED);
+
+	/* Check the status. */
 	if (PQstatus(conn->streamConn) != CONNECTION_OK)
 	{
 		*err = pstrdup(PQerrorMessage(conn->streamConn));
@@ -521,7 +568,7 @@ libpqrcv_PQexec(PGconn *streamConn, const char *query)
    WL_LATCH_SET,
    PQsocket(streamConn),
    0,
-   WAIT_EVENT_LIBPQWALRECEIVER_READ);
+   WAIT_EVENT_LIBPQWALRECEIVER);
 			if (rc & WL_POSTMASTER_DEATH)
 exit(1);
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 8b710ec..0062fb8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -764,7 +764,7 @@ typedef enum
 	WAIT_EVENT_CLIENT_WRITE,
 	WAIT_EVENT_SSL_OPEN_SERVER,
 	WAIT_EVENT_WAL_RECEIVER_WAIT_START,
-	WAIT_EVENT_LIBPQWALRECEIVER_READ,
+	WAIT_EVENT_LIBPQWALRECEIVER,
 	WAIT_EVENT_WAL_SENDER_WAIT_WAL,
 	WAIT_EVENT_WAL_SENDER_WRITE_DATA
 } WaitEventClient;
-- 
2.7.4

>From 7fdb56566c69fd22c21a863a13d5d3ad35604729 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Sun, 22 Jan 2017 23:16:57 +0100
Subject: [PATCH 2/3] Fix after trigger execution in logical replication

---
 src/backend/replication/logical/worker.c   |  15 
 src/test/subscription/t/003_constraints.pl | 113 +
 2 files changed, 128 insertions(+)
 create mode 100644 src/test/subscription/t/003_constraints.pl

diff --git a/src/backend/replication/logical/worker.c 

Re: [HACKERS] Logical Replication WIP

2017-01-26 Thread Petr Jelinek
On 25/01/17 18:16, Peter Eisentraut wrote:
> On 1/22/17 8:11 PM, Petr Jelinek wrote:
>> 0001 - Changes the libpqrcv_connect to use async libpq api so that it
>> won't get stuck forever in case of connect is stuck. This is preexisting
>> bug that also affects walreceiver but it's less visible there as there
>> is no SQL interface to initiate connection there.
> 
> Probably a mistake here:
> 
> +   case PGRES_POLLING_READING:
> +   extra_flag = WL_SOCKET_READABLE;
> +   /* pass through */
> +   case PGRES_POLLING_WRITING:
> +   extra_flag = WL_SOCKET_WRITEABLE;
> 
> extra_flag gets overwritten in the reading case.
> 

Eh, reworked that to just if statement as switch does not really buy us
anything there.

> Please elaborate in the commit message what this change is for.
> 

Okay.

>> 0002 - Close replication connection when CREATE SUBSCRIPTION gets
>> canceled (otherwise walsender on the other side may stay in idle in
>> transaction state).
> 
> committed

Thanks!

> 
>> 0003 - Fixes buffer initialization in walsender that I found when
>> testing the above two. This one should be back-patched to 9.4 since it's
>> broken since then.
> 
> Can you explain more in which code path this problem occurs?

With existing code base, anything that calls WalSndWaitForWal (it calls
ProcessRepliesIfAny()) which is called from logical_read_xlog_page which
is given as callback to logical decoding in CreateReplicationSlot and
StartLogicalReplication.

The reason why I decided to put it into init is that following up all
the paths to where buffers are used is rather complicated due to various
callbacks so if anybody else starts poking around in the future it might
get easily broken again if we don't initialize those unconditionally
(plus the memory footprint is few kB and in usual use of WalSender they
will eventually be initialized anyway as they are needed for streaming).

> I think we should get rid of the global variables and give each function
> its own buffer that it initializes the first time through.  Otherwise
> we'll keep having to worry about this.
> 

Because of above, it would mean some refactoring in logical decoding
APIs not just in WalSender so that would not be backpatchable (and in
general it's much bigger patch then).

>> 0004 - Fixes the foreign key issue reported by Thom Brown and also adds
>> tests for FK and trigger handling.
> 
> I think the trigger handing should go into execReplication.c.
> 

Not in the current state, eventually (and I am afraid that PG11 material
at this point as we still have partitioned tables support and initial
data copy to finish in this release) we'll want to move all the executor
state code to execReplication.c and do less of reinitialization but in
the current code the trigger stuff belongs to worker IMHO.

>> 0005 - Adds support for renaming publications and subscriptions.
> 
> Could those not be handled in the generic rename support in
> ExecRenameStmt()?

Yes seems they can.

Attached updated version of the uncommitted patches.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From 980ce872862e7a9abcbec14864721103507b5136 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Fri, 20 Jan 2017 21:20:56 +0100
Subject: [PATCH 1/4] Use asynchronous connect API in libpqwalreceiver

This makes the connection attempt from CREATE SUBSCRIPTION and from
WalReceiver interruptable by user in case the libpq connection is
hanging. The previous coding required immediate shutdown (SIGQUIT) of
PostgreSQL in that situation.
---
 src/backend/postmaster/pgstat.c|  4 +-
 .../libpqwalreceiver/libpqwalreceiver.c| 51 +-
 src/include/pgstat.h   |  2 +-
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7176cf1..19ad6b5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3340,8 +3340,8 @@ pgstat_get_wait_client(WaitEventClient w)
 		case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
 			event_name = "WalReceiverWaitStart";
 			break;
-		case WAIT_EVENT_LIBPQWALRECEIVER_READ:
-			event_name = "LibPQWalReceiverRead";
+		case WAIT_EVENT_LIBPQWALRECEIVER:
+			event_name = "LibPQWalReceiver";
 			break;
 		case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
 			event_name = "WalSenderWaitForWAL";
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 44a89c7..536324c 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -112,6 +112,7 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
  char **err)
 {
 	WalReceiverConn 

Re: [HACKERS] Logical Replication WIP

2017-01-25 Thread Peter Eisentraut
On 1/23/17 11:19 AM, Fujii Masao wrote:
> The copyright in each file that the commit of logical rep added needs to
> be updated.

I have fixed that.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-25 Thread Peter Eisentraut
On 1/22/17 8:11 PM, Petr Jelinek wrote:
> 0001 - Changes the libpqrcv_connect to use async libpq api so that it
> won't get stuck forever in case of connect is stuck. This is preexisting
> bug that also affects walreceiver but it's less visible there as there
> is no SQL interface to initiate connection there.

Probably a mistake here:

+   case PGRES_POLLING_READING:
+   extra_flag = WL_SOCKET_READABLE;
+   /* pass through */
+   case PGRES_POLLING_WRITING:
+   extra_flag = WL_SOCKET_WRITEABLE;

extra_flag gets overwritten in the reading case.

Please elaborate in the commit message what this change is for.

> 0002 - Close replication connection when CREATE SUBSCRIPTION gets
> canceled (otherwise walsender on the other side may stay in idle in
> transaction state).

committed

> 0003 - Fixes buffer initialization in walsender that I found when
> testing the above two. This one should be back-patched to 9.4 since it's
> broken since then.

Can you explain more in which code path this problem occurs?

I think we should get rid of the global variables and give each function
its own buffer that it initializes the first time through.  Otherwise
we'll keep having to worry about this.

> 0004 - Fixes the foreign key issue reported by Thom Brown and also adds
> tests for FK and trigger handling.

I think the trigger handing should go into execReplication.c.

> 0005 - Adds support for renaming publications and subscriptions.

Could those not be handled in the generic rename support in
ExecRenameStmt()?

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-23 Thread Petr Jelinek
On 23/01/17 17:19, Fujii Masao wrote:
> On Sat, Jan 21, 2017 at 1:39 AM, Petr Jelinek
>  wrote:
>> On 20/01/17 17:33, Jaime Casanova wrote:
>>> On 20 January 2017 at 11:25, Petr Jelinek  
>>> wrote:
 On 20/01/17 17:05, Fujii Masao wrote:
> On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
>  wrote:
>> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>>> There were some conflicting changes committed today so I rebased the
>>> patch on top of them.
>>>
>>> Other than that nothing much has changed, I removed the separate sync
>>> commit patch, included the rename patch in the patchset and fixed the
>>> bug around pg_subscription catalog reported by Erik Rijkers.
>>
>> Committed.
>
> Sorry I've not followed the discussion about logical replication at all, 
> but
> why does logical replication launcher need to start up by default?
>

 Because running subscriptions is allowed by default. You'd need to set
 max_logical_replication_workers to 0 to disable that.

>>>
>>> surely wal_level < logical shouldn't start a logical replication
>>> launcher, and after an initdb wal_level is only replica
>>>
>>
>> Launcher is needed for subscriptions, subscriptions don't depend on
>> wal_level.
> 
> But why did you enable only subscription by default while publication is
> disabled by default (i.e., wal_level != logical)? I think that it's better to
> enable both by default OR disable both by default.
> 

That depends, the wal_level = logical by default was deemed to not be
worth the potential overhead in the thread about wal_level thread. There
is no such overhead associated with enabling subscription, one could say
that it's less work this way to setup whole thing. But I guess it's up
for a debate.

> While I was reading the logical rep code, I found that
> logicalrep_worker_launch returns *without* releasing LogicalRepWorkerLock
> when there is no unused worker slot. This seems a bug.

True, fix attached.

> 
> /* Report this after the initial starting message for consistency. */
> if (max_replication_slots == 0)
> ereport(ERROR,
> (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
> errmsg("cannot start logical replication workers when
> max_replication_slots = 0")));
> 
> logicalrep_worker_launch checks max_replication_slots as above.
> Why does it need to check that setting value in the *subscriber* side?
> Maybe I'm missing something here, but ISTM that the subscription uses
> one replication slot in *publisher* side but doesn't use in *subscriber* side.

Because replication origins are also limited by the
max_replication_slots and they are required for subscription to work (I
am not quite sure why it's the case, I guess we wanted to save GUC).

> 
> *  The apply worker may spawn additional workers (sync) for initial data
> *  synchronization of tables.
> 
> The above header comment in logical/worker.c is true?
> 

Hmm not yet, there is separate patch for it in CF, I guess it got
through the cracks while rebasing.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From 1492ef374e3de60c112fe8e09225a788aa548755 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Mon, 23 Jan 2017 17:50:27 +0100
Subject: [PATCH] Release lock on failure to launch replication worker

---
 src/backend/replication/logical/launcher.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index d9ad66d..cb415f8 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -261,6 +261,7 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
 	/* Bail if not found */
 	if (worker == NULL)
 	{
+		LWLockRelease(LogicalRepWorkerLock);
 		ereport(WARNING,
 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
  errmsg("out of logical replication workers slots"),
-- 
2.7.4


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-23 Thread Fujii Masao
On Sat, Jan 21, 2017 at 1:39 AM, Petr Jelinek
 wrote:
> On 20/01/17 17:33, Jaime Casanova wrote:
>> On 20 January 2017 at 11:25, Petr Jelinek  
>> wrote:
>>> On 20/01/17 17:05, Fujii Masao wrote:
 On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
  wrote:
> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>> There were some conflicting changes committed today so I rebased the
>> patch on top of them.
>>
>> Other than that nothing much has changed, I removed the separate sync
>> commit patch, included the rename patch in the patchset and fixed the
>> bug around pg_subscription catalog reported by Erik Rijkers.
>
> Committed.

 Sorry I've not followed the discussion about logical replication at all, 
 but
 why does logical replication launcher need to start up by default?

>>>
>>> Because running subscriptions is allowed by default. You'd need to set
>>> max_logical_replication_workers to 0 to disable that.
>>>
>>
>> surely wal_level < logical shouldn't start a logical replication
>> launcher, and after an initdb wal_level is only replica
>>
>
> Launcher is needed for subscriptions, subscriptions don't depend on
> wal_level.

But why did you enable only subscription by default while publication is
disabled by default (i.e., wal_level != logical)? I think that it's better to
enable both by default OR disable both by default.

While I was reading the logical rep code, I found that
logicalrep_worker_launch returns *without* releasing LogicalRepWorkerLock
when there is no unused worker slot. This seems a bug.

/* Report this after the initial starting message for consistency. */
if (max_replication_slots == 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("cannot start logical replication workers when
max_replication_slots = 0")));

logicalrep_worker_launch checks max_replication_slots as above.
Why does it need to check that setting value in the *subscriber* side?
Maybe I'm missing something here, but ISTM that the subscription uses
one replication slot in *publisher* side but doesn't use in *subscriber* side.

*  The apply worker may spawn additional workers (sync) for initial data
*  synchronization of tables.

The above header comment in logical/worker.c is true?

The copyright in each file that the commit of logical rep added needs to
be updated.

Regards,

-- 
Fujii Masao


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-23 Thread Thom Brown
On 23 January 2017 at 01:11, Petr Jelinek  wrote:
> On 20/01/17 22:30, Petr Jelinek wrote:
>> Since it's not exactly straight forward to find when these need to be
>> initialized based on commands, I decided to move the initialization code
>> to exec_replication_command() since that's always called before anything
>> so that makes it much less error prone (patch 0003).
>>
>> The 0003 should be backpatched all the way to 9.4 where multiple
>> commands started using those buffers.
>>
>
> Actually there is better place, the WalSndInit().
>
> Just to make it easier for PeterE (or whichever committer picks this up)
> I attached all the logical replication followup fix/polish patches:
>
> 0001 - Changes the libpqrcv_connect to use async libpq api so that it
> won't get stuck forever in case of connect is stuck. This is preexisting
> bug that also affects walreceiver but it's less visible there as there
> is no SQL interface to initiate connection there.
>
> 0002 - Close replication connection when CREATE SUBSCRIPTION gets
> canceled (otherwise walsender on the other side may stay in idle in
> transaction state).
>
> 0003 - Fixes buffer initialization in walsender that I found when
> testing the above two. This one should be back-patched to 9.4 since it's
> broken since then.
>
> 0004 - Fixes the foreign key issue reported by Thom Brown and also adds
> tests for FK and trigger handling.

This fixes the problem for me.  Thanks.
>
> 0005 - Adds support for renaming publications and subscriptions.

Works for me.

I haven't tested the first 3.

Regards

Thom


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-22 Thread Petr Jelinek
On 20/01/17 22:30, Petr Jelinek wrote:
> Since it's not exactly straight forward to find when these need to be
> initialized based on commands, I decided to move the initialization code
> to exec_replication_command() since that's always called before anything
> so that makes it much less error prone (patch 0003).
> 
> The 0003 should be backpatched all the way to 9.4 where multiple
> commands started using those buffers.
> 

Actually there is better place, the WalSndInit().

Just to make it easier for PeterE (or whichever committer picks this up)
I attached all the logical replication followup fix/polish patches:

0001 - Changes the libpqrcv_connect to use async libpq api so that it
won't get stuck forever in case of connect is stuck. This is preexisting
bug that also affects walreceiver but it's less visible there as there
is no SQL interface to initiate connection there.

0002 - Close replication connection when CREATE SUBSCRIPTION gets
canceled (otherwise walsender on the other side may stay in idle in
transaction state).

0003 - Fixes buffer initialization in walsender that I found when
testing the above two. This one should be back-patched to 9.4 since it's
broken since then.

0004 - Fixes the foreign key issue reported by Thom Brown and also adds
tests for FK and trigger handling.

0005 - Adds support for renaming publications and subscriptions.

All rebased on top of current master (90992e0).

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From e2c30b258e97fbba51c8d0f6e12ac557cafb129a Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Fri, 20 Jan 2017 21:20:56 +0100
Subject: [PATCH 1/5] Use asynchronous connect API in libpqwalreceiver

---
 src/backend/postmaster/pgstat.c|  4 +-
 .../libpqwalreceiver/libpqwalreceiver.c| 58 +-
 src/include/pgstat.h   |  2 +-
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7176cf1..19ad6b5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3340,8 +3340,8 @@ pgstat_get_wait_client(WaitEventClient w)
 		case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
 			event_name = "WalReceiverWaitStart";
 			break;
-		case WAIT_EVENT_LIBPQWALRECEIVER_READ:
-			event_name = "LibPQWalReceiverRead";
+		case WAIT_EVENT_LIBPQWALRECEIVER:
+			event_name = "LibPQWalReceiver";
 			break;
 		case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
 			event_name = "WalSenderWaitForWAL";
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7df3698..8dc51d4 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -112,6 +112,7 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
  char **err)
 {
 	WalReceiverConn *conn;
+	PostgresPollingStatusType status;
 	const char *keys[5];
 	const char *vals[5];
 	int			i = 0;
@@ -138,7 +139,60 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
 	vals[i] = NULL;
 
 	conn = palloc0(sizeof(WalReceiverConn));
-	conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
+	conn->streamConn = PQconnectStartParams(keys, vals,
+			/* expand_dbname = */ true);
+	/* Check for conn status. */
+	if (PQstatus(conn->streamConn) == CONNECTION_BAD)
+	{
+		*err = pstrdup(PQerrorMessage(conn->streamConn));
+		return NULL;
+	}
+
+	/* Poll connection. */
+	do
+	{
+		int			rc;
+		int			extra_flag;
+
+		/* Determine which function we should use for Polling. */
+		status = PQconnectPoll(conn->streamConn);
+
+		/* Next action based upon status value. */
+		switch (status)
+		{
+			case PGRES_POLLING_READING:
+extra_flag = WL_SOCKET_READABLE;
+/* pass through */
+			case PGRES_POLLING_WRITING:
+extra_flag = WL_SOCKET_WRITEABLE;
+
+ResetLatch(>procLatch);
+rc = WaitLatchOrSocket(>procLatch,
+	   WL_POSTMASTER_DEATH |
+	   WL_LATCH_SET | extra_flag,
+	   PQsocket(conn->streamConn),
+	   0,
+	   WAIT_EVENT_LIBPQWALRECEIVER);
+if (rc & WL_POSTMASTER_DEATH)
+	exit(1);
+
+/* Interrupted. */
+if (rc & WL_LATCH_SET)
+{
+	CHECK_FOR_INTERRUPTS();
+	break;
+}
+break;
+
+			/* Either can continue polling or finished one way or another. */
+			default:
+break;
+		}
+
+		/* Loop until we have OK or FAILED status. */
+	} while (status != PGRES_POLLING_OK && status != PGRES_POLLING_FAILED);
+
+	/* Check the status. */
 	if (PQstatus(conn->streamConn) != CONNECTION_OK)
 	{
 		*err = pstrdup(PQerrorMessage(conn->streamConn));
@@ -508,7 +562,7 @@ libpqrcv_PQexec(PGconn *streamConn, const char *query)
    WL_LATCH_SET,
    PQsocket(streamConn),
    0,
-	

Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Petr Jelinek
On 20/01/17 17:23, Petr Jelinek wrote:
> On 20/01/17 15:08, Peter Eisentraut wrote:
>> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>>> There were some conflicting changes committed today so I rebased the
>>> patch on top of them.
>>>
>>> Other than that nothing much has changed, I removed the separate sync
>>> commit patch, included the rename patch in the patchset and fixed the
>>> bug around pg_subscription catalog reported by Erik Rijkers.
>>
>> Committed.  I haven't reviewed the rename patch yet, so I'll get back to
>> that later.
>>
> 
> Hi,
> 
> Thanks!
> 
> Here is fix for the dependency mess.
> 

Álvaro pointed out off list couple of issues with how we handle
interruption of commands that connect to walsender.

a) The libpqwalreceiver.c does blocking connect so it's impossible to
cancel CREATE SUBSCRIPTION which is stuck on connect. This is btw
preexisting problem and applies to walreceiver as well. I rewrote the
connect function to use asynchronous API (patch 0001).

b) We can cancel in middle of the command (when stuck in
libpqrcv_PQexec) but the connection to walsender stays open which in
case we are waiting for snapshot can mean that it will stay idle in
transaction. I added PG_TRY wrapper which disconnects on error around
this (patch 0002).

And finally, while testing these two I found bug in walsender StringInfo
initialization (or lack there of). There are 3 static StringInfo buffers
that are initialized in WalSndLoop. Problem with that is that they can
be in some rare scenarios used from CreateReplicationSlot (and IMHO
StartLogicalReplication) before WalSndLoop is called which causes
segfault of walsender. This is rare because it only happens when
downstream closes connection during logical decoding initialization.

Since it's not exactly straight forward to find when these need to be
initialized based on commands, I decided to move the initialization code
to exec_replication_command() since that's always called before anything
so that makes it much less error prone (patch 0003).

The 0003 should be backpatched all the way to 9.4 where multiple
commands started using those buffers.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From 0c18831557c963bdd4d03ef5d8f3c4ace1a51d0e Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Fri, 20 Jan 2017 21:20:56 +0100
Subject: [PATCH 1/3] Use asynchronous connect API in libpqwalreceiver

---
 src/backend/postmaster/pgstat.c|  4 +-
 .../libpqwalreceiver/libpqwalreceiver.c| 58 +-
 src/include/pgstat.h   |  2 +-
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7176cf1..19ad6b5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3340,8 +3340,8 @@ pgstat_get_wait_client(WaitEventClient w)
 		case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
 			event_name = "WalReceiverWaitStart";
 			break;
-		case WAIT_EVENT_LIBPQWALRECEIVER_READ:
-			event_name = "LibPQWalReceiverRead";
+		case WAIT_EVENT_LIBPQWALRECEIVER:
+			event_name = "LibPQWalReceiver";
 			break;
 		case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
 			event_name = "WalSenderWaitForWAL";
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7df3698..8dc51d4 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -112,6 +112,7 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
  char **err)
 {
 	WalReceiverConn *conn;
+	PostgresPollingStatusType status;
 	const char *keys[5];
 	const char *vals[5];
 	int			i = 0;
@@ -138,7 +139,60 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
 	vals[i] = NULL;
 
 	conn = palloc0(sizeof(WalReceiverConn));
-	conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
+	conn->streamConn = PQconnectStartParams(keys, vals,
+			/* expand_dbname = */ true);
+	/* Check for conn status. */
+	if (PQstatus(conn->streamConn) == CONNECTION_BAD)
+	{
+		*err = pstrdup(PQerrorMessage(conn->streamConn));
+		return NULL;
+	}
+
+	/* Poll connection. */
+	do
+	{
+		int			rc;
+		int			extra_flag;
+
+		/* Determine which function we should use for Polling. */
+		status = PQconnectPoll(conn->streamConn);
+
+		/* Next action based upon status value. */
+		switch (status)
+		{
+			case PGRES_POLLING_READING:
+extra_flag = WL_SOCKET_READABLE;
+/* pass through */
+			case PGRES_POLLING_WRITING:
+extra_flag = WL_SOCKET_WRITEABLE;
+
+ResetLatch(>procLatch);
+rc = WaitLatchOrSocket(>procLatch,
+	   WL_POSTMASTER_DEATH |
+	   WL_LATCH_SET | extra_flag,
+	   PQsocket(conn->streamConn),
+	   0,
+	   

Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Robert Haas
On Fri, Jan 20, 2017 at 2:57 PM, Craig Ringer  wrote:
> > I don't see how a subscription can do anything useful with wal_level <
> > logical?
>
> The upstream must have it set to logical so we can decide the change stream.
>
> The downstream need not. It's an independent instance.

/me facepalms.

Thanks.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Craig Ringer
On 21 Jan. 2017 06:48, "Robert Haas"  wrote:

On Fri, Jan 20, 2017 at 11:39 AM, Petr Jelinek
 wrote:
> Launcher is needed for subscriptions, subscriptions don't depend on
> wal_level.

I don't see how a subscription can do anything useful with wal_level <
logical?


The upstream must have it set to logical so we can decide the change stream.

The downstream need not. It's an independent instance.


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Robert Haas
On Fri, Jan 20, 2017 at 11:39 AM, Petr Jelinek
 wrote:
> Launcher is needed for subscriptions, subscriptions don't depend on
> wal_level.

I don't see how a subscription can do anything useful with wal_level < logical?

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Jaime Casanova
On 20 January 2017 at 11:39, Petr Jelinek  wrote:
> On 20/01/17 17:33, Jaime Casanova wrote:
>>>
>>
>> surely wal_level < logical shouldn't start a logical replication
>> launcher, and after an initdb wal_level is only replica
>>
>
> Launcher is needed for subscriptions, subscriptions don't depend on
> wal_level.
>

mmm... ok, i need to read a little then. thanks

-- 
Jaime Casanova  www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Petr Jelinek
On 20/01/17 17:33, Jaime Casanova wrote:
> On 20 January 2017 at 11:25, Petr Jelinek  
> wrote:
>> On 20/01/17 17:05, Fujii Masao wrote:
>>> On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
>>>  wrote:
 On 1/19/17 5:01 PM, Petr Jelinek wrote:
> There were some conflicting changes committed today so I rebased the
> patch on top of them.
>
> Other than that nothing much has changed, I removed the separate sync
> commit patch, included the rename patch in the patchset and fixed the
> bug around pg_subscription catalog reported by Erik Rijkers.

 Committed.
>>>
>>> Sorry I've not followed the discussion about logical replication at all, but
>>> why does logical replication launcher need to start up by default?
>>>
>>
>> Because running subscriptions is allowed by default. You'd need to set
>> max_logical_replication_workers to 0 to disable that.
>>
> 
> surely wal_level < logical shouldn't start a logical replication
> launcher, and after an initdb wal_level is only replica
> 

Launcher is needed for subscriptions, subscriptions don't depend on
wal_level.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Jaime Casanova
On 20 January 2017 at 11:25, Petr Jelinek  wrote:
> On 20/01/17 17:05, Fujii Masao wrote:
>> On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
>>  wrote:
>>> On 1/19/17 5:01 PM, Petr Jelinek wrote:
 There were some conflicting changes committed today so I rebased the
 patch on top of them.

 Other than that nothing much has changed, I removed the separate sync
 commit patch, included the rename patch in the patchset and fixed the
 bug around pg_subscription catalog reported by Erik Rijkers.
>>>
>>> Committed.
>>
>> Sorry I've not followed the discussion about logical replication at all, but
>> why does logical replication launcher need to start up by default?
>>
>
> Because running subscriptions is allowed by default. You'd need to set
> max_logical_replication_workers to 0 to disable that.
>

surely wal_level < logical shouldn't start a logical replication
launcher, and after an initdb wal_level is only replica


-- 
Jaime Casanova  www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Petr Jelinek
On 20/01/17 17:05, Fujii Masao wrote:
> On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
>  wrote:
>> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>>> There were some conflicting changes committed today so I rebased the
>>> patch on top of them.
>>>
>>> Other than that nothing much has changed, I removed the separate sync
>>> commit patch, included the rename patch in the patchset and fixed the
>>> bug around pg_subscription catalog reported by Erik Rijkers.
>>
>> Committed.
> 
> Sorry I've not followed the discussion about logical replication at all, but
> why does logical replication launcher need to start up by default?
> 

Because running subscriptions is allowed by default. You'd need to set
max_logical_replication_workers to 0 to disable that.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Petr Jelinek
On 20/01/17 15:08, Peter Eisentraut wrote:
> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>> There were some conflicting changes committed today so I rebased the
>> patch on top of them.
>>
>> Other than that nothing much has changed, I removed the separate sync
>> commit patch, included the rename patch in the patchset and fixed the
>> bug around pg_subscription catalog reported by Erik Rijkers.
> 
> Committed.  I haven't reviewed the rename patch yet, so I'll get back to
> that later.
> 

Hi,

Thanks!

Here is fix for the dependency mess.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 21e523d..9791f43 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -236,6 +236,8 @@ CreatePublication(CreatePublicationStmt *stmt)
 
 	heap_close(rel, RowExclusiveLock);
 
+	recordDependencyOnOwner(PublicationRelationId, puboid, GetUserId());
+
 	InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);
 
 	return myself;
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 1448ee3..00f2a5f 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -313,6 +313,8 @@ CreateSubscription(CreateSubscriptionStmt *stmt)
 
 	ObjectAddressSet(myself, SubscriptionRelationId, subid);
 
+	recordDependencyOnOwner(SubscriptionRelationId, subid, GetUserId());
+
 	InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);
 
 	return myself;
@@ -493,6 +495,10 @@ DropSubscription(DropSubscriptionStmt *stmt)
 
 	ReleaseSysCache(tup);
 
+	/* Clean up the depenencies. */
+	deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid,
+	 InvalidOid);
+
 	/* Protect against launcher restarting the worker. */
 	LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
 

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Fujii Masao
On Fri, Jan 20, 2017 at 11:08 PM, Peter Eisentraut
 wrote:
> On 1/19/17 5:01 PM, Petr Jelinek wrote:
>> There were some conflicting changes committed today so I rebased the
>> patch on top of them.
>>
>> Other than that nothing much has changed, I removed the separate sync
>> commit patch, included the rename patch in the patchset and fixed the
>> bug around pg_subscription catalog reported by Erik Rijkers.
>
> Committed.

Sorry I've not followed the discussion about logical replication at all, but
why does logical replication launcher need to start up by default?

$ initdb -D data
$ pg_ctl -D data start

When I ran the above commands, I got the following message and
found that the bgworker for logical replicatdion launcher was running.

LOG:  logical replication launcher started

Regards,

-- 
Fujii Masao


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-20 Thread Peter Eisentraut
On 1/19/17 5:01 PM, Petr Jelinek wrote:
> There were some conflicting changes committed today so I rebased the
> patch on top of them.
> 
> Other than that nothing much has changed, I removed the separate sync
> commit patch, included the rename patch in the patchset and fixed the
> bug around pg_subscription catalog reported by Erik Rijkers.

Committed.  I haven't reviewed the rename patch yet, so I'll get back to
that later.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP - FailedAssertion, File: "array_typanalyze.c", Line: 340

2017-01-19 Thread Erik Rijkers

On 2017-01-19 19:12, Petr Jelinek wrote:

On 19/01/17 18:44, Erik Rijkers wrote:


Could probably be whittled down to something shorter but I hope it's
still easily reproduced.



Just analyze on the pg_subscription is enough.



heh. Ah well, I did find it :)


Can you give the current patch set? I am failing to get a compilable 
set.


In the following order they apply, but then fail during compile.

0001-Add-PUBLICATION-catalogs-and-DDL-v18.patch
0002-Add-SUBSCRIPTION-catalog-and-DDL-v18.patch
0003-Define-logical-replication-protocol-and-output-plugi-v18.patch
0004-Add-logical-replication-workers-v18fixed.patch
0006-Add-RENAME-support-for-PUBLICATIONs-and-SUBSCRIPTION.patch
0001-Logical-replication-support-for-initial-data-copy-v3.patch
pg_subscription-analyze-fix.diff

The compile fails with:

In file included from ../../../../src/include/postgres.h:47:0,
 from worker.c:27:
worker.c: In function ‘create_estate_for_relation’:
../../../../src/include/c.h:203:14: warning: passing argument 4 of 
‘InitResultRelInfo’ makes pointer from integer without a cast 
[-Wint-conversion]

 #define true ((bool) 1)
  ^
worker.c:187:53: note: in expansion of macro ‘true’
  InitResultRelInfo(resultRelInfo, rel->localrel, 1, true, NULL, 0);
 ^~~~
In file included from ../../../../src/include/funcapi.h:21:0,
 from worker.c:31:
../../../../src/include/executor/executor.h:189:13: note: expected 
‘Relation {aka struct RelationData *}’ but argument is of type ‘char’

 extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
 ^
worker.c:187:59: warning: passing argument 5 of ‘InitResultRelInfo’ 
makes integer from pointer without a cast [-Wint-conversion]

  InitResultRelInfo(resultRelInfo, rel->localrel, 1, true, NULL, 0);
   ^~~~
In file included from ../../../../src/include/funcapi.h:21:0,
 from worker.c:31:
../../../../src/include/executor/executor.h:189:13: note: expected ‘int’ 
but argument is of type ‘void *’

 extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
 ^
worker.c:187:2: error: too many arguments to function 
‘InitResultRelInfo’

  InitResultRelInfo(resultRelInfo, rel->localrel, 1, true, NULL, 0);
  ^
In file included from ../../../../src/include/funcapi.h:21:0,
 from worker.c:31:
../../../../src/include/executor/executor.h:189:13: note: declared here
 extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
 ^
make[4]: *** [worker.o] Error 1
make[4]: *** Waiting for unfinished jobs
make[3]: *** [logical-recursive] Error 2
make[2]: *** [replication-recursive] Error 2
make[2]: *** Waiting for unfinished jobs
^[make[1]: *** [all-backend-recurse] Error 2
make: *** [all-src-recurse] Error 2




but perhaps that patchset itself is incorrect, or the order in which I 
applied them.


Can you please put them in the right order?  (I tried already a few...)


thanks,


Erik Rijkers



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP - FailedAssertion, File: "array_typanalyze.c", Line: 340

2017-01-19 Thread Petr Jelinek
On 19/01/17 18:44, Erik Rijkers wrote:
> 
> Could probably be whittled down to something shorter but I hope it's
> still easily reproduced.
> 

Just analyze on the pg_subscription is enough. Looks like it's the
name[] type, when I change it to text[] like in the attached patch it
works fine for me.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h
index ccb8880..0ad7b0e 100644
--- a/src/include/catalog/pg_subscription.h
+++ b/src/include/catalog/pg_subscription.h
@@ -41,7 +41,7 @@ CATALOG(pg_subscription,6100) BKI_SHARED_RELATION BKI_ROWTYPE_OID(6101) BKI_SCHE
 	text		subconninfo;	/* Connection string to the publisher */
 	NameData	subslotname;	/* Slot name on publisher */
 
-	name		subpublications[1];	/* List of publications subscribed to */
+	text		subpublications[1];	/* List of publications subscribed to */
 #endif
 } FormData_pg_subscription;
 

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP - FailedAssertion, File: "array_typanalyze.c", Line: 340

2017-01-19 Thread Erik Rijkers

On 2017-01-19 01:02, Petr Jelinek wrote:

This causes the replica to crash:

#--
#!/bin/bash

# 2 instances on 6972 (master) and 6973 (replica)
# initially without publication or subscription

# clean logs
#echo > 
/var/data1/pg_stuff/pg_installations/pgsql.logical_replication/logfile.logical_replication
#echo > 
/var/data1/pg_stuff/pg_installations/pgsql.logical_replication2/logfile.logical_replication2


SLEEP=1
bail=0
pub_count=$( echo "select count(*) from pg_publication" | psql -qtAXp 
6972 )

if  [[ $pub_count -ne 0 ]]
then
  echo "pub_count -ne 0 - deleting pub1 & bailing out"
  echo "drop publication if exists pub1" | psql -Xp 6972
  bail=1
fi
sub_count=$( echo "select count(*) from pg_subscription" | psql -qtAXp 
6973 )

if [[ $sub_count -ne 0 ]]
 then
  echo "sub_count -ne 0 - deleting sub1 & bailing out"
  echo "drop subscription if exists sub1" | psql -Xp 6973
  bail=1
fi

if [[ $bail -eq 1 ]]
then
   exit -1
fi

echo "drop table if exists testt;" | psql -qXap 6972
echo "drop table if exists testt;" | psql -qXap 6973

echo "-- on master  (port 6972):
create table testt(id serial primary key, n integer, c text);
create publication pub1 for all tables; " | psql -qXap 6972

echo "-- on replica (port 6973):
create table testt(id serial primary key, n integer, c text);
create subscription sub1 connection 'port=6972' publication pub1 with 
(disabled);

alter  subscription sub1 enable; "| psql -qXap 6973

sleep $SLEEP

echo "table testt /*limit 3*/; select current_setting('port'), count(*) 
from testt;" | psql -qXp 6972
echo "table testt /*limit 3*/; select current_setting('port'), count(*) 
from testt;" | psql -qXp 6973


echo "-- now crash:
analyze pg_subscription" | psql -qXp 6973
#--



-- log of the replica:

2017-01-19 17:54:09.163 CET 224200 LOG:  starting logical replication 
worker for subscription "sub1"
2017-01-19 17:54:09.166 CET 21166 LOG:  logical replication apply for 
subscription sub1 started
2017-01-19 17:54:09.169 CET 21166 LOG:  starting logical replication 
worker for subscription "sub1"
2017-01-19 17:54:09.172 CET 21171 LOG:  logical replication sync for 
subscription sub1, table testt started
2017-01-19 17:54:09.190 CET 21171 LOG:  logical replication 
synchronization worker finished processing
TRAP: FailedAssertion("!(((array)->elemtype) == extra_data->type_id)", 
File: "array_typanalyze.c", Line: 340)
2017-01-19 17:54:20.110 CET 224190 LOG:  server process (PID 21183) was 
terminated by signal 6: Aborted
2017-01-19 17:54:20.110 CET 224190 DETAIL:  Failed process was running: 
autovacuum: ANALYZE pg_catalog.pg_subscription
2017-01-19 17:54:20.110 CET 224190 LOG:  terminating any other active 
server processes
2017-01-19 17:54:20.110 CET 224198 WARNING:  terminating connection 
because of crash of another server process
2017-01-19 17:54:20.110 CET 224198 DETAIL:  The postmaster has commanded 
this server process to roll back the current transaction and exit, 
because another server process exited abnormally and possibly corrupted 
shared memory.
2017-01-19 17:54:20.110 CET 224198 HINT:  In a moment you should be able 
to reconnect to the database and repeat your command.
2017-01-19 17:54:20.111 CET 224190 LOG:  all server processes 
terminated; reinitializing
2017-01-19 17:54:20.143 CET 21184 LOG:  database system was interrupted; 
last known up at 2017-01-19 17:38:48 CET
2017-01-19 17:54:20.179 CET 21184 LOG:  recovered replication state of 
node 1 to 0/2CEBF08
2017-01-19 17:54:20.179 CET 21184 LOG:  database system was not properly 
shut down; automatic recovery in progress

2017-01-19 17:54:20.181 CET 21184 LOG:  redo starts at 0/2513E88
2017-01-19 17:54:20.184 CET 21184 LOG:  invalid record length at 
0/2546980: wanted 24, got 0

2017-01-19 17:54:20.184 CET 21184 LOG:  redo done at 0/2546918
2017-01-19 17:54:20.184 CET 21184 LOG:  last completed transaction was 
at log time 2017-01-19 17:54:09.191697+01
2017-01-19 17:54:20.191 CET 21184 LOG:  MultiXact member wraparound 
protections are now enabled
2017-01-19 17:54:20.193 CET 224190 LOG:  database system is ready to 
accept connections

2017-01-19 17:54:20.193 CET 21188 LOG:  autovacuum launcher started
2017-01-19 17:54:20.194 CET 21190 LOG:  logical replication launcher 
started
2017-01-19 17:54:20.194 CET 21190 LOG:  starting logical replication 
worker for subscription "sub1"
2017-01-19 17:54:20.202 CET 21191 LOG:  logical replication apply for 
subscription sub1 started




Could probably be whittled down to something shorter but I hope it's 
still easily reproduced.



thanks,

Erik Rijkers





setup of the 2 instances:


# ./instances.sh
#!/bin/bash
port1=6972
port2=6973
project1=logical_replication
project2=logical_replication2
# pg_stuff_dir=$HOME/pg_stuff
  pg_stuff_dir=/var/data1/pg_stuff
PATH1=$pg_stuff_dir/pg_installations/pgsql.$project1/bin:$PATH
PATH2=$pg_stuff_dir/pg_installations/pgsql.$project2/bin:$PATH

Re: [HACKERS] Logical Replication WIP

2017-01-18 Thread Petr Jelinek
On 17/01/17 22:43, Robert Haas wrote:
> On Tue, Jan 17, 2017 at 11:15 AM, Petr Jelinek
>  wrote:
>>> Is there anything stopping anyone from implementing it?
>>
>> No, just didn't seem priority for the functionality right now.
> 
> Why is it OK for this to not support rename like everything else does?
>  It shouldn't be more than a few hours of work to fix that, and I
> think leaving stuff like that out just because it's a lower priority
> is fairly short-sighted.
> 

Sigh, I wanted to leave it for next CF, but since you insist. Here is a
patch that adds rename.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From 3ea2e0027bdeab5d6877ac7c18c28e12c7406b76 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Thu, 19 Jan 2017 00:59:01 +0100
Subject: [PATCH 6/6] Add RENAME support for PUBLICATIONs and SUBSCRIPTIONs

---
 src/backend/commands/alter.c   |  6 
 src/backend/commands/publicationcmds.c | 50 ++
 src/backend/commands/subscriptioncmds.c| 50 ++
 src/backend/parser/gram.y  | 18 +++
 src/backend/replication/logical/worker.c   | 16 +-
 src/include/commands/publicationcmds.h |  2 ++
 src/include/commands/subscriptioncmds.h|  2 ++
 src/test/regress/expected/publication.out  | 10 +-
 src/test/regress/expected/subscription.out | 10 +-
 src/test/regress/sql/publication.sql   |  6 +++-
 src/test/regress/sql/subscription.sql  |  6 +++-
 src/test/subscription/t/001_rep_changes.pl | 11 ++-
 12 files changed, 181 insertions(+), 6 deletions(-)

diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c
index 768fcc8..1a4154c 100644
--- a/src/backend/commands/alter.c
+++ b/src/backend/commands/alter.c
@@ -351,6 +351,12 @@ ExecRenameStmt(RenameStmt *stmt)
case OBJECT_TYPE:
return RenameType(stmt);
 
+   case OBJECT_PUBLICATION:
+   return RenamePublication(stmt);
+
+   case OBJECT_SUBSCRIPTION:
+   return RenameSubscription(stmt);
+
case OBJECT_AGGREGATE:
case OBJECT_COLLATION:
case OBJECT_CONVERSION:
diff --git a/src/backend/commands/publicationcmds.c 
b/src/backend/commands/publicationcmds.c
index 21e523d..8bc4e02 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -752,3 +752,53 @@ AlterPublicationOwner_oid(Oid subid, Oid newOwnerId)
 
heap_close(rel, RowExclusiveLock);
 }
+
+/*
+ * Rename the publication.
+ */
+ObjectAddress
+RenamePublication(RenameStmt *stmt)
+{
+   Oid pubid;
+   HeapTuple   tup;
+   Relationrel;
+   ObjectAddress address;
+
+   rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+   tup = SearchSysCacheCopy1(PUBLICATIONNAME,
+ 
CStringGetDatum(stmt->subname));
+
+   if (!HeapTupleIsValid(tup))
+   ereport(ERROR,
+   (errcode(ERRCODE_UNDEFINED_OBJECT),
+errmsg("publication \"%s\" does not exist", 
stmt->subname)));
+
+   /* make sure the new name doesn't exist */
+   if (OidIsValid(get_publication_oid(stmt->newname, true)))
+   ereport(ERROR,
+   (errcode(ERRCODE_DUPLICATE_SCHEMA),
+errmsg("publication \"%s\" already exists", 
stmt->newname)));
+
+   pubid = HeapTupleGetOid(tup);
+
+   /* Must be owner. */
+   if (!pg_publication_ownercheck(pubid, GetUserId()))
+   aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+  stmt->subname);
+
+   /* rename */
+   namestrcpy(&(((Form_pg_publication) GETSTRUCT(tup))->pubname),
+  stmt->newname);
+   simple_heap_update(rel, >t_self, tup);
+   CatalogUpdateIndexes(rel, tup);
+
+   InvokeObjectPostAlterHook(PublicationRelationId, pubid, 0);
+
+   ObjectAddressSet(address, PublicationRelationId, pubid);
+
+   heap_close(rel, NoLock);
+   heap_freetuple(tup);
+
+   return address;
+}
diff --git a/src/backend/commands/subscriptioncmds.c 
b/src/backend/commands/subscriptioncmds.c
index 1448ee3..56b254e 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -641,3 +641,53 @@ AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId)
 
heap_close(rel, RowExclusiveLock);
 }
+
+/*
+ * Rename the subscription.
+ */
+ObjectAddress
+RenameSubscription(RenameStmt *stmt)
+{
+   Oid subid;
+   HeapTuple   tup;
+   Relationrel;
+   ObjectAddress address;
+
+   rel = 

Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Robert Haas
On Tue, Jan 17, 2017 at 11:15 AM, Petr Jelinek
 wrote:
>> Is there anything stopping anyone from implementing it?
>
> No, just didn't seem priority for the functionality right now.

Why is it OK for this to not support rename like everything else does?
 It shouldn't be more than a few hours of work to fix that, and I
think leaving stuff like that out just because it's a lower priority
is fairly short-sighted.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Petr Jelinek
On 17/01/17 17:11, Peter Eisentraut wrote:
> On 1/15/17 1:48 PM, Petr Jelinek wrote:
>> It's meant to decouple the synchronous commit setting for logical
>> replication workers from the one set for normal clients. Now that we
>> have owners for subscription and subscription runs as that owner, maybe
>> we could do that via ALTER USER.
> 
> I was thinking about that as well.
> 
>> However I think the apply should by
>> default run with sync commit turned off as the performance benefits are
>> important there given that there is one worker that has to replicate in
>> serialized manner and the success of replication is not confirmed by
>> responding to COMMIT but by reporting LSNs of various replication stages.
> 
> Hmm, I don't think we should ship with an "unsafe" default.  Do we have
> any measurements of the performance impact?
> 

I will have to do some for the patch specifically, I only have ones for
pglogical/bdr where it's quite significant.

The default is not unsafe really, we still report correct flush position
to the publisher. The synchronous replication on publisher will still
work even if synchronous standby is subscription which itself has sync
commit off (that's why the complicated
send_feedback()/get_flush_position()) but will have higher latency as
flushes don't happen immediately. Cascading should be fine as well even
around crashes as logical decoding only picks up flushed WAL.

It could be however argued there may be some consistency issues around
the crash as other transactions could have already seen data that
disappeared after postgres recovery and then reappeared again when the
replication caught up again. That might indeed be a show stopper for the
default off.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Petr Jelinek
On 17/01/17 17:09, Peter Eisentraut wrote:
> 
>> Yes, that will need some discussion about corner case behaviour. For
>> example, have partitioned table 'foo' which is in publication, then you
>> have table 'bar' which is not in publication, you attach it to the
>> partitioned table 'foo', should it automatically be added to
>> publication? Then you detach it, should it then be removed from publication?
>> What if 'bar' was in publication before it was attached/detached to/from
>> 'foo'? What if 'foo' wasn't in publication but 'bar' was? Should we
>> allow ONLY syntax for partitioned table when they are being added and
>> removed?
> 
> Let's think about that in a separate thread.
> 

Agreed.

>>> reread_subscription() complains if the subscription name was changed.
>>> I don't know why that is a problem.
>>
>> Because we don't have ALTER SUBSCRIPTION RENAME currently. Maybe should
>> be Assert?
> 
> Is there anything stopping anyone from implementing it?
> 

No, just didn't seem priority for the functionality right now.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Peter Eisentraut
On 1/15/17 1:48 PM, Petr Jelinek wrote:
> It's meant to decouple the synchronous commit setting for logical
> replication workers from the one set for normal clients. Now that we
> have owners for subscription and subscription runs as that owner, maybe
> we could do that via ALTER USER.

I was thinking about that as well.

> However I think the apply should by
> default run with sync commit turned off as the performance benefits are
> important there given that there is one worker that has to replicate in
> serialized manner and the success of replication is not confirmed by
> responding to COMMIT but by reporting LSNs of various replication stages.

Hmm, I don't think we should ship with an "unsafe" default.  Do we have
any measurements of the performance impact?

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Peter Eisentraut
On 1/15/17 5:20 PM, Petr Jelinek wrote:
> Well, it's 4 because max_worker_processes is 8, I think default
> max_worker_processes should be higher than
> max_logical_replication_workers so that's why I picked 4. If we are okay
> wit bumping the max_worker_processes a bit, I am all for increasing
> max_logical_replication_workers as well.
> 
> The quick setup mentions 10 mainly for consistency with slots and wal
> senders (those IMHO should also not be 0 by default at this point...).

Those defaults have now been changed, so the "Quick setup" section could
potentially be simplified a bit.

> I did this somewhat differently, with struct that defines options and
> has different union members for physical and logical replication. What
> do you think of that?

Looks good.

>>  Not sure about
>> worker_internal.h.  Maybe rename apply.c to worker.c?
>>
> 
> Hmm I did that, seems reasonably okay. Original patch in fact had both
> worker.c and apply.c and I eventually moved the worker.c functions to
> either apply.c or launcher.c.

I'm not too worried about this.

> Yes, that will need some discussion about corner case behaviour. For
> example, have partitioned table 'foo' which is in publication, then you
> have table 'bar' which is not in publication, you attach it to the
> partitioned table 'foo', should it automatically be added to
> publication? Then you detach it, should it then be removed from publication?
> What if 'bar' was in publication before it was attached/detached to/from
> 'foo'? What if 'foo' wasn't in publication but 'bar' was? Should we
> allow ONLY syntax for partitioned table when they are being added and
> removed?

Let's think about that in a separate thread.

>> reread_subscription() complains if the subscription name was changed.
>> I don't know why that is a problem.
> 
> Because we don't have ALTER SUBSCRIPTION RENAME currently. Maybe should
> be Assert?

Is there anything stopping anyone from implementing it?


I'm happy with these patches now.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-17 Thread Peter Eisentraut
On 1/15/17 2:28 PM, Petr Jelinek wrote:
> Well the preinstalled information_schema is excluded by the
> FirstNormalObjectId filter as it's created by initdb. If user drops and
> recreates it that means it was created as user object.
> 
> My opinion is that FOR ALL TABLES should replicate all user tables (ie,
> anything that has Oid >= FirstNormalObjectId), if those are added to
> information_schema that's up to user. We also replicate user created
> tables in pg_catalog even if it's system catalog so I don't see why
> information_schema should be filtered on schema level.

Fair enough.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Petr Jelinek
On 15/01/17 23:57, Erik Rijkers wrote:
> On 2017-01-15 23:20, Petr Jelinek wrote:
> 
>> 0001-Add-PUBLICATION-catalogs-and-DDL-v18.patch
>> 0002-Add-SUBSCRIPTION-catalog-and-DDL-v18.patch
>> 0003-Define-logical-replication-protocol-and-output-plugi-v18.patch
>> 0004-Add-logical-replication-workers-v18.patch
>> 0005-Add-separate-synchronous-commit-control-for-logical--v18.patch
> 
> patches apply OK (to master), but I get this compile error:
> 

Ah missed that during final rebase, sorry. Here is fixed 0004 patch.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


0004-Add-logical-replication-workers-v18fixed.patch.gz
Description: application/gzip

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Erik Rijkers

On 2017-01-15 23:20, Petr Jelinek wrote:


0001-Add-PUBLICATION-catalogs-and-DDL-v18.patch
0002-Add-SUBSCRIPTION-catalog-and-DDL-v18.patch
0003-Define-logical-replication-protocol-and-output-plugi-v18.patch
0004-Add-logical-replication-workers-v18.patch
0005-Add-separate-synchronous-commit-control-for-logical--v18.patch


patches apply OK (to master), but I get this compile error:

execReplication.c: In function ‘ExecSimpleRelationInsert’:
execReplication.c:392:41: warning: passing argument 3 of 
‘ExecConstraints’ from incompatible pointer type 
[-Wincompatible-pointer-types]

ExecConstraints(resultRelInfo, slot, estate);
 ^~
In file included from execReplication.c:21:0:
../../../src/include/executor/executor.h:197:13: note: expected 
‘TupleTableSlot * {aka struct TupleTableSlot *}’ but argument is of type 
‘EState * {aka struct EState *}’

 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
 ^~~
execReplication.c:392:4: error: too few arguments to function 
‘ExecConstraints’

ExecConstraints(resultRelInfo, slot, estate);
^~~
In file included from execReplication.c:21:0:
../../../src/include/executor/executor.h:197:13: note: declared here
 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
 ^~~
execReplication.c: In function ‘ExecSimpleRelationUpdate’:
execReplication.c:451:41: warning: passing argument 3 of 
‘ExecConstraints’ from incompatible pointer type 
[-Wincompatible-pointer-types]

ExecConstraints(resultRelInfo, slot, estate);
 ^~
In file included from execReplication.c:21:0:
../../../src/include/executor/executor.h:197:13: note: expected 
‘TupleTableSlot * {aka struct TupleTableSlot *}’ but argument is of type 
‘EState * {aka struct EState *}’

 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
 ^~~
execReplication.c:451:4: error: too few arguments to function 
‘ExecConstraints’

ExecConstraints(resultRelInfo, slot, estate);
^~~
In file included from execReplication.c:21:0:
../../../src/include/executor/executor.h:197:13: note: declared here
 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
 ^~~
make[3]: *** [execReplication.o] Error 1
make[2]: *** [executor-recursive] Error 2
make[1]: *** [install-backend-recurse] Error 2
make: *** [install-src-recurse] Error 2



Erik Rijkers





--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Petr Jelinek
On 15/01/17 20:20, Euler Taveira wrote:
> On 15-01-2017 15:13, Petr Jelinek wrote:
>> I am not quite sure I agree with this. Either it's system object and we
>> don't replicate it (which I would have considered to be anything with
>> Oid < FirstNormalObjectId) or it's user made and then it should be
>> replicated. Filtering by schema name is IMHO way too fragile (what stops
>> user creating additional tables there for example).
>>
> What happens if you replicate information_schema tables? AFAICS, those
> tables are already in the subscriber database. And will it generate
> error or warning? (I'm not sure how this functionality deals with
> schemas.) Also, why do I want to replicate a information schema table?
> Their contents are static and, by default, it is already in each database.
> 
> Information schema isn't a catalog but I think it is good to exclude it
> from FOR ALL TABLES clause because the use case is almost zero. Of
> course, it should be documented. Also, if someone wants to replicate an
> information schema table, it could do it with ALTER PUBLICATION.
> 

Well the preinstalled information_schema is excluded by the
FirstNormalObjectId filter as it's created by initdb. If user drops and
recreates it that means it was created as user object.

My opinion is that FOR ALL TABLES should replicate all user tables (ie,
anything that has Oid >= FirstNormalObjectId), if those are added to
information_schema that's up to user. We also replicate user created
tables in pg_catalog even if it's system catalog so I don't see why
information_schema should be filtered on schema level.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Euler Taveira
On 15-01-2017 15:13, Petr Jelinek wrote:
> I am not quite sure I agree with this. Either it's system object and we
> don't replicate it (which I would have considered to be anything with
> Oid < FirstNormalObjectId) or it's user made and then it should be
> replicated. Filtering by schema name is IMHO way too fragile (what stops
> user creating additional tables there for example).
> 
What happens if you replicate information_schema tables? AFAICS, those
tables are already in the subscriber database. And will it generate
error or warning? (I'm not sure how this functionality deals with
schemas.) Also, why do I want to replicate a information schema table?
Their contents are static and, by default, it is already in each database.

Information schema isn't a catalog but I think it is good to exclude it
from FOR ALL TABLES clause because the use case is almost zero. Of
course, it should be documented. Also, if someone wants to replicate an
information schema table, it could do it with ALTER PUBLICATION.


-- 
   Euler Taveira   Timbira - http://www.timbira.com.br/
   PostgreSQL: Consultoria, Desenvolvimento, Suporte 24x7 e Treinamento


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Petr Jelinek
On 06/01/17 21:26, Peter Eisentraut wrote:
> 0005-Add-separate-synchronous-commit-control-for-logical--v16.patch.gz
> 
> This looks a little bit hackish.  I'm not sure how this would behave
> properly when either synchronous_commit or
> logical_replication_synchronous_commit is changed at run time with a reload.
> 

Yes, I said in the initial email that this is meant for discussion and
not as final implementation. And certainly it's not required for initial
commit. Perhaps I should have started separate thread for this part.

> I'm thinking maybe this and perhaps some other WAL receiver settings
> should be properties of a subscription, like ALTER SUBSCRIPTION ...
> SET/RESET.
>

True, but we still need the GUC defaults.

> Actually, maybe I'm a bit confused what this is supposed to achieve.
> synchronous_commit has both a local and a remote meaning.  What behavior
> are the various combinations of physical and logical replication
> supposed to accomplish?
> 

It's meant to decouple the synchronous commit setting for logical
replication workers from the one set for normal clients. Now that we
have owners for subscription and subscription runs as that owner, maybe
we could do that via ALTER USER. However I think the apply should by
default run with sync commit turned off as the performance benefits are
important there given that there is one worker that has to replicate in
serialized manner and the success of replication is not confirmed by
responding to COMMIT but by reporting LSNs of various replication stages.

Perhaps the logical_replication_synchronous_commit should only be
boolean that would translate to 'off' and 'local' for the real
synchronous_commit.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-15 Thread Petr Jelinek
On 11/01/17 22:30, Peter Eisentraut wrote:
> On 1/11/17 3:35 PM, Petr Jelinek wrote:
>> On 11/01/17 18:32, Peter Eisentraut wrote:
>>> On 1/11/17 3:29 AM, Petr Jelinek wrote:
 Okay, looking into my notes, I originally did this because we did not
 allow adding tables without pkeys to publications which effectively
 prohibited FOR ALL TABLES publication from working because of
 information_schema without this. Since this is no longer the case I
 think it's safe to skip the FirstNormalObjectId check.
>>>
>>> Wouldn't that mean that FOR ALL TABLES replicates the tables from
>>> information_schema?
>>>
>>
>> Yes, as they are not catalog tables, I thought that was your point.
> 
> But we shouldn't do that.  So we need to exclude information_schema from
> "all tables" somehow.  Just probably not by OID, since that is not fixed.
> 

I am not quite sure I agree with this. Either it's system object and we
don't replicate it (which I would have considered to be anything with
Oid < FirstNormalObjectId) or it's user made and then it should be
replicated. Filtering by schema name is IMHO way too fragile (what stops
user creating additional tables there for example).

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Peter Eisentraut
On 1/11/17 3:35 PM, Petr Jelinek wrote:
> On 11/01/17 18:32, Peter Eisentraut wrote:
>> On 1/11/17 3:29 AM, Petr Jelinek wrote:
>>> Okay, looking into my notes, I originally did this because we did not
>>> allow adding tables without pkeys to publications which effectively
>>> prohibited FOR ALL TABLES publication from working because of
>>> information_schema without this. Since this is no longer the case I
>>> think it's safe to skip the FirstNormalObjectId check.
>>
>> Wouldn't that mean that FOR ALL TABLES replicates the tables from
>> information_schema?
>>
> 
> Yes, as they are not catalog tables, I thought that was your point.

But we shouldn't do that.  So we need to exclude information_schema from
"all tables" somehow.  Just probably not by OID, since that is not fixed.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Peter Eisentraut
On 1/11/17 3:35 PM, Petr Jelinek wrote:
> On 11/01/17 18:27, Peter Eisentraut wrote:
>> On 1/11/17 3:11 AM, Petr Jelinek wrote:
>>> That will not help, issue is that we consider names for origins to be
>>> unique across cluster while subscription names are per database so if
>>> there is origin per subscription (which there has to be) it will always
>>> clash if we just use the name. I already have locally changed this to
>>> pg_ naming scheme and it works fine.
>>
>> How will that make it unique across the cluster?
>>
>> Should we include the system ID from pg_control?
>>
> 
> pg_subscription is shared catalog so oids are unique.

Oh, I see what you mean by cluster now.  It's a confusing term.


-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Petr Jelinek
On 11/01/17 18:27, Peter Eisentraut wrote:
> On 1/11/17 3:11 AM, Petr Jelinek wrote:
>> That will not help, issue is that we consider names for origins to be
>> unique across cluster while subscription names are per database so if
>> there is origin per subscription (which there has to be) it will always
>> clash if we just use the name. I already have locally changed this to
>> pg_ naming scheme and it works fine.
> 
> How will that make it unique across the cluster?
> 
> Should we include the system ID from pg_control?
> 

pg_subscription is shared catalog so oids are unique.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Petr Jelinek
On 11/01/17 18:32, Peter Eisentraut wrote:
> On 1/11/17 3:29 AM, Petr Jelinek wrote:
>> Okay, looking into my notes, I originally did this because we did not
>> allow adding tables without pkeys to publications which effectively
>> prohibited FOR ALL TABLES publication from working because of
>> information_schema without this. Since this is no longer the case I
>> think it's safe to skip the FirstNormalObjectId check.
> 
> Wouldn't that mean that FOR ALL TABLES replicates the tables from
> information_schema?
> 

Yes, as they are not catalog tables, I thought that was your point.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Peter Eisentraut
On 1/11/17 3:29 AM, Petr Jelinek wrote:
> Okay, looking into my notes, I originally did this because we did not
> allow adding tables without pkeys to publications which effectively
> prohibited FOR ALL TABLES publication from working because of
> information_schema without this. Since this is no longer the case I
> think it's safe to skip the FirstNormalObjectId check.

Wouldn't that mean that FOR ALL TABLES replicates the tables from
information_schema?

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Peter Eisentraut
On 1/11/17 3:11 AM, Petr Jelinek wrote:
> That will not help, issue is that we consider names for origins to be
> unique across cluster while subscription names are per database so if
> there is origin per subscription (which there has to be) it will always
> clash if we just use the name. I already have locally changed this to
> pg_ naming scheme and it works fine.

How will that make it unique across the cluster?

Should we include the system ID from pg_control?

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Petr Jelinek
On 10/01/17 15:06, Peter Eisentraut wrote:
> On 1/3/17 5:23 PM, Petr Jelinek wrote:
>> I got this remark about IsCatalogClass() from Andres offline as well,
>> but it's not true, it only checks for FirstNormalObjectId for objects in
>> pg_catalog and toast schemas, not anywhere else.
> 
> I see your statement is correct, but I'm not sure the overall behavior
> is sensible.  Either we consider the information_schema tables to be
> catalog tables, and then IsCatalogClass() should be changed, or we
> consider then non-catalog tables, and then we should let them be in
> publications.  I don't think having a third category of
> sometimes-catalog tables is desirable.
> 
> Currently, they clearly behave like non-catalog tables, since you can
> just drop and recreate them freely, so I would choose the second option.
>  It might be worth changing that, but it doesn't have to be the job of
> this patch set.
> 

Okay, looking into my notes, I originally did this because we did not
allow adding tables without pkeys to publications which effectively
prohibited FOR ALL TABLES publication from working because of
information_schema without this. Since this is no longer the case I
think it's safe to skip the FirstNormalObjectId check.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-11 Thread Petr Jelinek
On 10/01/17 14:52, Peter Eisentraut wrote:
> On 1/2/17 8:32 AM, Petr Jelinek wrote:
>> On 02/01/17 05:23, Steve Singer wrote:
>>> but I can't drop the subscription either
>>>
>>>
>>> test_b=# drop subscription mysub;
>>> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
>>>
>>>  alter subscription mysub disable;
>>> ALTER SUBSCRIPTION
>>> drop subscription mysub;
>>> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
>>>
>>> drop subscription mysub nodrop slot;
>>>
>>> doesn't work either.  If I first drop the working/active subscription on
>>> the original 'test' database it works but I can't seem to drop the
>>> subscription record on test_b
> 
> I can't reproduce this exactly, but I notice that CREATE SUBSCRIPTION
> NOCREATE SLOT does not create a replication origin, but DROP
> SUBSCRIPTION NODROP SLOT does attempt to drop the origin.  If the origin
> is not in use, it will just go away, but if it is in use, it might lead
> to the situation described above, where the second subscription cannot
> be removed.

This is thinko in it's own regard, origin needs to be created regardless
of the slot.

> 
>> I guess this is because replication origins are pg instance global and
>> we use subscription name for origin name internally. Maybe we need to
>> prefix/suffix it with db oid or something like that, but that's
>> problematic a bit as well as they both have same length limit. I guess
>> we could use subscription OID as replication origin name which is
>> somewhat less user friendly in terms of debugging but would be unique.
> 
> I think the most robust way would be to associate origins to
> subscriptions using the object dependency mechanism, and just pick an
> internal name like we do for automatically created indexes or sequences,
> for example.
> 

That will not help, issue is that we consider names for origins to be
unique across cluster while subscription names are per database so if
there is origin per subscription (which there has to be) it will always
clash if we just use the name. I already have locally changed this to
pg_ naming scheme and it works fine.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-10 Thread Peter Eisentraut
On 1/3/17 5:23 PM, Petr Jelinek wrote:
> I got this remark about IsCatalogClass() from Andres offline as well,
> but it's not true, it only checks for FirstNormalObjectId for objects in
> pg_catalog and toast schemas, not anywhere else.

I see your statement is correct, but I'm not sure the overall behavior
is sensible.  Either we consider the information_schema tables to be
catalog tables, and then IsCatalogClass() should be changed, or we
consider then non-catalog tables, and then we should let them be in
publications.  I don't think having a third category of
sometimes-catalog tables is desirable.

Currently, they clearly behave like non-catalog tables, since you can
just drop and recreate them freely, so I would choose the second option.
 It might be worth changing that, but it doesn't have to be the job of
this patch set.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-10 Thread Peter Eisentraut
On 1/2/17 8:32 AM, Petr Jelinek wrote:
> On 02/01/17 05:23, Steve Singer wrote:
>> but I can't drop the subscription either
>>
>>
>> test_b=# drop subscription mysub;
>> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
>>
>>  alter subscription mysub disable;
>> ALTER SUBSCRIPTION
>> drop subscription mysub;
>> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
>>
>> drop subscription mysub nodrop slot;
>>
>> doesn't work either.  If I first drop the working/active subscription on
>> the original 'test' database it works but I can't seem to drop the
>> subscription record on test_b

I can't reproduce this exactly, but I notice that CREATE SUBSCRIPTION
NOCREATE SLOT does not create a replication origin, but DROP
SUBSCRIPTION NODROP SLOT does attempt to drop the origin.  If the origin
is not in use, it will just go away, but if it is in use, it might lead
to the situation described above, where the second subscription cannot
be removed.

> I guess this is because replication origins are pg instance global and
> we use subscription name for origin name internally. Maybe we need to
> prefix/suffix it with db oid or something like that, but that's
> problematic a bit as well as they both have same length limit. I guess
> we could use subscription OID as replication origin name which is
> somewhat less user friendly in terms of debugging but would be unique.

I think the most robust way would be to associate origins to
subscriptions using the object dependency mechanism, and just pick an
internal name like we do for automatically created indexes or sequences,
for example.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-06 Thread Peter Eisentraut
0005-Add-separate-synchronous-commit-control-for-logical--v16.patch.gz

This looks a little bit hackish.  I'm not sure how this would behave
properly when either synchronous_commit or
logical_replication_synchronous_commit is changed at run time with a reload.

I'm thinking maybe this and perhaps some other WAL receiver settings
should be properties of a subscription, like ALTER SUBSCRIPTION ...
SET/RESET.

Actually, maybe I'm a bit confused what this is supposed to achieve.
synchronous_commit has both a local and a remote meaning.  What behavior
are the various combinations of physical and logical replication
supposed to accomplish?

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-06 Thread Peter Eisentraut
Comments on 0004-Add-logical-replication-workers-v16.patch.gz:

I didn't find any major problems.  At times while I was testing strange
things it was not clear why "nothing is happening".  I'll do some more
checking in that direction.

Fixup patch attached that enhances some error messages, fixes some
typos, and other minor changes.  See also comments below.

---

The way check_max_logical_replication_workers() is implemented creates
potential ordering dependencies in postgresql.conf.  For example,

max_logical_replication_workers = 100
max_worker_processes = 200

fails, but if you change the order, it works.  The existing
check_max_worker_processes() has the same problem, but I suspect because
it only checks against MAX_BACKENDS, nobody has ever seriously hit that
limit.

I suggest just removing the check.  If you set
max_logical_replication_workers higher than max_worker_processes and you
hit the lower limit, then whatever is controlling max_worker_processes
should complain with its own error message.

---

The default for max_logical_replication_workers is 4, which seems very
little.  Maybe it should be more like 10 or 20.  The "Quick setup"
section recommends changing it to 10.  We should at least be
consistent there: If you set a default value that is not 0, then it
should enough that we don't need to change it again in the Quick
setup.  (Maybe the default max_worker_processes should also be
raised?)

+max_logical_replication_workers = 10 # one per subscription + one per
instance needed on subscriber

I think this is incorrect (copied from max_worker_processes?).  The
launcher does not count as one of the workers here.

On a related note, should the minimum not be 0 instead of 1?

---

About the changes to libpqrcv_startstreaming().  The timeline is not
really an option in the syntax.  Just passing in a string that is
pasted in the final command creates too much coupling, I think.  I
would keep the old timeline (TimeLineID tli) argument, and make the
options const char * [], and let startstreaming() assemble the final
string, including commas and parentheses.  It's still not a perfect
abstraction, because you need to do the quoting yourself, but much
better.  (Alternatively, get rid of the startstreaming call and just
have callers use libpqrcv_PQexec directly.)

---

Some of the header files are named inconsistently with their .c files.
I think src/include/replication/logicalworker.h should be split into
logicalapply.h and logicallauncher.h.  Not sure about
worker_internal.h.  Maybe rename apply.c to worker.c?

(I'm also not fond of throwing publicationcmds.h and
subscriptioncmds.h together into replicationcmds.h.  Maybe that could
be changed, too.)

---

Various FATAL errors in logical/relation.c when the target relation is
not in the right state.  Could those not be ERRORs?  The behavior is
the same at the moment because background workers terminate on
uncaught exceptions, but that should eventually be improved.

A FATAL error will lead to a

LOG:  unexpected EOF on standby connection

on the publisher, because the process just dies without protocol
shutdown.  (And then it reconnects and tries again.  So we might as
well not die and just retry again.)

---

In LogicalRepRelMapEntry, rename rel to localrel, so it's clearer in
the code using this struct.  (Maybe reloid -> localreloid)

---

Partitioned tables are not supported in either publications or as
replication targets.  This is expected but should be fixed before the
final release.

---

In apply.c:

The comment in apply_handle_relation() makes a point that the schema
validation is done later, but does not tell why.  The answer is
probably because it doesn't matter and it's more convenient, but it
should be explained in the comment.

See XXX comment in logicalrep_worker_stop().

The get_flush_position() return value is not intuitive from the
function name.  Maybe make that another pointer argument for clarity.

reread_subscription() complains if the subscription name was changed.
I don't know why that is a problem.

---

In launcher.c:

pg_stat_get_subscription should hold LogicalRepWorkerLock around the
whole loop, so that it doesn't get inconsistent results when workers
change during the loop.

---

In relation.c:

Inconsistent use of uint32 vs LogicalRepRelId.  Pick one. :)

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 30e7b9d70b5c6488dd74eb648c62372911096544 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Thu, 5 Jan 2017 12:00:00 -0500
Subject: [PATCH] fixup! Add logical replication workers

---
 doc/src/sgml/catalogs.sgml |  6 ++
 doc/src/sgml/ref/create_subscription.sgml  |  2 +-
 src/backend/executor/execReplication.c |  6 +-
 src/backend/replication/logical/apply.c| 88 --
 src/backend/replication/logical/launcher.c | 34 ++--
 

Re: [HACKERS] Logical Replication WIP

2017-01-04 Thread Peter Eisentraut
0003-Define-logical-replication-protocol-and-output-plugi-v16.patch.gz
looks good now, documentation is clear now.

Another fixup patch to remove excessive includes. ;-)

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 85537d88ec1f79403d9db28b0bdfede28e60e975 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Wed, 4 Jan 2017 12:00:00 -0500
Subject: [PATCH] fixup! Define logical replication protocol and output plugin

Remove unused includes
---
 src/backend/replication/logical/proto.c | 32 +
 src/backend/replication/pgoutput/pgoutput.c |  8 +---
 2 files changed, 2 insertions(+), 38 deletions(-)

diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index 9a2a168b40..6f61357ea3 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -1,7 +1,7 @@
 /*-
  *
  * proto.c
- * 		logical replication protocol functions
+ *		logical replication protocol functions
  *
  * Copyright (c) 2015, PostgreSQL Global Development Group
  *
@@ -12,44 +12,14 @@
  */
 #include "postgres.h"
 
-#include "miscadmin.h"
-
-#include "access/htup_details.h"
-#include "access/heapam.h"
-
 #include "access/sysattr.h"
-#include "access/tuptoaster.h"
-#include "access/xact.h"
-
-#include "catalog/catversion.h"
-#include "catalog/index.h"
-
-#include "catalog/namespace.h"
-#include "catalog/pg_class.h"
-#include "catalog/pg_database.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_type.h"
-
-#include "commands/dbcommands.h"
-
-#include "executor/spi.h"
-
 #include "libpq/pqformat.h"
-
-#include "mb/pg_wchar.h"
-
-#include "nodes/makefuncs.h"
-
 #include "replication/logicalproto.h"
-#include "replication/reorderbuffer.h"
-
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
-#include "utils/memutils.h"
-#include "utils/rel.h"
 #include "utils/syscache.h"
-#include "utils/timestamp.h"
-#include "utils/typcache.h"
 
 /*
  * Protocol message flags.
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 5fc48ac312..04dde5d494 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1,7 +1,7 @@
 /*-
  *
  * pgoutput.c
- *		  Logical Replication output plugin
+ *		Logical Replication output plugin
  *
  * Copyright (c) 2012-2015, PostgreSQL Global Development Group
  *
@@ -12,13 +12,8 @@
  */
 #include "postgres.h"
 
-#include "access/xact.h"
-
-#include "catalog/catalog.h"
 #include "catalog/pg_publication.h"
 
-#include "mb/pg_wchar.h"
-
 #include "replication/logical.h"
 #include "replication/logicalproto.h"
 #include "replication/origin.h"
@@ -27,7 +22,6 @@
 #include "utils/builtins.h"
 #include "utils/inval.h"
 #include "utils/int8.h"
-#include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
 
-- 
2.11.0


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-04 Thread Peter Eisentraut
Some small patches for 0002-Add-SUBSCRIPTION-catalog-and-DDL-v16.patch.gz:

- Add a get_subscription_name() function

- Remove call for ApplyLauncherWakeupAtCommit() (rebasing error?)

- Remove some unused include files (same as before)

- Rename pg_dump --no-create-subscription-slot to
--no-create-subscription-slots (plural), add documentation.

In CreateSubscription(), I don't think we should connect to the remote
if no slot creation is requested.  Arguably, the point of that option is
to not make network connections.  (That is what my documentation patch
above claims, in any case.)

I don't know why we need to check the PostgreSQL version number of the
remote.  We should rely on the protocol version number, and we should
just make it work.  When PG 11 comes around, subscribing from PG 10 to a
publisher on PG 11 should just work without any warnings, IMO.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 5fd71a1eb99761cca3cd53990b8302fb58e5a01e Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Wed, 4 Jan 2017 12:00:00 -0500
Subject: [PATCH 1/4] fixup! Add SUBSCRIPTION catalog and DDL

Add get_subscription_name()
---
 src/backend/catalog/objectaddress.c   | 18 +-
 src/backend/catalog/pg_subscription.c | 23 +++
 src/include/catalog/pg_subscription.h |  1 +
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index 936adfc42d..b77fbf341d 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -3341,16 +3341,8 @@ getObjectDescription(const ObjectAddress *object)
 
 		case OCLASS_SUBSCRIPTION:
 			{
-HeapTuple	tup;
-
-tup = SearchSysCache1(SUBSCRIPTIONOID,
-	  ObjectIdGetDatum(object->objectId));
-if (!HeapTupleIsValid(tup))
-	elog(ERROR, "cache lookup failed for subscription %u",
-		 object->objectId);
 appendStringInfo(, _("subscription %s"),
-   NameStr(((Form_pg_subscription) GETSTRUCT(tup))->subname));
-ReleaseSysCache(tup);
+ get_subscription_name(object->objectId));
 break;
 			}
 
@@ -4865,13 +4857,13 @@ getObjectIdentityParts(const ObjectAddress *object,
 
 		case OCLASS_SUBSCRIPTION:
 			{
-Subscription *sub;
+char	   *subname;
 
-sub = GetSubscription(object->objectId, false);
+subname = get_subscription_name(object->objectId);
 appendStringInfoString(,
-	   quote_identifier(sub->name));
+	   quote_identifier(subname));
 if (objname)
-	*objname = list_make1(pstrdup(sub->name));
+	*objname = list_make1(subname);
 break;
 			}
 
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
index 2c58cc653e..c15e9b5d74 100644
--- a/src/backend/catalog/pg_subscription.c
+++ b/src/backend/catalog/pg_subscription.c
@@ -173,6 +173,29 @@ get_subscription_oid(const char *subname, bool missing_ok)
 }
 
 /*
+ * get_subscription_oid - given a subscription OID, look up the name
+ */
+char *
+get_subscription_name(Oid subid)
+{
+	HeapTuple		tup;
+	char		   *subname;
+	Form_pg_subscription subform;
+
+	tup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for subscription %u", subid);
+
+	subform = (Form_pg_subscription) GETSTRUCT(tup);
+	subname = pstrdup(NameStr(subform->subname));
+
+	ReleaseSysCache(tup);
+
+	return subname;
+}
+
+/*
  * Convert text array to list of strings.
  *
  * Note: the resulting list of strings is pallocated here.
diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h
index 855ed360fb..645541c9b4 100644
--- a/src/include/catalog/pg_subscription.h
+++ b/src/include/catalog/pg_subscription.h
@@ -70,6 +70,7 @@ typedef struct Subscription
 extern Subscription *GetSubscription(Oid subid, bool missing_ok);
 extern void FreeSubscription(Subscription *sub);
 extern Oid get_subscription_oid(const char *subname, bool missing_ok);
+extern char *get_subscription_name(Oid subid);
 
 extern int CountDBSubscriptions(Oid dbid);
 
-- 
2.11.0

From f5f5d6ae0fc627f0fbb3ca7497339c4cd77ab8b7 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Wed, 4 Jan 2017 12:00:00 -0500
Subject: [PATCH 2/4] fixup! Add SUBSCRIPTION catalog and DDL

Remove ApplyLauncherWakeupAtCommit() call for now.
---
 src/backend/commands/subscriptioncmds.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index a6679879db..9dbd6d4c59 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -359,8 +359,6 @@ CreateSubscription(CreateSubscriptionStmt *stmt)
 
 	InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);
 
-	ApplyLauncherWakeupAtCommit();
-
 	return myself;
 }
 

Re: [HACKERS] Logical Replication WIP

2017-01-04 Thread Petr Jelinek
On 03/01/17 22:51, Peter Eisentraut wrote:
> On 1/3/17 2:39 PM, Peter Eisentraut wrote:
>> In 0001-Add-PUBLICATION-catalogs-and-DDL-v16.patch.gz,
> 
> Attached are a couple of small fixes for this.  Feel free to ignore the
> removal of the header files if they are needed by later patches.
> 

Thanks, merged, no they are not needed by other patches.

I also hopefully resolved the concerns you had about the relcache
invalidation and expanded comment in is_publishable_class to make the
intention there bit clearer.

Only attached the changed patch, the rest should still apply fine on top
of it.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


0001-Add-PUBLICATION-catalogs-and-DDL-v17.patch.gz
Description: application/gzip

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-03 Thread Petr Jelinek
On 03/01/17 20:39, Peter Eisentraut wrote:
> In 0001-Add-PUBLICATION-catalogs-and-DDL-v16.patch.gz,
> 
> +static bool
> +is_publishable_class(Oid relid, Form_pg_class reltuple)
> +{
> +   return reltuple->relkind == RELKIND_RELATION &&
> +   !IsCatalogClass(relid, reltuple) &&
> +   reltuple->relpersistence == RELPERSISTENCE_PERMANENT &&
> +   /* XXX needed to exclude information_schema tables */
> +   relid >= FirstNormalObjectId;
> +}
> 
> I don't think the XXX part is necessary, because IsCatalogClass()
> already checks for the same thing.  (The whole thing is a bit bogus
> anyway, because you can drop and recreate the information schema at run
> time without restriction.)
>

I got this remark about IsCatalogClass() from Andres offline as well,
but it's not true, it only checks for FirstNormalObjectId for objects in
pg_catalog and toast schemas, not anywhere else.

> +#define MAX_RELCACHE_INVAL_MSGS 100
> +   List*relids = GetPublicationRelations(HeapTupleGetOid(tup));
> +
> +   /*
> +* We don't want to send too many individual messages, at some point
> +* it's cheaper to just reset whole relcache.
> +*
> +* XXX: the MAX_RELCACHE_INVAL_MSGS was picked arbitrarily, maybe
> +* there is better limit.
> +*/
> +   if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)
> 
> Do we have more data on this?  There are people running with 10
> tables, and changing a publication with a 1000 tables would blow all
> that away?
> 
> Maybe at least it should be set relative to INITRELCACHESIZE (400) to
> tie things together a bit?
> 

I am actually thinking this should correspond to MAXNUMMESSAGES (4096)
as that's the limit on buffer size. I didn't find it the first time
around when I was looking for good number.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-03 Thread Peter Eisentraut
On 1/3/17 2:39 PM, Peter Eisentraut wrote:
> In 0001-Add-PUBLICATION-catalogs-and-DDL-v16.patch.gz,

Attached are a couple of small fixes for this.  Feel free to ignore the
removal of the header files if they are needed by later patches.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From d1a509cee9e1c2f6c9c4503f19e55520d8b6617f Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 3 Jan 2017 12:00:00 -0500
Subject: [PATCH 1/5] fixup! Add PUBLICATION catalogs and DDL

Add documentation for pg_publication_tables view.
---
 doc/src/sgml/catalogs.sgml | 63 +-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 70533405e2..ae33c56db7 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -5375,7 +5375,8 @@ pg_publication_rel
   
The catalog pg_publication_rel contains the
mapping between relations and publications in the database.  This is a
-   many-to-many mapping.
+   many-to-many mapping.  See also 
+   for a more user-friendly view of this information.
   
 
   
@@ -7729,6 +7730,11 @@ System Views
  
 
  
+  pg_publication_tables
+  publications and their associated tables
+ 
+
+ 
   pg_replication_origin_status
   information about replication origins, including replication progress
  
@@ -9010,6 +9016,61 @@ pg_prepared_xacts Columns
 
  
 
+ 
+  pg_publication_tables
+
+  
+   pg_publication_tables
+  
+
+  
+   The view pg_publication_tables provides
+   information about the mapping between publications and the tables they
+   contain.  Unlike the underlying
+   catalog pg_publication_rel, this view expands
+   publications defined as FOR ALL TABLES, so for such
+   publications there will be a row for each eligible table.
+  
+
+  
+   pg_publication_tables Columns
+
+   
+
+ 
+  Name
+  Type
+  References
+  Description
+ 
+
+
+
+ 
+  pubname
+  name
+  pg_publication.pubname
+  Name of publication
+ 
+
+ 
+  schemaname
+  name
+  pg_namespace.nspname
+  Name of schema containing table
+ 
+
+ 
+  tablename
+  name
+  pg_class.relname
+  Name of table
+ 
+
+   
+  
+ 
+
   
   pg_replication_origin_status
 
-- 
2.11.0

>From 173215232e14ae209373fd69c936fa71ae3b1758 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 3 Jan 2017 12:00:00 -0500
Subject: [PATCH 2/5] fixup! Add PUBLICATION catalogs and DDL

Add missing clauses to documentation of DROP PUBLICATION.
---
 doc/src/sgml/ref/drop_publication.sgml | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/ref/drop_publication.sgml b/doc/src/sgml/ref/drop_publication.sgml
index d05d522041..1a1be579ad 100644
--- a/doc/src/sgml/ref/drop_publication.sgml
+++ b/doc/src/sgml/ref/drop_publication.sgml
@@ -21,7 +21,7 @@
 
  
 
-DROP PUBLICATION name [, ...]
+DROP PUBLICATION [ IF EXISTS ] name [, ...] [ CASCADE | RESTRICT ]
 
  
 
@@ -43,6 +43,16 @@ Parameters
 
   

+IF EXISTS
+
+ 
+  Do not throw an error if the extension does not exist. A notice is issued
+  in this case.
+ 
+
+   
+
+   
 name
 
  
@@ -51,6 +61,17 @@ Parameters
 

 
+   
+CASCADE
+RESTRICT
+
+
+ 
+  These key words do not have any effect, since there are no dependencies
+  on publications.
+ 
+
+   
   
  
 
-- 
2.11.0

>From 1755433aa69d20b9bcd9520ac65e7ee4d42ff124 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 3 Jan 2017 12:00:00 -0500
Subject: [PATCH 3/5] fixup! Add PUBLICATION catalogs and DDL

Small tweak of tab completion.
---
 src/bin/psql/tab-complete.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 9a404dc8ae..8b75ac9f4c 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2463,7 +2463,7 @@ psql_completion(const char *text, int start, int end)
 /* DROP */
 	/* Complete DROP object with CASCADE / RESTRICT */
 	else if (Matches3("DROP",
-	  "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW",
+	  "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|PUBLICATION|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW",
 	  MatchAny) ||
 			 Matches4("DROP", "ACCESS", "METHOD", MatchAny) ||
 			 (Matches4("DROP", "AGGREGATE|FUNCTION", MatchAny, MatchAny) &&
-- 
2.11.0

>From da31845bbddc24c9b1885efddeb295f8a93f2158 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 3 Jan 2017 12:00:00 -0500
Subject: [PATCH 4/5] fixup! Add PUBLICATION catalogs and DDL

Fix typo
---
 src/backend/catalog/pg_publication.c | 4 ++--
 1 file changed, 2 

Re: [HACKERS] Logical Replication WIP

2017-01-03 Thread Peter Eisentraut
In 0001-Add-PUBLICATION-catalogs-and-DDL-v16.patch.gz,

+static bool
+is_publishable_class(Oid relid, Form_pg_class reltuple)
+{
+   return reltuple->relkind == RELKIND_RELATION &&
+   !IsCatalogClass(relid, reltuple) &&
+   reltuple->relpersistence == RELPERSISTENCE_PERMANENT &&
+   /* XXX needed to exclude information_schema tables */
+   relid >= FirstNormalObjectId;
+}

I don't think the XXX part is necessary, because IsCatalogClass()
already checks for the same thing.  (The whole thing is a bit bogus
anyway, because you can drop and recreate the information schema at run
time without restriction.)

+#define MAX_RELCACHE_INVAL_MSGS 100
+   List*relids = GetPublicationRelations(HeapTupleGetOid(tup));
+
+   /*
+* We don't want to send too many individual messages, at some point
+* it's cheaper to just reset whole relcache.
+*
+* XXX: the MAX_RELCACHE_INVAL_MSGS was picked arbitrarily, maybe
+* there is better limit.
+*/
+   if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)

Do we have more data on this?  There are people running with 10
tables, and changing a publication with a 1000 tables would blow all
that away?

Maybe at least it should be set relative to INITRELCACHESIZE (400) to
tie things together a bit?

Update the documentation of SharedInvalCatalogMsg in sinval.h for the
"all relations" case.  (Maybe look around the whole file to make sure
comments are still valid.)

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-02 Thread Petr Jelinek
On 02/01/17 05:23, Steve Singer wrote:
> On 12/30/2016 05:53 AM, Petr Jelinek wrote:
>> Hi,
>>
>> I rebased this for the changes made to inheritance and merged in the
>> fixes that I previously sent separately.
>>
>>
>>
> 
> 
> I'm not sure if the following is expected or not
> 
> I have 1 publisher and 1 subscriber.
> I then do pg_dump on my subscriber
> ./pg_dump -h localhost --port 5441 --include-subscriptions
> --no-create-subscription-slot test|./psql --port 5441 test_b
> 
> I now can't do a drop database test_b  , which is expected
> 
> but I can't drop the subscription either
> 
> 
> test_b=# drop subscription mysub;
> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
> 
>  alter subscription mysub disable;
> ALTER SUBSCRIPTION
> drop subscription mysub;
> ERROR:  could not drop replication origin with OID 1, in use by PID 24996
> 
> drop subscription mysub nodrop slot;
> 
> doesn't work either.  If I first drop the working/active subscription on
> the original 'test' database it works but I can't seem to drop the
> subscription record on test_b
> 

I guess this is because replication origins are pg instance global and
we use subscription name for origin name internally. Maybe we need to
prefix/suffix it with db oid or something like that, but that's
problematic a bit as well as they both have same length limit. I guess
we could use subscription OID as replication origin name which is
somewhat less user friendly in terms of debugging but would be unique.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2017-01-01 Thread Steve Singer

On 12/30/2016 05:53 AM, Petr Jelinek wrote:

Hi,

I rebased this for the changes made to inheritance and merged in the
fixes that I previously sent separately.






I'm not sure if the following is expected or not

I have 1 publisher and 1 subscriber.
I then do pg_dump on my subscriber
./pg_dump -h localhost --port 5441 --include-subscriptions 
--no-create-subscription-slot test|./psql --port 5441 test_b


I now can't do a drop database test_b  , which is expected

but I can't drop the subscription either


test_b=# drop subscription mysub;
ERROR:  could not drop replication origin with OID 1, in use by PID 24996

 alter subscription mysub disable;
ALTER SUBSCRIPTION
drop subscription mysub;
ERROR:  could not drop replication origin with OID 1, in use by PID 24996

drop subscription mysub nodrop slot;

doesn't work either.  If I first drop the working/active subscription on 
the original 'test' database it works but I can't seem to drop the 
subscription record on test_b






--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-30 Thread Erik Rijkers

On 2016-12-30 11:53, Petr Jelinek wrote:

I rebased this for the changes made to inheritance and merged in the



0002-Add-SUBSCRIPTION-catalog-and-DDL-v16.patch.gz (~31 KB)


couple of orthography errors in messages


--- ./src/backend/commands/subscriptioncmds.c.orig	2016-12-30 16:44:31.957298438 +0100
+++ ./src/backend/commands/subscriptioncmds.c	2016-12-30 16:47:00.848418044 +0100
@@ -585,7 +585,7 @@
 
 	if (!walrcv_command(wrconn, cmd.data, ))
 		ereport(ERROR,
-(errmsg("count not drop the replication slot \"%s\" on publisher",
+(errmsg("could not drop the replication slot \"%s\" on publisher",
 		slotname),
  errdetail("The error was: %s", err)));
 	else
@@ -623,7 +623,7 @@
 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 		  errmsg("permission denied to change owner of subscription \"%s\"",
  NameStr(form->subname)),
-			 errhint("The owner of an subscription must be a superuser.")));
+			 errhint("The owner of a subscription must be a superuser.")));
 
 	form->subowner = newOwnerId;
 	simple_heap_update(rel, >t_self, tup);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-30 Thread Erik Rijkers

On 2016-12-30 11:53, Petr Jelinek wrote:

I rebased this for the changes made to inheritance and merged in the



0002-Add-SUBSCRIPTION-catalog-and-DDL-v16.patch.gz (~31 KB)







--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-20 Thread Petr Jelinek
On 20/12/16 10:56, Erik Rijkers wrote:
> On 2016-12-20 10:48, Petr Jelinek wrote:
> 
> Here is another small thing:
> 
> $ psql -d testdb -p 6972
> psql (10devel_logical_replication_20161220_1008_db80acfc9d50)
> Type "help" for help.
> 
> testdb=# drop publication if exists xxx;
> ERROR:  unrecognized object type: 28
> 
> 
> testdb=# drop subscription if exists xxx;
> WARNING:  relcache reference leak: relation "pg_subscription" not closed
> DROP SUBSCRIPTION
> 
> 
> I don't mind but I suppose eventually other messages need to go there
> 

Yep, attached should fix it.

DDL for completely new db objects surely touches a lot of places.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
index 61ff8f2..7080c4b 100644
--- a/src/backend/commands/dropcmds.c
+++ b/src/backend/commands/dropcmds.c
@@ -441,6 +441,10 @@ does_not_exist_skipping(ObjectType objtype, List *objname, List *objargs)
 }
 			}
 			break;
+		case OBJECT_PUBLICATION:
+			msg = gettext_noop("publication \"%s\" does not exist, skipping");
+			name = NameListToString(objname);
+			break;
 		default:
 			elog(ERROR, "unrecognized object type: %d", (int) objtype);
 			break;
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 25c8c34..bd27aac 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -492,6 +492,8 @@ DropSubscription(DropSubscriptionStmt *stmt)
 
 	if (!HeapTupleIsValid(tup))
 	{
+		heap_close(rel, NoLock);
+
 		if (!stmt->missing_ok)
 			ereport(ERROR,
 	(errcode(ERRCODE_UNDEFINED_OBJECT),
@@ -501,6 +503,7 @@ DropSubscription(DropSubscriptionStmt *stmt)
 			ereport(NOTICE,
 	(errmsg("subscription \"%s\" does not exist, skipping",
 			stmt->subname)));
+
 		return;
 	}
 

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-20 Thread Erik Rijkers

On 2016-12-20 10:48, Petr Jelinek wrote:

Here is another small thing:

$ psql -d testdb -p 6972
psql (10devel_logical_replication_20161220_1008_db80acfc9d50)
Type "help" for help.

testdb=# drop publication if exists xxx;
ERROR:  unrecognized object type: 28


testdb=# drop subscription if exists xxx;
WARNING:  relcache reference leak: relation "pg_subscription" not closed
DROP SUBSCRIPTION


I don't mind but I suppose eventually other messages need to go there


thanks,

Erik Rijkers


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-20 Thread Petr Jelinek
On 20/12/16 10:41, Erik Rijkers wrote:
> On 2016-12-20 09:43, Petr Jelinek wrote:
> 
>> Thanks, this was very useful. We had wrong attribute index arithmetics
>> in the place where we verify that replica identities match well enough.
> 
> Well, I spent a lot of time on the whole thing so I am glad it's not just
> something stupid I did :)

Yeah sadly it was something stupid I did ;)

> 
>> BTW that script you have for testing has 2 minor flaws in terms of
>> pgbench_history - the order by is not unique enough (adding mtime or
>> something helps)
> 
> yes, in another version I did
>   ALTER TABLE pgbench_history ADD COLUMN hid SERIAL PRIMARY KEY.
> I suppose that's the best way (adding mtime doesn't work; apparently mtime
> gets repeated too).  (I have now added that alter table-statement  again.)
> 
>> and second, the pgbench actually truncates the
>> pgbench_history unless -n is added to command line.
> 
> ok, -n  added.
> 
>> So attached is v15, which fixes this and the
>> ERROR:  unexpected command tag "PUBLICATION
>> as reported by Steve Singer (plus tab completion fixes and doc fixes).
> 
> Great. It seems to fix the problem: I just an an unprecidented
> 5-minute run with correct replication.
> 

Great, thanks.

> The first compile gave the attached diffs in the publication regression
> test; subsequent
> compiles went OK (2x). If I have time later today I'll try to reproduce
> that one FAILED test
> but maybe you can see  immediately what's wrong there .

Seems like tables are just returned in different order but otherwise
it's ok. I guess a way to make this more stable would be to add order by
in the query psql sends to get the list of tables in the publication.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-20 Thread Erik Rijkers

On 2016-12-20 09:43, Petr Jelinek wrote:


Thanks, this was very useful. We had wrong attribute index arithmetics
in the place where we verify that replica identities match well enough.


Well, I spent a lot of time on the whole thing so I am glad it's not 
just

something stupid I did :)


BTW that script you have for testing has 2 minor flaws in terms of
pgbench_history - the order by is not unique enough (adding mtime or
something helps)


yes, in another version I did
  ALTER TABLE pgbench_history ADD COLUMN hid SERIAL PRIMARY KEY.
I suppose that's the best way (adding mtime doesn't work; apparently 
mtime
gets repeated too).  (I have now added that alter table-statement  
again.)



and second, the pgbench actually truncates the
pgbench_history unless -n is added to command line.


ok, -n  added.


So attached is v15, which fixes this and the
ERROR:  unexpected command tag "PUBLICATION
as reported by Steve Singer (plus tab completion fixes and doc fixes).


Great. It seems to fix the problem: I just an an unprecidented
5-minute run with correct replication.

The first compile gave the attached diffs in the publication regression 
test; subsequent
compiles went OK (2x). If I have time later today I'll try to reproduce 
that one FAILED test

but maybe you can see  immediately what's wrong there .

thanks,

Erik Rijkers



*** /home/aardvark/pg_stuff/pg_sandbox/pgsql.logical_replication/src/test/regress/expected/publication.out	2016-12-20 09:53:28.023321098 +0100
--- /home/aardvark/pg_stuff/pg_sandbox/pgsql.logical_replication/src/test/regress/results/publication.out	2016-12-20 09:59:32.380705797 +0100
***
*** 79,86 
  -+-+-
   t   | t   | t
  Tables:
- "public.testpub_tbl1"
  "pub_test.testpub_nopk"
  
  -- fail - view
  ALTER PUBLICATION testpub_default ADD TABLE testpub_view;
--- 79,86 
  -+-+-
   t   | t   | t
  Tables:
  "pub_test.testpub_nopk"
+ "public.testpub_tbl1"
  
  -- fail - view
  ALTER PUBLICATION testpub_default ADD TABLE testpub_view;
***
*** 120,127 
  -+-+-
   t   | t   | t
  Tables:
- "public.testpub_tbl1"
  "pub_test.testpub_nopk"
  
  ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk;
  -- fail - nonexistent
--- 120,127 
  -+-+-
   t   | t   | t
  Tables:
  "pub_test.testpub_nopk"
+ "public.testpub_tbl1"
  
  ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk;
  -- fail - nonexistent

==


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-19 Thread Erik Rijkers

On 2016-12-19 08:04, Erik Rijkers wrote:

On 2016-12-18 11:12, Petr Jelinek wrote:

(now using latest: patchset:)

0001-Add-PUBLICATION-catalogs-and-DDL-v14.patch
0002-Add-SUBSCRIPTION-catalog-and-DDL-v14.patch
0003-Define-logical-replication-protocol-and-output-plugi-v14.patch
0004-Add-logical-replication-workers-v14.patch
0005-Add-separate-synchronous-commit-control-for-logical--v14.patch

Sometimes  replication (caused by a pgbench run)  runs for a few
seconds replicating all 4 pgbench tables correctly, but never longer
than 10 to 20 seconds.



I've concocted pgbench_derail.sh.  It assumes 2 instances running, 
initially without the publication and subsciption.


There are two separate installations, on the same machine.

To startup the two instances I use instance.sh:

# ./instances.sh
#!/bin/sh
port1=6972
port2=6973
project1=logical_replication
project2=logical_replication2
pg_stuff_dir=$HOME/pg_stuff
PATH1=$pg_stuff_dir/pg_installations/pgsql.$project1/bin:$PATH
PATH2=$pg_stuff_dir/pg_installations/pgsql.$project2/bin:$PATH
server_dir1=$pg_stuff_dir/pg_installations/pgsql.$project1
server_dir2=$pg_stuff_dir/pg_installations/pgsql.$project2
data_dir1=$server_dir1/data
data_dir2=$server_dir2/data
options1="
-c wal_level=logical
-c max_replication_slots=10
-c max_worker_processes=12
-c max_logical_replication_workers=10
-c max_wal_senders=10
-c logging_collector=on
-c log_directory=$server_dir1
-c log_filename=logfile.${project1} "

options2="
-c wal_level=replica
-c max_replication_slots=10
-c max_worker_processes=12
-c max_logical_replication_workers=10
-c max_wal_senders=10
-c logging_collector=on
-c log_directory=$server_dir2
-c log_filename=logfile.${project2} "
which postgres
export PATH=$PATH1; postgres -D $data_dir1 -p $port1 ${options1} &
export PATH=$PATH2; postgres -D $data_dir2 -p $port2 ${options2} &
# end ./instances.sh







#--- pgbench_derail.sh
#!/bin/sh

# assumes both instances are running

# clear logs
# echo > 
$HOME/pg_stuff/pg_installations/pgsql.logical_replication/logfile.logical_replication
# echo > 
$HOME/pg_stuff/pg_installations/pgsql.logical_replication2/logfile.logical_replication2


port1=6972
port2=6973


function cb()
{
  #  display the 4 pgbench tables' accumulated content as md5s
  #  a,b,t,h stand for:  pgbench_accounts, -branches, -tellers, -history
  for port in $port1 $port2
  do
md5_a=$(echo "select * from pgbench_accounts order by aid"
|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_b=$(echo "select * from pgbench_branches order by bid"
|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_t=$(echo "select * from pgbench_tellers  order by tid"
|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_h=$(echo "select * from pgbench_history  order by 
aid,bid,tid"|psql -qtAXp$port|md5sum|cut -b 1-9)
cnt_a=$(echo "select count(*) from pgbench_accounts"|psql -qtAXp 
$port)
cnt_b=$(echo "select count(*) from pgbench_branches"|psql -qtAXp 
$port)
cnt_t=$(echo "select count(*) from pgbench_tellers" |psql -qtAXp 
$port)
cnt_h=$(echo "select count(*) from pgbench_history" |psql -qtAXp 
$port)
printf "$port a,b,t,h: %6d %6d %6d %6d" $cnt_a  $cnt_b  $cnt_t  
$cnt_h

echo -n "   $md5_a  $md5_b  $md5_t  $md5_h"
if   [[ $port -eq $port1 ]]; then echo "   master"
elif [[ $port -eq $port2 ]]; then echo "   replica"
else  echo " ERROR"
fi
  done
}


echo "
drop table if exists pgbench_accounts;
drop table if exists pgbench_branches;
drop table if exists pgbench_tellers;
drop table if exists pgbench_history;" | psql -X -p $port1 \
  && echo "
drop table if exists pgbench_accounts;
drop table if exists pgbench_branches;
drop table if exists pgbench_tellers;
drop table if exists pgbench_history;" | psql -X -p $port2 \
  && pgbench -p $port1 -qis 1 \
  && echo "alter table pgbench_history replica identity full;" | psql 
-1p $port1 \

  && pg_dump -F c  -p $port1 \
 -t pgbench_accounts \
 -t pgbench_branches \
 -t pgbench_tellers  \
 -t pgbench_history  \
| pg_restore -p $port2 -d testdb

echo  "$(cb)"

sleep 2

echo  "$(cb)"

echo "create publication pub1 for all tables;" | psql -p $port1 -aqtAX

echo "
create subscription sub1
  connection 'port=${port1}'
  publication pub1
  with (disabled);
alter subscription sub1 enable;
" | psql -p $port2 -aqtAX
#

# repeat a short (10 s) pgbench-un to show that during such
# short runs the logical replication often remains intact.
# Longer pgbench-runs always derail the logrep of one or more
# of these 4 table
#
# bug:  pgbench_history no longer replicates
#   sometimes also the other 3 table de-synced.

echo  "$(cb)"
echo "-- pgbench -c 1 -T 10 -P 5 (short run, first)"
 pgbench -c 1 -T 10 -P 5
sleep 2
echo  "$(cb)"

echo "-- pgbench -c 1 -T 10 -P 5 (short run, second)"
 pgbench -c 1 -T 10 -P 5
sleep 2
echo  "$(cb)"

echo "-- pgbench -c 1 -T 120 -P 15 (long run)"
   

Re: [HACKERS] Logical Replication WIP

2016-12-19 Thread Petr Jelinek
On 19/12/16 15:39, Steve Singer wrote:
> On 12/18/2016 09:04 PM, Petr Jelinek wrote:
>> On 18/12/16 19:02, Steve Singer wrote:
>>
>>> pg_dump is also generating warnings
>>>
>>> pg_dump: [archiver] WARNING: don't know how to set owner for object type
>>> SUBSCRIPTION
>>>
>>> I know that the plan is to add proper ACL's for publications and
>>> subscriptions later. I don't know if we want to leave the warning in
>>> until then or do something about it.
>>>
>> No, ACLs are separate from owner. This is thinko on my side. I was
>> thinking we can live without ALTER ... OWNER TO for now, but we actually
>> need it for pg_dump and for REASSIGN OWNED. So now I added the OWNER TO
>> for both PUBLICATION and SUBSCRIPTION.
> 
> 
> When I try to restore my pg_dump with publications I get
> 
> ./pg_dump  -h localhost --port 5440 test |./psql -h localhost --port
> 5440 test2
> 
> 
> ALTER TABLE
> CREATE PUBLICATION
> ERROR:  unexpected command tag "PUBLICATION
> 
> This comes from a
> ALTER PUBLICATION mypub OWNER TO ssinger;
> 
> 
> Does the OWNER TO  clause need to be added to AlterPublicationStmt:
> instead of AlterOwnerStmt ?

Nah that's just bug in what command tag string we return in the
utility.c, I noticed this myself after sending the v14, it's one line fix.

> Also we should update the tab-complete for ALTER PUBLICATION to show the
> OWNER to options  + the \h help in psql and the reference SGML
> 

Yeah.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-19 Thread Steve Singer

On 12/18/2016 09:04 PM, Petr Jelinek wrote:

On 18/12/16 19:02, Steve Singer wrote:


pg_dump is also generating warnings

pg_dump: [archiver] WARNING: don't know how to set owner for object type
SUBSCRIPTION

I know that the plan is to add proper ACL's for publications and
subscriptions later. I don't know if we want to leave the warning in
until then or do something about it.


No, ACLs are separate from owner. This is thinko on my side. I was
thinking we can live without ALTER ... OWNER TO for now, but we actually
need it for pg_dump and for REASSIGN OWNED. So now I added the OWNER TO
for both PUBLICATION and SUBSCRIPTION.



When I try to restore my pg_dump with publications I get

./pg_dump  -h localhost --port 5440 test |./psql -h localhost --port 
5440 test2



ALTER TABLE
CREATE PUBLICATION
ERROR:  unexpected command tag "PUBLICATION

This comes from a
ALTER PUBLICATION mypub OWNER TO ssinger;


Does the OWNER TO  clause need to be added to AlterPublicationStmt: 
instead of AlterOwnerStmt ?
Also we should update the tab-complete for ALTER PUBLICATION to show the 
OWNER to options  + the \h help in psql and the reference SGML






--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-19 Thread Petr Jelinek
On 19/12/16 08:04, Erik Rijkers wrote:
> On 2016-12-18 11:12, Petr Jelinek wrote:
> 
> (now using latest: patchset:)
> 
> 0001-Add-PUBLICATION-catalogs-and-DDL-v14.patch
> 0002-Add-SUBSCRIPTION-catalog-and-DDL-v14.patch
> 0003-Define-logical-replication-protocol-and-output-plugi-v14.patch
> 0004-Add-logical-replication-workers-v14.patch
> 0005-Add-separate-synchronous-commit-control-for-logical--v14.patch
> 
>> BTW you don't need to add primary key to pgbench_history. Simply ALTER
>> TABLE pgbench_history REPLICA IDENTITY FULL; should be enough.
> 
> Either should, but neither is.
> 
> set-up:
> Before creating the publication/subscription:
> On master I run   pgbench -qis 1,  then set replica identity (and/or add
> serial column) for pgbench_history, then dump/restore the 4 pgbench
> tables from master to replica.
> Then enabling publication/subscription.  logs looks well.  (Other tests 
> I've devised earlier (on other tables) still work nicely.)
> 
> Now when I do a pgbench-run on master, something like:
> 
>pgbench -c 1 -T 20 -P 1
> 
> I often see this (when running pgbench):
> 
> ERROR:  publisher does not send replica identity column expected by the
> logical replication target public.pgbench_tellers
> or, sometimes  (less often) the same ERROR for pgbench_accounts appears
> (as in the subsciber-log below)
> 
> -- publisher log
> 2016-12-19 07:44:22.738 CET 22690 LOG:  logical decoding found
> consistent point at 0/14598C78
> 2016-12-19 07:44:22.738 CET 22690 DETAIL:  There are no running
> transactions.
> 2016-12-19 07:44:22.738 CET 22690 LOG:  exported logical decoding
> snapshot: "000130FA-1" with 0 transaction IDs
> 2016-12-19 07:44:22.886 CET 22729 LOG:  starting logical decoding for
> slot "sub1"
> 2016-12-19 07:44:22.886 CET 22729 DETAIL:  streaming transactions
> committing after 0/14598CB0, reading WAL from 0/14598C78
> 2016-12-19 07:44:22.886 CET 22729 LOG:  logical decoding found
> consistent point at 0/14598C78
> 2016-12-19 07:44:22.886 CET 22729 DETAIL:  There are no running
> transactions.
> 2016-12-19 07:45:25.568 CET 22729 LOG:  could not receive data from
> client: Connection reset by peer
> 2016-12-19 07:45:25.568 CET 22729 LOG:  unexpected EOF on standby
> connection
> 2016-12-19 07:45:25.580 CET 26696 LOG:  starting logical decoding for
> slot "sub1"
> 2016-12-19 07:45:25.580 CET 26696 DETAIL:  streaming transactions
> committing after 0/1468E0D0, reading WAL from 0/1468DC90
> 2016-12-19 07:45:25.589 CET 26696 LOG:  logical decoding found
> consistent point at 0/1468DC90
> 2016-12-19 07:45:25.589 CET 26696 DETAIL:  There are no running
> transactions.
> 
> -- subscriber log
> 2016-12-19 07:44:22.878 CET 17027 LOG:  starting logical replication
> worker for subscription 24581
> 2016-12-19 07:44:22.883 CET 22726 LOG:  logical replication apply for
> subscription sub1 started
> 2016-12-19 07:45:11.069 CET 22726 WARNING:  leaked hash_seq_search scan
> for hash table 0x2def1a8
> 2016-12-19 07:45:25.566 CET 22726 ERROR:  publisher does not send
> replica identity column expected by the logical replication target
> public.pgbench_accounts
> 2016-12-19 07:45:25.568 CET 16984 LOG:  worker process: logical
> replication worker 24581 (PID 22726) exited with exit code 1
> 2016-12-19 07:45:25.568 CET 17027 LOG:  starting logical replication
> worker for subscription 24581
> 2016-12-19 07:45:25.574 CET 26695 LOG:  logical replication apply for
> subscription sub1 started
> 2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan
> for hash table 0x2def2c8
> 2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan
> for hash table 0x2def2c8
> 2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan
> for hash table 0x2def2c8
> 
> 
> Sometimes  replication (caused by a pgbench run)  runs for a few seconds
> replicating all 4 pgbench tables correctly, but never longer than 10 to
> 20 seconds.
> 
> If you cannot reproduce with the provided info it  I will make a more
> precise setup-desciption, but it's so solidly failing here that I hope
> that won't be necessary.
> 

Hi,

nope can't reproduce that. I can reproduce the leaked hash_seq_search.
The attached fixes that. But no issues with replication itself.

The error basically means that pkey on publisher and subscriber isn't
the same.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
index d795c5b..7ab94f5 100644
--- a/src/backend/replication/logical/relation.c
+++ b/src/backend/replication/logical/relation.c
@@ -59,6 +59,7 @@ logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
if (entry->reloid == reloid)
{
entry->reloid = InvalidOid;
+   hash_seq_term();
break;
   

Re: [HACKERS] Logical Replication WIP

2016-12-18 Thread Erik Rijkers

On 2016-12-18 11:12, Petr Jelinek wrote:

(now using latest: patchset:)

0001-Add-PUBLICATION-catalogs-and-DDL-v14.patch
0002-Add-SUBSCRIPTION-catalog-and-DDL-v14.patch
0003-Define-logical-replication-protocol-and-output-plugi-v14.patch
0004-Add-logical-replication-workers-v14.patch
0005-Add-separate-synchronous-commit-control-for-logical--v14.patch


BTW you don't need to add primary key to pgbench_history. Simply ALTER
TABLE pgbench_history REPLICA IDENTITY FULL; should be enough.


Either should, but neither is.

set-up:
Before creating the publication/subscription:
On master I run   pgbench -qis 1,  then set replica identity (and/or add 
serial column) for pgbench_history, then dump/restore the 4 pgbench 
tables from master to replica.
Then enabling publication/subscription.  logs looks well.  (Other tests  
I've devised earlier (on other tables) still work nicely.)


Now when I do a pgbench-run on master, something like:

   pgbench -c 1 -T 20 -P 1

I often see this (when running pgbench):

ERROR:  publisher does not send replica identity column expected by the 
logical replication target public.pgbench_tellers
or, sometimes  (less often) the same ERROR for pgbench_accounts appears 
(as in the subsciber-log below)


-- publisher log
2016-12-19 07:44:22.738 CET 22690 LOG:  logical decoding found 
consistent point at 0/14598C78
2016-12-19 07:44:22.738 CET 22690 DETAIL:  There are no running 
transactions.
2016-12-19 07:44:22.738 CET 22690 LOG:  exported logical decoding 
snapshot: "000130FA-1" with 0 transaction IDs
2016-12-19 07:44:22.886 CET 22729 LOG:  starting logical decoding for 
slot "sub1"
2016-12-19 07:44:22.886 CET 22729 DETAIL:  streaming transactions 
committing after 0/14598CB0, reading WAL from 0/14598C78
2016-12-19 07:44:22.886 CET 22729 LOG:  logical decoding found 
consistent point at 0/14598C78
2016-12-19 07:44:22.886 CET 22729 DETAIL:  There are no running 
transactions.
2016-12-19 07:45:25.568 CET 22729 LOG:  could not receive data from 
client: Connection reset by peer
2016-12-19 07:45:25.568 CET 22729 LOG:  unexpected EOF on standby 
connection
2016-12-19 07:45:25.580 CET 26696 LOG:  starting logical decoding for 
slot "sub1"
2016-12-19 07:45:25.580 CET 26696 DETAIL:  streaming transactions 
committing after 0/1468E0D0, reading WAL from 0/1468DC90
2016-12-19 07:45:25.589 CET 26696 LOG:  logical decoding found 
consistent point at 0/1468DC90
2016-12-19 07:45:25.589 CET 26696 DETAIL:  There are no running 
transactions.


-- subscriber log
2016-12-19 07:44:22.878 CET 17027 LOG:  starting logical replication 
worker for subscription 24581
2016-12-19 07:44:22.883 CET 22726 LOG:  logical replication apply for 
subscription sub1 started
2016-12-19 07:45:11.069 CET 22726 WARNING:  leaked hash_seq_search scan 
for hash table 0x2def1a8
2016-12-19 07:45:25.566 CET 22726 ERROR:  publisher does not send 
replica identity column expected by the logical replication target 
public.pgbench_accounts
2016-12-19 07:45:25.568 CET 16984 LOG:  worker process: logical 
replication worker 24581 (PID 22726) exited with exit code 1
2016-12-19 07:45:25.568 CET 17027 LOG:  starting logical replication 
worker for subscription 24581
2016-12-19 07:45:25.574 CET 26695 LOG:  logical replication apply for 
subscription sub1 started
2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan 
for hash table 0x2def2c8
2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan 
for hash table 0x2def2c8
2016-12-19 07:46:10.950 CET 26695 WARNING:  leaked hash_seq_search scan 
for hash table 0x2def2c8



Sometimes  replication (caused by a pgbench run)  runs for a few seconds 
replicating all 4 pgbench tables correctly, but never longer than 10 to 
20 seconds.


If you cannot reproduce with the provided info it  I will make a more 
precise setup-desciption, but it's so solidly failing here that I hope 
that won't be necessary.



Erik Rijkers






--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-18 Thread Steve Singer

On 12/18/2016 05:28 AM, Petr Jelinek wrote:

On 17/12/16 18:34, Steve Singer wrote:

On 12/16/2016 07:49 AM, Petr Jelinek wrote:
Yeah subscriptions are per database. I don't want to make v14 just 
for these 2 changes as that would make life harder for anybody 
code-reviewing the v13 so attached is diff with above fixes that 
applies on top of v13. 





Thanks that fixes those issues.

A few more I've noticed


pg_dumping subscriptions doesn't seem to work

./pg_dump -h localhost --port 5441 --include-subscriptions test
pg_dump: [archiver (db)] query failed: ERROR:  missing FROM-clause entry 
for table "p"

LINE 1: ...LECT rolname FROM pg_catalog.pg_roles WHERE oid = p.subowner...
 ^
pg_dump: [archiver (db)] query was: SELECT s.tableoid, s.oid, 
s.subname,(SELECT rolname FROM pg_catalog.pg_roles WHERE oid = 
p.subowner) AS rolname, s.subenabled,  s.subconninfo, s.subslotname, 
s.subpublications FROM pg_catalog.pg_subscription s WHERE s.subdbid = 
(SELECT oid FROM pg_catalog.pg_database   WHERE datname 
= current_database())


I have attached a patch that fixes this.

pg_dump is also generating warnings

pg_dump: [archiver] WARNING: don't know how to set owner for object type 
SUBSCRIPTION


I know that the plan is to add proper ACL's for publications and 
subscriptions later. I don't know if we want to leave the warning in 
until then or do something about it.



Also the tab-competion for create subscription doesn't seem to work as 
intended.
I've attached a patch that fixes it and patches to add tab completion 
for alter publication|subscription




>From 3ed2ad471766490310b1102d8c9166a597ac11af Mon Sep 17 00:00:00 2001
From: Steve Singer 
Date: Sun, 18 Dec 2016 12:37:29 -0500
Subject: [PATCH 3/4] Fix tab complete for CREATE SUBSCRIPTION

---
 src/bin/psql/tab-complete.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 8816f6c..6dd47f6 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2298,8 +2298,10 @@ psql_completion(const char *text, int start, int end)
 /* CREATE SUBSCRIPTION */
 	else if (Matches3("CREATE", "SUBSCRIPTION", MatchAny))
 		COMPLETE_WITH_CONST("CONNECTION");
+	else if (Matches5("CREATE", "SUBSCRIPTION", MatchAny, "CONNECTION",MatchAny))
+		COMPLETE_WITH_CONST("PUBLICATION");	
 	/* Complete "CREATE SUBSCRIPTION  ...  WITH ( " */
-	else if (Matches2("CREATE", "SUBSCRIPTION") && TailMatches2("WITH", "("))
+	else if (HeadMatches2("CREATE", "SUBSCRIPTION") && TailMatches2("WITH", "("))
 		COMPLETE_WITH_LIST5("ENABLED", "DISABLED", "CREATE SLOT",
 			"NOCREATE SLOT", "SLOT NAME");
 
-- 
2.1.4

>From 36a0d4382f7ffd2b740298a2bafd49471766bdad Mon Sep 17 00:00:00 2001
From: Steve Singer 
Date: Sun, 18 Dec 2016 12:51:14 -0500
Subject: [PATCH 2/4] Add tab-complete for ALTER SUBSCRIPTION

---
 src/bin/psql/tab-complete.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 4cbb848..8816f6c 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1410,7 +1410,7 @@ psql_completion(const char *text, int start, int end)
 			"EVENT TRIGGER", "EXTENSION", "FOREIGN DATA WRAPPER", "FOREIGN TABLE", "FUNCTION",
 			"GROUP", "INDEX", "LANGUAGE", "LARGE OBJECT", "MATERIALIZED VIEW", "OPERATOR",
 			"POLICY", "PUBLICATION", "ROLE", "RULE", "SCHEMA", "SERVER", "SEQUENCE",
-			"SYSTEM", "TABLE", "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE",
+			"SUBSCRIPTION", "SYSTEM", "TABLE", "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE",
 		"USER", "USER MAPPING FOR", "VIEW", NULL};
 
 		COMPLETE_WITH_LIST(list_ALTER);
@@ -1446,6 +1446,15 @@ psql_completion(const char *text, int start, int end)
 		COMPLETE_WITH_LIST6("PUBLISH INSERT", "NOPUBLISH INSERT", "PUBLISH UPDATE",
 			"NOPUBLISH UPDATE", "PUBLISH DELETE", "NOPUBLISH DELETE");
 	}
+	/* ALTER SUBSCRIPTION  ... */
+	else if (Matches3("ALTER","SUBSCRIPTION",MatchAny))
+	{
+		COMPLETE_WITH_LIST5("WITH", "CONNECTION", "SET PUBLICATION", "ENABLE", "DISABLE");
+	}
+	else if (HeadMatches3("ALTER", "SUBSCRIPTION", MatchAny) && TailMatches2("WITH", "("))
+	{
+		COMPLETE_WITH_CONST("SLOT NAME");
+	}
 	/* ALTER SCHEMA  */
 	else if (Matches3("ALTER", "SCHEMA", MatchAny))
 		COMPLETE_WITH_LIST2("OWNER TO", "RENAME TO");
-- 
2.1.4

>From d4685b991245270221a33e0cf8e61d00fb0bf67e Mon Sep 17 00:00:00 2001
From: Steve Singer 
Date: Sun, 18 Dec 2016 12:47:15 -0500
Subject: [PATCH 1/4] Add tab-complete for ALTER PUBLICATION

---
 src/bin/psql/tab-complete.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6ad05b7..4cbb848 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1409,8 +1409,8 @@ 

Re: [HACKERS] Logical Replication WIP

2016-12-18 Thread Petr Jelinek
On 17/12/16 18:34, Steve Singer wrote:
> On 12/16/2016 07:49 AM, Petr Jelinek wrote:
>> Hi,
>>
>> attached is version 13 of the patch.
>>
>> I merged in changes from PeterE. And did following changes:
>> - fixed the ownership error messages for both provider and subscriber
>> - added ability to send invalidation message to invalidate whole
>> relcache and use it in publication code
>> - added the post creation/alter/drop hooks
>> - removed parts of docs that refer to initial sync (which does not exist
>> yet)
>> - added timeout handling/retry, etc to apply/launcher based on the GUCs
>> that exist for wal receiver (they could use renaming though)
>> - improved feedback behavior
>> - apply worker now uses owner of the subscription as connection user
>> - more tests
>> - check for max_replication_slots in launcher
>> - clarify the update 'K' sub-message description in protocol
> 
> A few things I've noticed so far
> 
> If I shutdown the publisher I see the following in the log
> 
> 2016-12-17 11:33:49.548 EST [1891] LOG:  worker process: ?)G? (PID 1987)
> exited with exit code 1
> 
> but then if I shutdown the subscriber postmaster and restart it switches to
> 2016-12-17 11:43:09.628 EST [2373] LOG:  worker process:  (PID 2393)
> exited with exit code 1
> 
> Not sure where the 'G' was coming from (other times I have seen an 'I'
> here or other random characters)
> 

Uninitialized bgw_name for apply worker. Rather silly bug. Fixed.

> 
> I don't think we are cleaning up subscriptions on a drop database
> 
> If I do the following
> 
> 1) Create a subscription in a new database
> 2) Stop the publisher
> 3) Drop the database on the subscriber
> 
> test=# create subscription mysuba connection 'host=localhost dbname=test
> port=5440' publication mypub;
> test=# \c b
> b=# drop database test;
> DROP DATABASE
> b=# select * FROM pg_subscription ;
>  subdbid | subname | subowner | subenabled | subconninfo  |
> subslotname | subpublications
> -+-+--++--+-+-
> 
>16384 | mysuba  |   10 | t  | host=localhost dbname=test
> port=5440 | mysuba  | {mypub}
> 

Good one. I added check that prevents dropping database when there is
subscription defined for it. I think we can't cascade here as
subscription may or may not hold resources (slot) in another
instance/database so preventing the drop is best we can do.

> 
> Also I don't think I can now drop mysuba
> b=# drop subscription mysuba;
> ERROR:  subscription "mysuba" does not exist
> 

Yeah subscriptions are per database.

I don't want to make v14 just for these 2 changes as that would make
life harder for anybody code-reviewing the v13 so attached is diff with
above fixes that applies on top of v13.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
index 8be9f39..2c58cc6 100644
--- a/src/backend/catalog/pg_subscription.c
+++ b/src/backend/catalog/pg_subscription.c
@@ -107,7 +107,41 @@ GetSubscription(Oid subid, bool missing_ok)
 }
 
 /*
- * Free memory allocated by subscription struct. */
+ * Return number of subscriptions defined in given database.
+ * Used by dropdb() to check if database can indeed be dropped.
+ */
+int
+CountDBSubscriptions(Oid dbid)
+{
+	intnsubs = 0;
+	Relation		rel;
+	ScanKeyData		scankey;
+	SysScanDesc		scan;
+	HeapTuple		tup;
+
+	rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+	ScanKeyInit(,
+Anum_pg_subscription_subdbid,
+BTEqualStrategyNumber, F_OIDEQ,
+ObjectIdGetDatum(dbid));
+
+	scan = systable_beginscan(rel, InvalidOid, false,
+			  NULL, 1, );
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+		nsubs++;
+
+	systable_endscan(scan);
+
+	heap_close(rel, NoLock);
+
+	return nsubs;
+}
+
+/*
+ * Free memory allocated by subscription struct.
+ */
 void
 FreeSubscription(Subscription *sub)
 {
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 0919ad8..45d152c 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -37,6 +37,7 @@
 #include "catalog/pg_authid.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_db_role_setting.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "commands/comment.h"
 #include "commands/dbcommands.h"
@@ -790,6 +791,7 @@ dropdb(const char *dbname, bool missing_ok)
 	int			npreparedxacts;
 	int			nslots,
 nslots_active;
+	int			nsubscriptions;
 
 	/*
 	 * Look up the target database's OID, and get exclusive lock on it. We
@@ -875,6 +877,21 @@ dropdb(const char *dbname, bool missing_ok)
  errdetail_busy_db(notherbackends, npreparedxacts)));
 
 	/*
+	 * Check if there are subscriptions defined in the target database.
+	 *
+	 * We can't drop them automatically because 

Re: [HACKERS] Logical Replication WIP

2016-12-18 Thread Petr Jelinek
On 17/12/16 13:37, Erik Rijkers wrote:
> On 2016-12-16 13:49, Petr Jelinek wrote:
>>
>> version 13 of the patch.
>>
>> 0001-Add-PUBLICATION-catalogs-and-DDL-v13.patch.gz (~32 KB)
>> 0002-Add-SUBSCRIPTION-catalog-and-DDL-v13.patch.gz (~28 KB)
>> 0003-Define-logical-rep...utput-plugi-v13.patch.gz (~13 KB)
>> 0004-Add-logical-replication-workers-v13.patch.gz (~44 KB)
>> 0005-Add-separate-synch...or-logical--v13.patch.gz (~2 KB)
> 
> Hi,
> 
> You wrote on 2016-08-05: :
> 
>> What's missing:
>>  - sequences, I'd like to have them in 10.0 but I don't have good
>>way to implement it. PGLogical uses periodical syncing with some
>>buffer value but that's suboptimal. I would like to decode them
>>but that has proven to be complicated due to their sometimes
>>transactional sometimes nontransactional nature, so I probably
>>won't have time to do it within 10.0 by myself.
> 
> I ran into problems with sequences and I wonder if sequences-problems
> are still expected, as the above seems to imply.
> 
> (short story: I tried to run pgbench across logical replication; and
> therefore
> added a sequence to pgbench_history to give it a replica identity, and
> cannot get it to work reliably ).
> 

Sequences are not replicated but that should not prevent pgbench_history
itself from being replicated when you add serial to it.

BTW you don't need to add primary key to pgbench_history. Simply ALTER
TABLE pgbench_history REPLICA IDENTITY FULL; should be enough.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-17 Thread Steve Singer

On 12/16/2016 07:49 AM, Petr Jelinek wrote:

Hi,

attached is version 13 of the patch.

I merged in changes from PeterE. And did following changes:
- fixed the ownership error messages for both provider and subscriber
- added ability to send invalidation message to invalidate whole
relcache and use it in publication code
- added the post creation/alter/drop hooks
- removed parts of docs that refer to initial sync (which does not exist
yet)
- added timeout handling/retry, etc to apply/launcher based on the GUCs
that exist for wal receiver (they could use renaming though)
- improved feedback behavior
- apply worker now uses owner of the subscription as connection user
- more tests
- check for max_replication_slots in launcher
- clarify the update 'K' sub-message description in protocol


A few things I've noticed so far

If I shutdown the publisher I see the following in the log

2016-12-17 11:33:49.548 EST [1891] LOG:  worker process: ?)G? (PID 1987) 
exited with exit code 1


but then if I shutdown the subscriber postmaster and restart it switches to
2016-12-17 11:43:09.628 EST [2373] LOG:  worker process:  (PID 2393) 
exited with exit code 1


Not sure where the 'G' was coming from (other times I have seen an 'I' 
here or other random characters)



I don't think we are cleaning up subscriptions on a drop database

If I do the following

1) Create a subscription in a new database
2) Stop the publisher
3) Drop the database on the subscriber

test=# create subscription mysuba connection 'host=localhost dbname=test 
port=5440' publication mypub;

test=# \c b
b=# drop database test;
DROP DATABASE
b=# select * FROM pg_subscription ;
 subdbid | subname | subowner | subenabled | subconninfo  | 
subslotname | subpublications

-+-+--++--+-+-
   16384 | mysuba  |   10 | t  | host=localhost dbname=test 
port=5440 | mysuba  | {mypub}


b=# select datname FROM pg_database where oid=16384;
 datname
-
(0 rows)

Also I don't think I can now drop mysuba
b=# drop subscription mysuba;
ERROR:  subscription "mysuba" does not exist











--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-17 Thread Erik Rijkers

On 2016-12-16 13:49, Petr Jelinek wrote:


version 13 of the patch.

0001-Add-PUBLICATION-catalogs-and-DDL-v13.patch.gz (~32 KB)
0002-Add-SUBSCRIPTION-catalog-and-DDL-v13.patch.gz (~28 KB)
0003-Define-logical-rep...utput-plugi-v13.patch.gz (~13 KB)
0004-Add-logical-replication-workers-v13.patch.gz (~44 KB)
0005-Add-separate-synch...or-logical--v13.patch.gz (~2 KB)


Hi,

You wrote on 2016-08-05: :


What's missing:
 - sequences, I'd like to have them in 10.0 but I don't have good
   way to implement it. PGLogical uses periodical syncing with some
   buffer value but that's suboptimal. I would like to decode them
   but that has proven to be complicated due to their sometimes
   transactional sometimes nontransactional nature, so I probably
   won't have time to do it within 10.0 by myself.


I ran into problems with sequences and I wonder if sequences-problems
are still expected, as the above seems to imply.

(short story: I tried to run pgbench across logical replication; and 
therefore

added a sequence to pgbench_history to give it a replica identity, and
cannot get it to work reliably ).


thanks,

Eik Rijkers







--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-15 Thread Petr Jelinek
On 15/12/16 13:06, Craig Ringer wrote:
> On 15 Dec. 2016 18:19, "Petr Jelinek"  > wrote:
> 
> On 13/12/16 21:42, Peter Eisentraut wrote:
> > On 12/10/16 2:48 AM, Petr Jelinek wrote:
> >> Attached new version with your updates and rebased on top of the
> current
> >> HEAD (the partitioning patch produced quite a few conflicts).
> >
> > I have attached a few more "fixup" patches, mostly with some
> editing of
> > documentation and comments and some compiler warnings.
> >
> > In 0006 in the protocol documentation I have left a "XXX ???" where I
> > didn't understand what it was trying to say.
> >
> 
> Ah so you didn't understand the
> > +Identifies the following TupleData submessage as
> a key.
> > +This field is optional and is only present if
> > +the update changed the REPLICA IDENTITY index. XXX???
> 
> So what happens here is that the update message can contain one or two
> out of 3 possible tuple submessages. It always contains 'N' message
> which is the new data. Then it can optionally contain 'O' message with
> old data if the table has REPLICA IDENTITY FULL (ie, not REPLICA
> IDENTITY index like pkey, etc). Or it can include 'K' message that only
> contains old data for the columns in the REPLICA IDENTITY index. But if
> the REPLICA IDENTITY index didn't change (ie, old and new would be same
> for those columns) we simply omit the 'K' message and let the downstream
> take the key data from the 'N' message to save space.
> 
> 
> Something we forgot to bake into pglogical that might be worth leaving
> room for here: sending the whole old tuple, with some fields marked as key.
> 
> So you can use replica identity pkey or whatever and the downstream
> knows which are the key fields. But can still transmit the whole old
> tuple in case the downstream wants it for conflict resolution/logging/etc.
> 
> We don't have the logical decoding and wal output for this yet, nor a
> way of requesting old tuple recording table by table. So all i'm
> suggesting is leaving room in the protocol.
> 

Not really sure I follow, which columns are keys is not part of the info
in the data message, it's part of relation message, so it's already
possible in the protocol. Also the current implementation is fully
capable of taking advantage of PK on downstream even with REPLICA
IDENTITY FULL.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-15 Thread Craig Ringer
On 15 Dec. 2016 18:19, "Petr Jelinek"  wrote:

On 13/12/16 21:42, Peter Eisentraut wrote:
> On 12/10/16 2:48 AM, Petr Jelinek wrote:
>> Attached new version with your updates and rebased on top of the current
>> HEAD (the partitioning patch produced quite a few conflicts).
>
> I have attached a few more "fixup" patches, mostly with some editing of
> documentation and comments and some compiler warnings.
>
> In 0006 in the protocol documentation I have left a "XXX ???" where I
> didn't understand what it was trying to say.
>

Ah so you didn't understand the
> +Identifies the following TupleData submessage as a key.
> +This field is optional and is only present if
> +the update changed the REPLICA IDENTITY index. XXX???

So what happens here is that the update message can contain one or two
out of 3 possible tuple submessages. It always contains 'N' message
which is the new data. Then it can optionally contain 'O' message with
old data if the table has REPLICA IDENTITY FULL (ie, not REPLICA
IDENTITY index like pkey, etc). Or it can include 'K' message that only
contains old data for the columns in the REPLICA IDENTITY index. But if
the REPLICA IDENTITY index didn't change (ie, old and new would be same
for those columns) we simply omit the 'K' message and let the downstream
take the key data from the 'N' message to save space.


Something we forgot to bake into pglogical that might be worth leaving room
for here: sending the whole old tuple, with some fields marked as key.

So you can use replica identity pkey or whatever and the downstream knows
which are the key fields. But can still transmit the whole old tuple in
case the downstream wants it for conflict resolution/logging/etc.

We don't have the logical decoding and wal output for this yet, nor a way
of requesting old tuple recording table by table. So all i'm suggesting is
leaving room in the protocol.


Re: [HACKERS] Logical Replication WIP

2016-12-15 Thread Petr Jelinek
On 13/12/16 21:42, Peter Eisentraut wrote:
> On 12/10/16 2:48 AM, Petr Jelinek wrote:
>> Attached new version with your updates and rebased on top of the current
>> HEAD (the partitioning patch produced quite a few conflicts).
> 
> I have attached a few more "fixup" patches, mostly with some editing of
> documentation and comments and some compiler warnings.
> 
> In 0006 in the protocol documentation I have left a "XXX ???" where I
> didn't understand what it was trying to say.
> 

Ah so you didn't understand the
> +Identifies the following TupleData submessage as a key.
> +This field is optional and is only present if
> +the update changed the REPLICA IDENTITY index. XXX???

So what happens here is that the update message can contain one or two
out of 3 possible tuple submessages. It always contains 'N' message
which is the new data. Then it can optionally contain 'O' message with
old data if the table has REPLICA IDENTITY FULL (ie, not REPLICA
IDENTITY index like pkey, etc). Or it can include 'K' message that only
contains old data for the columns in the REPLICA IDENTITY index. But if
the REPLICA IDENTITY index didn't change (ie, old and new would be same
for those columns) we simply omit the 'K' message and let the downstream
take the key data from the 'N' message to save space.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-15 Thread Petr Jelinek
On 13/12/16 01:33, Andres Freund wrote:
> 
> On 2016-12-12 09:18:48 -0500, Peter Eisentraut wrote:
>> On 12/8/16 4:10 PM, Petr Jelinek wrote:
>>> On 08/12/16 20:16, Peter Eisentraut wrote:
 On 12/6/16 11:58 AM, Peter Eisentraut wrote:
> On 12/5/16 6:24 PM, Petr Jelinek wrote:
>> I think that the removal of changes to ReplicationSlotAcquire() that you
>> did will result in making it impossible to reacquire temporary slot once
>> you switched to different one in the session as the if (active_pid != 0)
>> will always be true for temp slot.
>
> I see.  I suppose it's difficult to get a test case for this.

 I created a test case, saw the error of my ways, and added your code
 back in.  Patch attached.

>>>
>>> Hi,
>>>
>>> I am happy with this version, thanks for moving it forward.
>>
>> committed
> 
> Hm.
> 
>  /*
> + * Cleanup all temporary slots created in current session.
> + */
> +void
> +ReplicationSlotCleanup()
> 
> I'd rather see a (void) there. The prototype has it, but still.
> 
> 
> +
> + /*
> +  * No need for locking as we are only interested in slots active in
> +  * current process and those are not touched by other processes.
> 
> I'm a bit suspicious of this claim.  Without a memory barrier you could
> actually look at outdated versions of active_pid. In practice there's
> enough full memory barriers in the slot creation code that it's
> guaranteed to not be the same pid from before a wraparound though.
> 
> I think that doing iterations of slots without
> ReplicationSlotControlLock makes things more fragile, because suddenly
> assumptions that previously held aren't true anymore.   E.g. factually
>   /*
>* The slot is definitely gone.  Lock out concurrent scans of the array
>* long enough to kill it.  It's OK to clear the active flag here 
> without
>* grabbing the mutex because nobody else can be scanning the array 
> here,
>* and nobody can be attached to this slot and thus access it without
>* scanning the array.
>*/
> is now simply not true anymore.  It's probably not harmfully broken, but
> at least you've changed the locking protocol without adapting comments.
> 
> 

Any thoughts on attached? Yes it does repeated scans which can in theory
be slow but as I explained in the comment, in practice there is not much
need to have many temporary slots active within single session so it
should not be big issue.

I am not quite convinced that all the locking is necessary from the
current logic perspective TBH but it should help prevent mistakes by
whoever changes things in slot.c next.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
From 3ddeac5eb01da1642a6c7eb9a290a3ded08045dd Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Thu, 15 Dec 2016 09:43:17 +0100
Subject: [PATCH 2/2] Improve behavior of ReplicationSlotCleanup()

Make sure we have slot locked properly for modification everywhere and
also cleanup MyReplicationSlot so to reduce code duplication.
---
 src/backend/replication/slot.c  | 58 -
 src/backend/replication/walsender.c |  3 --
 src/backend/storage/lmgr/proc.c |  6 +---
 src/backend/tcop/postgres.c |  4 ---
 4 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index d8ed005..ed50e49 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -412,30 +412,60 @@ ReplicationSlotRelease(void)
 }
 
 /*
- * Cleanup all temporary slots created in current session.
+ * Search the replication slot list for temporary slot owned by current
+ * session and return it. Returns NULL if not found.
  */
-void
-ReplicationSlotCleanup()
+static ReplicationSlot *
+FindMyNextTempSlot(void)
 {
-	int			i;
-
-	Assert(MyReplicationSlot == NULL);
+	int	i;
+	ReplicationSlot	   *slot;
 
-	/*
-	 * No need for locking as we are only interested in slots active in
-	 * current process and those are not touched by other processes.
-	 */
+	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 	for (i = 0; i < max_replication_slots; i++)
 	{
-		ReplicationSlot *s = >replication_slots[i];
+		slot = >replication_slots[i];
 
-		if (s->active_pid == MyProcPid)
+		SpinLockAcquire(>mutex);
+		if (slot->active_pid == MyProcPid)
 		{
-			Assert(s->in_use && s->data.persistency == RS_TEMPORARY);
+			Assert(slot->in_use && slot->data.persistency == RS_TEMPORARY);
 
-			ReplicationSlotDropPtr(s);
+			SpinLockRelease(>mutex);
+			LWLockRelease(ReplicationSlotControlLock);
+			return slot;
 		}
+		else
+			SpinLockRelease(>mutex);
 	}
+	LWLockRelease(ReplicationSlotControlLock);
+
+	return NULL;
+}
+
+/*
+ * Cleanup all any slot state we might have. This includes releasing any
+ * active replication slot in current session and dropping all temporary
+ * slots created in 

Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Petr Jelinek
On 13/12/16 21:42, Peter Eisentraut wrote:
> On 12/10/16 2:48 AM, Petr Jelinek wrote:
>> Attached new version with your updates and rebased on top of the current
>> HEAD (the partitioning patch produced quite a few conflicts).
> 
> I have attached a few more "fixup" patches, mostly with some editing of
> documentation and comments and some compiler warnings.
> 
> In 0006 in the protocol documentation I have left a "XXX ???" where I
> didn't understand what it was trying to say.
> 

Okay I'll address that separately, thanks.

> All issues from (my) previous reviews appear to have been addressed.
> 
> Comments besides that:
> 
> 
> 0003-Add-SUBSCRIPTION-catalog-and-DDL-v12.patch
> 
> Still wondering about the best workflow with pg_dump, but it seems all
> the pieces are there right now, and the interfaces can be tweaked later.

Right, either way there needs to be some special handling for
subscriptions, having to request them specifically seems safest option
to me, but I am open to suggestions there.

> 
> DROP SUBSCRIPTION requires superuser, but should perhaps be owner check
> only?
> 

Hmm not sure that it requires superuser, I actually think it mistakenly
didn't require anything. In any case will make sure it just does owner
check.

> DROP SUBSCRIPTION IF EXISTS crashes if the subscription does not in fact
> exist.
>

Right, missing return.

> Maybe write the grammar so that SLOT does not need to be a new key word.
>  The changes you made for CREATE PUBLICATION should allow that.
> 

Hmm how would that look like? The opt_drop_slot would become IDENT
IDENT? Or maybe you want me to add the WITH (definition) kind of thing?

> The tests are not added to serial_schedule.  Intentional?  If so, document?
> 

Not intentional, will fix. Never use it, easy to forget about it.

> 
> 0004-Define-logical-replication-protocol-and-output-plugi-v12.patch
> 
> Not sure why pg_catalog is encoded as a zero-length string.  I guess it
> saves some space.  Maybe that could be explained in a brief code comment?
> 

Yes it's to save space, mainly for built-in types.

> 
> 0005-Add-logical-replication-workers-v12.patch
> 
> The way the executor stuff is organized now looks better to me.
> 
> The subscriber crashes if max_replication_slots is 0:
> 
> TRAP: FailedAssertion("!(max_replication_slots > 0)", File: "origin.c",
> Line: 999)
> 
> The documentation says that replication slots are required on the
> subscriber, but from a user's perspective, it's not clear why that is.

Yeah honestly I think origins should not depend on
max_replication_slots. They are not really connected (you can have many
of one and none of the other and vice versa). Also max_replication_slots
should IMHO default to max_wal_senders at this point. (In ideal world
all of those 3 would be in DSM instead of SHM and only governed by some
implementation maximum which is probably 2^16 and the GUCs would be removed)

But yes as it is, we should check for that, probably both during CREATE
SUBSCRIPTION and during apply start.

> 
> Dropping a table that is part of a live subscription results in log
> messages like
> 
> WARNING:  leaked hash_seq_search scan for hash table 0x7f9d2a807238
> 
> I was testing replicating into a temporary table, which failed like this:
> 
> FATAL:  the logical replication target public.test1 not found
> LOG:  worker process:  (PID 2879) exited with exit code 1
> LOG:  starting logical replication worker for subscription 16392
> LOG:  logical replication apply for subscription mysub started
> 
> That's okay, but those messages were repeated every few seconds or so
> and would create quite some log volume.  I wonder if that needs to be
> reigned in somewhat.

It retries every 5s or so I think, I am not sure how that could be
improved besides using the wal_retrieve_retry_interval instead of
hardcoded 5s (or maybe better add GUC for apply). Maybe some kind of
backoff algorithm could be added as well.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Petr Jelinek
On 13/12/16 22:05, Andres Freund wrote:
> Hi,
> 
> On 2016-12-13 15:42:17 -0500, Peter Eisentraut wrote:
>> I think this is getting very close to the point where it's committable.
>> So if anyone else has major concerns about the whole approach and
>> perhaps the way the new code in 0005 is organized, now would be the time ...
> 
> Uh. The whole cache invalidation thing is completely unresolved, and
> that's just the publication patch. I've not looked in detail at later
> patches.  So no, I don't think so.
> 

I already have code for that. I'll submit next version once I'll go over
PeterE's review. BTW the relcache thing is not as bad as it seems from
the publication patch because output plugin has to deal with
relcache/publication cache invalidations, it handles most of the updates
correctly. But there was still problem in terms of the write filtering
so the publications still have to reset relcache too.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Petr Jelinek
On 14/12/16 01:26, Peter Eisentraut wrote:
> On 12/12/16 7:33 PM, Andres Freund wrote:
>> +-- test switching between slots in a session
>> +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 
>> 'test_decoding', true);
>> +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 
>> 'test_decoding', true);
>> +SELECT * FROM pg_logical_slot_get_changes('regression_slot1', NULL, NULL);
>> +SELECT * FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL);
>>
>> Can we actually output something? Right now this doesn't test that
>> much...
> 
> This test was added because an earlier version of the patch would crash
> on this.
> 

I did improve the test as part of the tests improvements that were sent
to committers list btw.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Peter Eisentraut
On 12/12/16 7:33 PM, Andres Freund wrote:
> +-- test switching between slots in a session
> +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 
> 'test_decoding', true);
> +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 
> 'test_decoding', true);
> +SELECT * FROM pg_logical_slot_get_changes('regression_slot1', NULL, NULL);
> +SELECT * FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL);
> 
> Can we actually output something? Right now this doesn't test that
> much...

This test was added because an earlier version of the patch would crash
on this.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Andres Freund
On 2016-12-13 06:55:31 +0100, Petr Jelinek wrote:
> >> This is a quadratic algorithm - that could bite us... Not sure if we
> >> need to care.  If we want to fix it, one approach owuld be to use
> >> RangeVarGetRelid() instead, and then do a qsort/deduplicate before
> >> actually opening the relations.
> >>
> > 
> > I guess it could get really slow only with big inheritance tree, I'll
> > look into how much work is the other way of doing things (this is not
> > exactly hot code path).
> > 
> 
> Actually looking at it, it only processes user input so I don't think
> it's very problematic in terms of performance. You'd have to pass many
> thousands of tables in single DDL to notice.

Well, at least we should put a CHECK_FOR_INTERRUPTS there. At the moment
it's IIRC uninterruptible, which isn't good for something directly
triggered by the user.  A comment that it's known to be O(n^2), but
considered acceptable, would be good too.

Andres


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Andres Freund
Hi,

On 2016-12-13 15:42:17 -0500, Peter Eisentraut wrote:
> I think this is getting very close to the point where it's committable.
> So if anyone else has major concerns about the whole approach and
> perhaps the way the new code in 0005 is organized, now would be the time ...

Uh. The whole cache invalidation thing is completely unresolved, and
that's just the publication patch. I've not looked in detail at later
patches.  So no, I don't think so.

I think after the invalidation issue is resolved the publication patch
might be close to being ready. I'm doubtful the later patches are.

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-13 Thread Peter Eisentraut
On 12/10/16 2:48 AM, Petr Jelinek wrote:
> Attached new version with your updates and rebased on top of the current
> HEAD (the partitioning patch produced quite a few conflicts).

I have attached a few more "fixup" patches, mostly with some editing of
documentation and comments and some compiler warnings.

In 0006 in the protocol documentation I have left a "XXX ???" where I
didn't understand what it was trying to say.

All issues from (my) previous reviews appear to have been addressed.

Comments besides that:


0003-Add-SUBSCRIPTION-catalog-and-DDL-v12.patch

Still wondering about the best workflow with pg_dump, but it seems all
the pieces are there right now, and the interfaces can be tweaked later.

DROP SUBSCRIPTION requires superuser, but should perhaps be owner check
only?

DROP SUBSCRIPTION IF EXISTS crashes if the subscription does not in fact
exist.

Maybe write the grammar so that SLOT does not need to be a new key word.
 The changes you made for CREATE PUBLICATION should allow that.

The tests are not added to serial_schedule.  Intentional?  If so, document?


0004-Define-logical-replication-protocol-and-output-plugi-v12.patch

Not sure why pg_catalog is encoded as a zero-length string.  I guess it
saves some space.  Maybe that could be explained in a brief code comment?


0005-Add-logical-replication-workers-v12.patch

The way the executor stuff is organized now looks better to me.

The subscriber crashes if max_replication_slots is 0:

TRAP: FailedAssertion("!(max_replication_slots > 0)", File: "origin.c",
Line: 999)

The documentation says that replication slots are required on the
subscriber, but from a user's perspective, it's not clear why that is.

Dropping a table that is part of a live subscription results in log
messages like

WARNING:  leaked hash_seq_search scan for hash table 0x7f9d2a807238

I was testing replicating into a temporary table, which failed like this:

FATAL:  the logical replication target public.test1 not found
LOG:  worker process:  (PID 2879) exited with exit code 1
LOG:  starting logical replication worker for subscription 16392
LOG:  logical replication apply for subscription mysub started

That's okay, but those messages were repeated every few seconds or so
and would create quite some log volume.  I wonder if that needs to be
reigned in somewhat.


I think this is getting very close to the point where it's committable.
So if anyone else has major concerns about the whole approach and
perhaps the way the new code in 0005 is organized, now would be the time ...

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From df169d3fec6d67c3daf301d9ed9903545358dd7e Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Mon, 12 Dec 2016 12:00:00 -0500
Subject: [PATCH 2/8] fixup! Add PUBLICATION catalogs and DDL

---
 src/backend/commands/publicationcmds.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 954b2bd65d..f1d7404ffe 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -389,7 +389,6 @@ AlterPublication(AlterPublicationStmt *stmt)
 {
 	Relation		rel;
 	HeapTuple		tup;
-	AclResult		aclresult;
 
 	rel = heap_open(PublicationRelationId, RowExclusiveLock);
 
@@ -564,12 +563,11 @@ PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
 	foreach(lc, rels)
 	{
 		Relation	rel = (Relation) lfirst(lc);
-		AclResult	aclresult;
 		ObjectAddress	obj;
 
 		/* Must be owner of the table or superuser. */
 		if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
-			aclcheck_error(aclresult, ACL_KIND_CLASS,
+			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
 		   RelationGetRelationName(rel));
 
 		obj = publication_add_relation(pubid, rel, if_not_exists);
-- 
2.11.0

From 2cab724505487663b53f8af09c8e16a70c52014d Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Fri, 9 Dec 2016 12:00:00 -0500
Subject: [PATCH 4/8] fixup! Add SUBSCRIPTION catalog and DDL

---
 doc/src/sgml/catalogs.sgml | 28 ++---
 doc/src/sgml/ref/alter_subscription.sgml   | 25 +---
 doc/src/sgml/ref/create_subscription.sgml  | 47 ++
 doc/src/sgml/ref/drop_subscription.sgml| 23 ++-
 doc/src/sgml/ref/psql-ref.sgml |  6 +--
 src/backend/catalog/pg_subscription.c  |  2 +-
 src/backend/commands/subscriptioncmds.c|  6 +--
 src/backend/parser/gram.y  |  2 +-
 .../libpqwalreceiver/libpqwalreceiver.c|  2 +-
 src/backend/tcop/utility.c |  5 +++
 src/bin/pg_dump/pg_dump.c  |  7 +---
 src/test/regress/parallel_schedule |  2 +-
 12 files changed, 75 insertions(+), 80 deletions(-)

diff --git 

Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Petr Jelinek
On 13/12/16 03:26, Petr Jelinek wrote:
> On 13/12/16 02:41, Andres Freund wrote:
>> On 2016-12-10 08:48:55 +0100, Petr Jelinek wrote: 
>>
>>> +static List *
>>> +OpenTableList(List *tables)
>>> +{
>>> +   List   *relids = NIL;
>>> +   List   *rels = NIL;
>>> +   ListCell   *lc;
>>> +
>>> +   /*
>>> +* Open, share-lock, and check all the explicitly-specified relations
>>> +*/
>>> +   foreach(lc, tables)
>>> +   {
>>> +   RangeVar   *rv = lfirst(lc);
>>> +   Relationrel;
>>> +   boolrecurse = interpretInhOption(rv->inhOpt);
>>> +   Oid myrelid;
>>> +
>>> +   rel = heap_openrv(rv, ShareUpdateExclusiveLock);
>>> +   myrelid = RelationGetRelid(rel);
>>> +   /* filter out duplicates when user specifies "foo, foo" */
>>> +   if (list_member_oid(relids, myrelid))
>>> +   {
>>> +   heap_close(rel, ShareUpdateExclusiveLock);
>>> +   continue;
>>> +   }
>>
>> This is a quadratic algorithm - that could bite us... Not sure if we
>> need to care.  If we want to fix it, one approach owuld be to use
>> RangeVarGetRelid() instead, and then do a qsort/deduplicate before
>> actually opening the relations.
>>
> 
> I guess it could get really slow only with big inheritance tree, I'll
> look into how much work is the other way of doing things (this is not
> exactly hot code path).
> 

Actually looking at it, it only processes user input so I don't think
it's very problematic in terms of performance. You'd have to pass many
thousands of tables in single DDL to notice.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Petr Jelinek
On 13/12/16 02:41, Andres Freund wrote:
> On 2016-12-10 08:48:55 +0100, Petr Jelinek wrote:
> 
>> diff --git a/src/backend/catalog/pg_publication.c 
>> b/src/backend/catalog/pg_publication.c
>> new file mode 100644
>> index 000..e3560b7
>> --- /dev/null
>> +++ b/src/backend/catalog/pg_publication.c
>> +
>> +Datum pg_get_publication_tables(PG_FUNCTION_ARGS);
> 
> Don't we usually put these in a header?
>

We put these to rather random places, I don't mind either way.

> 
>> +/*
>> + * Gets list of relation oids for a publication.
>> + *
>> + * This should only be used for normal publications, the FOR ALL TABLES
>> + * should use GetAllTablesPublicationRelations().
>> + */
>> +List *
>> +GetPublicationRelations(Oid pubid)
>> +{
>> +List   *result;
>> +Relationpubrelsrel;
>> +ScanKeyData scankey;
>> +SysScanDesc scan;
>> +HeapTuple   tup;
>> +
>> +/* Find all publications associated with the relation. */
>> +pubrelsrel = heap_open(PublicationRelRelationId, AccessShareLock);
>> +
>> +ScanKeyInit(,
>> +Anum_pg_publication_rel_prpubid,
>> +BTEqualStrategyNumber, F_OIDEQ,
>> +ObjectIdGetDatum(pubid));
>> +
>> +scan = systable_beginscan(pubrelsrel, PublicationRelMapIndexId, true,
>> +  NULL, 1, );
>> +
>> +result = NIL;
>> +while (HeapTupleIsValid(tup = systable_getnext(scan)))
>> +{
>> +Form_pg_publication_rel pubrel;
>> +
>> +pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
>> +
>> +result = lappend_oid(result, pubrel->prrelid);
>> +}
>> +
>> +systable_endscan(scan);
>> +heap_close(pubrelsrel, NoLock);
> 
> In other parts of this you drop the lock, but not here?
> 
> 
>> +heap_close(rel, NoLock);
>> +
>> +return result;
>> +}
> 
> and here.
> 

Meh, ignore, that's some pglogical legacy.


> 
> Btw, why are matviews not publishable?
> 

Because standard way of updating them is REFRESH MATERIALIZED VIEW which
is decoded as inserts into pg_temp_ table. I think we'll have to
rethink how we do this before we can sanely support them.

>> +/*
>> + * Get Publication using name.
>> + */
>> +Publication *
>> +GetPublicationByName(const char *pubname, bool missing_ok)
>> +{
>> +Oid oid;
>> +
>> +oid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname));
>> +if (!OidIsValid(oid))
>> +{
>> +if (missing_ok)
>> +return NULL;
>> +
>> +ereport(ERROR,
>> +(errcode(ERRCODE_UNDEFINED_OBJECT),
>> + errmsg("publication \"%s\" does not exist", 
>> pubname)));
>> +}
>> +
>> +return GetPublication(oid);
>> +}
> 
> That's racy... Also, shouldn't we specify for how to deal with the
> returned memory for Publication * returning methods?
> 

So are most of the other existing functions with similar purpose. The
worst case is that with enough concurrency around same publication name
DDL you'll get cache lookup failure.

I added comment to GetPublication saying that memory is palloced.

> 
>> diff --git a/src/backend/commands/publicationcmds.c 
>> b/src/backend/commands/publicationcmds.c
>> new file mode 100644
>> index 000..954b2bd
>> --- /dev/null
>> +++ b/src/backend/commands/publicationcmds.c
>> @@ -0,0 +1,613 @@
> 
>> +/*
>> + * Create new publication.
>> + */
>> +ObjectAddress
>> +CreatePublication(CreatePublicationStmt *stmt)
>> +{
>> +Relationrel;
> 
>> +
>> +values[Anum_pg_publication_puballtables - 1] =
>> +BoolGetDatum(stmt->for_all_tables);
>> +values[Anum_pg_publication_pubinsert - 1] =
>> +BoolGetDatum(publish_insert);
>> +values[Anum_pg_publication_pubupdate - 1] =
>> +BoolGetDatum(publish_update);
>> +values[Anum_pg_publication_pubdelete - 1] =
>> +BoolGetDatum(publish_delete);
> 
> I remain convinced that a different representation would be
> better. There'll be more options over time (truncate, DDL at least).
> 

So? It's boolean properties, it's not like we store bitmaps in catalogs
much. I very much expect DDL to be much more complex than boolean btw.

> 
>> +static void
>> +AlterPublicationOptions(AlterPublicationStmt *stmt, Relation rel,
>> +   HeapTuple tup)
>> +{
>> +boolpublish_insert_given;
>> +boolpublish_update_given;
>> +boolpublish_delete_given;
>> +boolpublish_insert;
>> +boolpublish_update;
>> +boolpublish_delete;
>> +ObjectAddress   obj;
>> +
>> +parse_publication_options(stmt->options,
>> +  
>> _insert_given, _insert,
>> + 

Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Andres Freund
On 2016-12-10 08:48:55 +0100, Petr Jelinek wrote:

> diff --git a/src/backend/catalog/pg_publication.c 
> b/src/backend/catalog/pg_publication.c
> new file mode 100644
> index 000..e3560b7
> --- /dev/null
> +++ b/src/backend/catalog/pg_publication.c
> +
> +Datum pg_get_publication_tables(PG_FUNCTION_ARGS);

Don't we usually put these in a header?

> +/*
> + * Insert new publication / relation mapping.
> + */
> +ObjectAddress
> +publication_add_relation(Oid pubid, Relation targetrel,
> +  bool if_not_exists)
> +{
> + Relationrel;
> + HeapTuple   tup;
> + Datum   values[Natts_pg_publication_rel];
> + boolnulls[Natts_pg_publication_rel];
> + Oid relid = RelationGetRelid(targetrel);
> + Oid prrelid;
> + Publication *pub = GetPublication(pubid);
> + ObjectAddress   myself,
> + referenced;
> +
> + rel = heap_open(PublicationRelRelationId, RowExclusiveLock);
> +
> + /* Check for duplicates */

Maybe mention that that check is racy, but a unique index protects
against the race?


> + /* Insert tuple into catalog. */
> + prrelid = simple_heap_insert(rel, tup);
> + CatalogUpdateIndexes(rel, tup);
> + heap_freetuple(tup);
> +
> + ObjectAddressSet(myself, PublicationRelRelationId, prrelid);
> +
> + /* Add dependency on the publication */
> + ObjectAddressSet(referenced, PublicationRelationId, pubid);
> + recordDependencyOn(, , DEPENDENCY_AUTO);
> +
> + /* Add dependency on the relation */
> + ObjectAddressSet(referenced, RelationRelationId, relid);
> + recordDependencyOn(, , DEPENDENCY_AUTO);
> +
> + /* Close the table. */
> + heap_close(rel, RowExclusiveLock);

I'm not quite sure abou the policy, but shouldn't we invoke
InvokeObjectPostCreateHook etc here?


> +/*
> + * Gets list of relation oids for a publication.
> + *
> + * This should only be used for normal publications, the FOR ALL TABLES
> + * should use GetAllTablesPublicationRelations().
> + */
> +List *
> +GetPublicationRelations(Oid pubid)
> +{
> + List   *result;
> + Relationpubrelsrel;
> + ScanKeyData scankey;
> + SysScanDesc scan;
> + HeapTuple   tup;
> +
> + /* Find all publications associated with the relation. */
> + pubrelsrel = heap_open(PublicationRelRelationId, AccessShareLock);
> +
> + ScanKeyInit(,
> + Anum_pg_publication_rel_prpubid,
> + BTEqualStrategyNumber, F_OIDEQ,
> + ObjectIdGetDatum(pubid));
> +
> + scan = systable_beginscan(pubrelsrel, PublicationRelMapIndexId, true,
> +   NULL, 1, );
> +
> + result = NIL;
> + while (HeapTupleIsValid(tup = systable_getnext(scan)))
> + {
> + Form_pg_publication_rel pubrel;
> +
> + pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
> +
> + result = lappend_oid(result, pubrel->prrelid);
> + }
> +
> + systable_endscan(scan);
> + heap_close(pubrelsrel, NoLock);

In other parts of this you drop the lock, but not here?


> + heap_close(rel, NoLock);
> +
> + return result;
> +}

and here.


> +/*
> + * Gets list of all relation published by FOR ALL TABLES publication(s).
> + */
> +List *
> +GetAllTablesPublicationRelations(void)
> +{
> + RelationclassRel;
> + ScanKeyData key[1];
> + HeapScanDesc scan;
> + HeapTuple   tuple;
> + List   *result = NIL;
> +
> + classRel = heap_open(RelationRelationId, AccessShareLock);

> + heap_endscan(scan);
> + heap_close(classRel, AccessShareLock);
> +
> + return result;
> +}

but here.


Btw, why are matviews not publishable?

> +/*
> + * Get Publication using name.
> + */
> +Publication *
> +GetPublicationByName(const char *pubname, bool missing_ok)
> +{
> + Oid oid;
> +
> + oid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname));
> + if (!OidIsValid(oid))
> + {
> + if (missing_ok)
> + return NULL;
> +
> + ereport(ERROR,
> + (errcode(ERRCODE_UNDEFINED_OBJECT),
> +  errmsg("publication \"%s\" does not exist", 
> pubname)));
> + }
> +
> + return GetPublication(oid);
> +}

That's racy... Also, shouldn't we specify for how to deal with the
returned memory for Publication * returning methods?



> diff --git a/src/backend/commands/publicationcmds.c 
> b/src/backend/commands/publicationcmds.c
> new file mode 100644
> index 000..954b2bd
> --- /dev/null
> +++ b/src/backend/commands/publicationcmds.c
> @@ -0,0 +1,613 @@

> +/*
> + * Create new publication.
> + */
> +ObjectAddress
> +CreatePublication(CreatePublicationStmt *stmt)

Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Petr Jelinek
On 13/12/16 01:33, Andres Freund wrote:
> HJi,
> 
> On 2016-12-12 09:18:48 -0500, Peter Eisentraut wrote:
>> On 12/8/16 4:10 PM, Petr Jelinek wrote:
>>> On 08/12/16 20:16, Peter Eisentraut wrote:
 On 12/6/16 11:58 AM, Peter Eisentraut wrote:
> On 12/5/16 6:24 PM, Petr Jelinek wrote:
>> I think that the removal of changes to ReplicationSlotAcquire() that you
>> did will result in making it impossible to reacquire temporary slot once
>> you switched to different one in the session as the if (active_pid != 0)
>> will always be true for temp slot.
>
> I see.  I suppose it's difficult to get a test case for this.

 I created a test case, saw the error of my ways, and added your code
 back in.  Patch attached.

>>>
>>> Hi,
>>>
>>> I am happy with this version, thanks for moving it forward.
>>
>> committed
> 
> Hm.
> 
>  /*
> + * Cleanup all temporary slots created in current session.
> + */
> +void
> +ReplicationSlotCleanup()
> 
> I'd rather see a (void) there. The prototype has it, but still.
> 
> 
> +
> + /*
> +  * No need for locking as we are only interested in slots active in
> +  * current process and those are not touched by other processes.
> 
> I'm a bit suspicious of this claim.  Without a memory barrier you could
> actually look at outdated versions of active_pid. In practice there's
> enough full memory barriers in the slot creation code that it's
> guaranteed to not be the same pid from before a wraparound though.
> 
> I think that doing iterations of slots without
> ReplicationSlotControlLock makes things more fragile, because suddenly
> assumptions that previously held aren't true anymore.   E.g. factually
>   /*
>* The slot is definitely gone.  Lock out concurrent scans of the array
>* long enough to kill it.  It's OK to clear the active flag here 
> without
>* grabbing the mutex because nobody else can be scanning the array 
> here,
>* and nobody can be attached to this slot and thus access it without
>* scanning the array.
>*/
> is now simply not true anymore.  It's probably not harmfully broken, but
> at least you've changed the locking protocol without adapting comments.
> 

Well it's protected by being called only by ReplicationSlotCleanup() and
ReplicationSlotDropAcquired(). The comment could be improved though, yes.

Holding the ReplicationSlotControlLock in the scan is somewhat
problematic because ReplicationSlotDropPtr tryes to use it as well (and
in exclusive mode), so we'd have to do exclusive lock in
ReplicationSlotCleanup() which I don't really like much.

> 
>  /*
> - * Permanently drop the currently acquired replication slot which will be
> - * released by the point this function returns.
> + * Permanently drop the currently acquired replication slot.
>   */
>  static void
>  ReplicationSlotDropAcquired(void)
> 
> Isn't that actually removing interesting information? Yes, the comment's
> been moved to ReplicationSlotDropPtr(), but that routine is an internal
> one...
> 

ReplicationSlotDropAcquired() is internal one as well.

> 
> @@ -810,6 +810,9 @@ ProcKill(int code, Datum arg)
>   if (MyReplicationSlot != NULL)
>   ReplicationSlotRelease();
> 
> + /* Also cleanup all the temporary slots. */
> + ReplicationSlotCleanup();
> +
> 
> So we now have exactly this code in several places. Why does a
> generically named Cleanup routine not also deal with a currently
> acquired slot? Right now it'd be more appropriately named
> ReplicationSlotDropTemporary() or such.
> 

It definitely could release MyReplicationSlot as well.

> 
> @@ -1427,13 +1427,14 @@ pg_replication_slots| SELECT l.slot_name,
>  l.slot_type,
>  l.datoid,
>  d.datname AS database,
> +l.temporary,
>  l.active,
>  l.active_pid,
>  l.xmin,
>  l.catalog_xmin,
>  l.restart_lsn,
>  l.confirmed_flush_lsn
> -   FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, 
> active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn)
> +   FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, 
> temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, 
> confirmed_flush_lsn)
>   LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
>  pg_roles| SELECT pg_authid.rolname,
>  pg_authid.rolsuper,
> 
> If we start to expose this, shouldn't we expose the persistency instead
> (i.e. persistent/ephemeral/temporary)?
> 

Not sure how much is that useful given that ephemeral is transient state
only present during slot creation.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Andres Freund
HJi,

On 2016-12-12 09:18:48 -0500, Peter Eisentraut wrote:
> On 12/8/16 4:10 PM, Petr Jelinek wrote:
> > On 08/12/16 20:16, Peter Eisentraut wrote:
> >> On 12/6/16 11:58 AM, Peter Eisentraut wrote:
> >>> On 12/5/16 6:24 PM, Petr Jelinek wrote:
>  I think that the removal of changes to ReplicationSlotAcquire() that you
>  did will result in making it impossible to reacquire temporary slot once
>  you switched to different one in the session as the if (active_pid != 0)
>  will always be true for temp slot.
> >>>
> >>> I see.  I suppose it's difficult to get a test case for this.
> >>
> >> I created a test case, saw the error of my ways, and added your code
> >> back in.  Patch attached.
> >>
> >
> > Hi,
> >
> > I am happy with this version, thanks for moving it forward.
>
> committed

Hm.

 /*
+ * Cleanup all temporary slots created in current session.
+ */
+void
+ReplicationSlotCleanup()

I'd rather see a (void) there. The prototype has it, but still.


+
+   /*
+* No need for locking as we are only interested in slots active in
+* current process and those are not touched by other processes.

I'm a bit suspicious of this claim.  Without a memory barrier you could
actually look at outdated versions of active_pid. In practice there's
enough full memory barriers in the slot creation code that it's
guaranteed to not be the same pid from before a wraparound though.

I think that doing iterations of slots without
ReplicationSlotControlLock makes things more fragile, because suddenly
assumptions that previously held aren't true anymore.   E.g. factually
/*
 * The slot is definitely gone.  Lock out concurrent scans of the array
 * long enough to kill it.  It's OK to clear the active flag here 
without
 * grabbing the mutex because nobody else can be scanning the array 
here,
 * and nobody can be attached to this slot and thus access it without
 * scanning the array.
 */
is now simply not true anymore.  It's probably not harmfully broken, but
at least you've changed the locking protocol without adapting comments.


 /*
- * Permanently drop the currently acquired replication slot which will be
- * released by the point this function returns.
+ * Permanently drop the currently acquired replication slot.
  */
 static void
 ReplicationSlotDropAcquired(void)

Isn't that actually removing interesting information? Yes, the comment's
been moved to ReplicationSlotDropPtr(), but that routine is an internal
one...


@@ -810,6 +810,9 @@ ProcKill(int code, Datum arg)
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();

+   /* Also cleanup all the temporary slots. */
+   ReplicationSlotCleanup();
+

So we now have exactly this code in several places. Why does a
generically named Cleanup routine not also deal with a currently
acquired slot? Right now it'd be more appropriately named
ReplicationSlotDropTemporary() or such.


@@ -1427,13 +1427,14 @@ pg_replication_slots| SELECT l.slot_name,
 l.slot_type,
 l.datoid,
 d.datname AS database,
+l.temporary,
 l.active,
 l.active_pid,
 l.xmin,
 l.catalog_xmin,
 l.restart_lsn,
 l.confirmed_flush_lsn
-   FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, 
active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn)
+   FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, 
temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, 
confirmed_flush_lsn)
  LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
 pg_roles| SELECT pg_authid.rolname,
 pg_authid.rolsuper,

If we start to expose this, shouldn't we expose the persistency instead
(i.e. persistent/ephemeral/temporary)?


new file   contrib/test_decoding/sql/slot.sql
@@ -0,0 +1,20 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 
'test_decoding');
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 
'test_decoding', true);
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 
'test_decoding', false);
+
+-- reconnect to clean temp slots
+\c

Can we add multiple slots to clean up here? Can we also add a test for
the cleanup on error for temporary slots? E.g. something like in
ddl.sql (maybe we should actually move some of the relevant tests from
there to here).

It'd also be good to test this with physical slots?


+-- test switching between slots in a session
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 
'test_decoding', true);
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 
'test_decoding', true);
+SELECT * FROM pg_logical_slot_get_changes('regression_slot1', NULL, NULL);
+SELECT * FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL);

Can we actually output something? Right now this doesn't test that

Re: [HACKERS] Logical Replication WIP

2016-12-12 Thread Peter Eisentraut
On 12/8/16 4:10 PM, Petr Jelinek wrote:
> On 08/12/16 20:16, Peter Eisentraut wrote:
>> On 12/6/16 11:58 AM, Peter Eisentraut wrote:
>>> On 12/5/16 6:24 PM, Petr Jelinek wrote:
 I think that the removal of changes to ReplicationSlotAcquire() that you
 did will result in making it impossible to reacquire temporary slot once
 you switched to different one in the session as the if (active_pid != 0)
 will always be true for temp slot.
>>>
>>> I see.  I suppose it's difficult to get a test case for this.
>>
>> I created a test case, saw the error of my ways, and added your code
>> back in.  Patch attached.
>>
> 
> Hi,
> 
> I am happy with this version, thanks for moving it forward.

committed

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-09 Thread Petr Jelinek
Hi,

On 09/12/16 22:00, Erik Rijkers wrote:
> On 2016-12-09 17:08, Peter Eisentraut wrote:
> 
> Your earlier 0001-Add-support-for-temporary-replication-slots.patch
> could be applied instead of the similarly named, original patch by Petr.
> (I used 19fcc0058ecc8e5eb756547006bc1b24a93cbb80 to apply this patch-set
> to)
> 
> (And it was, by the way,  pretty stable and running well.)
> 

Great, thanks for testing.

> I'd like to get it running again but now I can't find a way to also
> include your newer 0001-fixup-Add-PUBLICATION-catalogs-and-DDL.patch of
> today.
> 
> How should these patches be applied (and at what level)?
> 
> 20161208: 0001-Add-support-for-temporary-replication-slots__petere.patch
>  # petere
> 20161202: 0002-Add-PUBLICATION-catalogs-and-DDL-v11.patch  # PJ
> 20161209: 0001-fixup-Add-PUBLICATION-catalogs-and-DDL.patch  # petere
> 20161202: 0003-Add-SUBSCRIPTION-catalog-and-DDL-v11.patch  # PJ
> 20161202:
> 0004-Define-logical-replication-protocol-and-output-plugi-v11.patch  # PJ
> 20161202: 0005-Add-logical-replication-workers-v11.patch  # PJ
> 20161202:
> 0006-Add-separate-synchronous-commit-control-for-logical--v11.patch  # PJ
> 
> Could (one of) you give me a hint?
> 

I just sent in a rebased patch that includes all of it.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-09 Thread Erik Rijkers

On 2016-12-09 17:08, Peter Eisentraut wrote:

Your earlier 0001-Add-support-for-temporary-replication-slots.patch 
could be applied instead of the similarly named, original patch by Petr.
(I used 19fcc0058ecc8e5eb756547006bc1b24a93cbb80 to apply this patch-set 
to)


(And it was, by the way,  pretty stable and running well.)

I'd like to get it running again but now I can't find a way to also 
include your newer 0001-fixup-Add-PUBLICATION-catalogs-and-DDL.patch of 
today.


How should these patches be applied (and at what level)?

20161208: 0001-Add-support-for-temporary-replication-slots__petere.patch 
 # petere

20161202: 0002-Add-PUBLICATION-catalogs-and-DDL-v11.patch  # PJ
20161209: 0001-fixup-Add-PUBLICATION-catalogs-and-DDL.patch  # petere
20161202: 0003-Add-SUBSCRIPTION-catalog-and-DDL-v11.patch  # PJ
20161202: 
0004-Define-logical-replication-protocol-and-output-plugi-v11.patch  # 
PJ

20161202: 0005-Add-logical-replication-workers-v11.patch  # PJ
20161202: 
0006-Add-separate-synchronous-commit-control-for-logical--v11.patch  # 
PJ


Could (one of) you give me a hint?

Thanks,

Erik Rijkers


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-09 Thread Peter Eisentraut
Here is a "fixup" patch for
0002-Add-PUBLICATION-catalogs-and-DDL-v11.patch.gz with some minor fixes.

Two issues that should be addressed:

1. I think ALTER PUBLICATION does not need to require CREATE privilege
on the database.  That should be easy to change.

2. By requiring only SELECT privilege to include a table in a
publication, someone could include a table without replica identity into
a publication and thus prevent updates to the table.

A while ago I had been working on a patch to create a new PUBLICATION
privilege for this purpose.  I have attached the in-progress patch here.
 We could either finish that up and include it, or commit your patch
initially with requiring superuser and then refine the permissions later.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 38aba08eb00ffc0b1f0d52a38864f825435a1694 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Thu, 8 Dec 2016 12:00:00 -0500
Subject: [PATCH] fixup! Add PUBLICATION catalogs and DDL

---
 doc/src/sgml/catalogs.sgml| 35 ++-
 doc/src/sgml/ref/alter_publication.sgml   | 41 +--
 doc/src/sgml/ref/create_publication.sgml  | 46 +--
 doc/src/sgml/ref/drop_publication.sgml|  6 ++--
 doc/src/sgml/ref/psql-ref.sgml|  6 ++--
 src/backend/catalog/Makefile  |  2 +-
 src/backend/catalog/dependency.c  |  4 +--
 src/backend/catalog/objectaddress.c   | 25 +++--
 src/backend/catalog/pg_publication.c  | 27 ++
 src/backend/commands/publicationcmds.c| 12 
 src/backend/commands/tablecmds.c  |  9 +++---
 src/backend/parser/gram.y |  2 +-
 src/backend/utils/cache/relcache.c|  2 +-
 src/backend/utils/cache/syscache.c|  2 +-
 src/bin/pg_dump/pg_dump.c |  3 --
 src/bin/psql/tab-complete.c   |  8 +++---
 src/test/regress/expected/publication.out | 22 +++
 17 files changed, 115 insertions(+), 137 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index aacd4bcb23..5213aa4f5e 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -5149,12 +5149,11 @@ pg_publication
   
 
   
-   The pg_publication catalog contains
-   all publications created in the database.
+   The catalog pg_publication contains all
+   publications created in the database.
   
 
   
-
pg_publication Columns
 

@@ -5179,7 +5178,7 @@ pg_publication Columns
   pubname
   Name
   
-  A unique, database-wide identifier for the publication.
+  Name of the publication
  
 
  
@@ -5202,26 +5201,25 @@ pg_publication Columns
   pubinsert
   bool
   
-  If true, INSERT operations are replicated for tables in the
-   publication.
+  If true, INSERT operations are replicated for
+   tables in the publication.
  
 
  
   pubupdate
   bool
   
-  If true, UPDATE operations are replicated for tables in the
-   publication.
+  If true, UPDATE operations are replicated for
+   tables in the publication.
  
 
  
   pubdelete
   bool
   
-  If true, DELETE operations are replicated for tables in the
-   publication.
+  If true, DELETE operations are replicated for
+   tables in the publication.
  
-
 

   
@@ -5235,13 +5233,12 @@ pg_publication_rel
   
 
   
-   The pg_publication_rel catalog contains
-   mapping between tables and publications in the database. This is many to
-   many mapping.
+   The catalog pg_publication_rel contains the
+   mapping between relations and publications in the database.  This is a
+   many-to-many mapping.
   
 
   
-
pg_publication_rel Columns
 

@@ -5255,21 +5252,19 @@ pg_publication_rel Columns
 
 
 
-
  
   prpubid
   oid
   pg_publication.oid
-  Publication reference.
+  Reference to publication
  
 
  
   prrelid
-  bool
+  oid
   pg_class.oid
-  Relation reference.
+  Reference to relation
  
-
 

   
diff --git a/doc/src/sgml/ref/alter_publication.sgml b/doc/src/sgml/ref/alter_publication.sgml
index c17666c97f..eb8cb07126 100644
--- a/doc/src/sgml/ref/alter_publication.sgml
+++ b/doc/src/sgml/ref/alter_publication.sgml
@@ -40,21 +40,20 @@ Description
 
   
The first variant of this command listed in the synopsis can change
-   all of the publication attributes specified in
-   .
-   Attributes not mentioned in the command retain their previous settings.
-   Database superusers can change any of these settings for any role.
+   all of the publication properties specified in
+   .  Properties not mentioned in the
+   command retain their previous settings.  Database superusers can change any
+   of these settings for any role.
   
 
   

Re: [HACKERS] Logical Replication WIP

2016-12-08 Thread Petr Jelinek
On 08/12/16 20:16, Peter Eisentraut wrote:
> On 12/6/16 11:58 AM, Peter Eisentraut wrote:
>> On 12/5/16 6:24 PM, Petr Jelinek wrote:
>>> I think that the removal of changes to ReplicationSlotAcquire() that you
>>> did will result in making it impossible to reacquire temporary slot once
>>> you switched to different one in the session as the if (active_pid != 0)
>>> will always be true for temp slot.
>>
>> I see.  I suppose it's difficult to get a test case for this.
> 
> I created a test case, saw the error of my ways, and added your code
> back in.  Patch attached.
> 

Hi,

I am happy with this version, thanks for moving it forward.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-08 Thread Peter Eisentraut
On 12/6/16 11:58 AM, Peter Eisentraut wrote:
> On 12/5/16 6:24 PM, Petr Jelinek wrote:
>> I think that the removal of changes to ReplicationSlotAcquire() that you
>> did will result in making it impossible to reacquire temporary slot once
>> you switched to different one in the session as the if (active_pid != 0)
>> will always be true for temp slot.
> 
> I see.  I suppose it's difficult to get a test case for this.

I created a test case, saw the error of my ways, and added your code
back in.  Patch attached.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From b6c389b5ccd17a4c8a68d429048c2e7db34bfd51 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Thu, 8 Dec 2016 12:00:00 -0500
Subject: [PATCH] Add support for temporary replication slots

This allows creating temporary replication slots that are removed
automatically at the end of the session or on error.
---
 contrib/test_decoding/Makefile  |  2 +-
 contrib/test_decoding/expected/ddl.out  |  4 +-
 contrib/test_decoding/expected/slot.out | 58 +++
 contrib/test_decoding/sql/slot.sql  | 20 ++
 doc/src/sgml/func.sgml  | 16 ++--
 doc/src/sgml/protocol.sgml  | 13 ++-
 src/backend/catalog/system_views.sql| 11 ++
 src/backend/replication/repl_gram.y | 22 +++
 src/backend/replication/repl_scanner.l  |  1 +
 src/backend/replication/slot.c  | 69 ++---
 src/backend/replication/slotfuncs.c | 24 
 src/backend/replication/walsender.c | 28 +++--
 src/backend/storage/lmgr/proc.c |  3 ++
 src/backend/tcop/postgres.c |  3 ++
 src/include/catalog/pg_proc.h   |  6 +--
 src/include/nodes/replnodes.h   |  1 +
 src/include/replication/slot.h  |  4 +-
 src/test/regress/expected/rules.out |  3 +-
 18 files changed, 237 insertions(+), 51 deletions(-)
 create mode 100644 contrib/test_decoding/expected/slot.out
 create mode 100644 contrib/test_decoding/sql/slot.sql

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a6641f5040..d2bc8b8350 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -39,7 +39,7 @@ submake-test_decoding:
 
 REGRESSCHECKS=ddl xact rewrite toast permissions decoding_in_xact \
 	decoding_into_rel binary prepared replorigin time messages \
-	spill
+	spill slot
 
 regresscheck: | submake-regress submake-test_decoding temp-install
 	$(MKDIR_P) regression_output
diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out
index a9ba615b5b..c104c4802d 100644
--- a/contrib/test_decoding/expected/ddl.out
+++ b/contrib/test_decoding/expected/ddl.out
@@ -702,7 +702,7 @@ SELECT pg_drop_replication_slot('regression_slot');
 
 /* check that the slot is gone */
 SELECT * FROM pg_replication_slots;
- slot_name | plugin | slot_type | datoid | database | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn 
++---++--+++--+--+-+-
+ slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn 
+---++---++--+---+++--+--+-+-
 (0 rows)
 
diff --git a/contrib/test_decoding/expected/slot.out b/contrib/test_decoding/expected/slot.out
new file mode 100644
index 00..5e6b70ba38
--- /dev/null
+++ b/contrib/test_decoding/expected/slot.out
@@ -0,0 +1,58 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding');
+ ?column? 
+--
+ init
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);
+ ?column? 
+--
+ init
+(1 row)
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot 
+--
+ 
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding', false);
+ ?column? 
+--
+ init
+(1 row)
+
+-- reconnect to clean temp slots
+\c
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot 
+--
+ 
+(1 row)
+
+-- should fail because the temporary slot was dropped automatically
+SELECT pg_drop_replication_slot('regression_slot_t');
+ERROR:  replication slot "regression_slot_t" does not exist
+-- test switching between slots in a session
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 'test_decoding', true);
+ ?column? 
+--
+ init
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 'test_decoding', true);
+ ?column? 
+--
+ init

Re: [HACKERS] Logical Replication WIP

2016-12-06 Thread Peter Eisentraut
On 12/5/16 6:24 PM, Petr Jelinek wrote:
> I think that the removal of changes to ReplicationSlotAcquire() that you
> did will result in making it impossible to reacquire temporary slot once
> you switched to different one in the session as the if (active_pid != 0)
> will always be true for temp slot.

I see.  I suppose it's difficult to get a test case for this.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-05 Thread Andres Freund
On 2016-12-02 12:37:49 -0500, Peter Eisentraut wrote:
> On 11/20/16 1:02 PM, Petr Jelinek wrote:
> > 0001:
> > This is the reworked approach to temporary slots that I sent earlier.
> 
> Andres, you had expressed an interest in this.  Will you be able to
> review it soon?

Yep. Needed to get that WIP stuff about expression evaluation and JITing
out of the door first though.

Regards,

Andres


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-05 Thread Petr Jelinek
On 04/12/16 02:06, Peter Eisentraut wrote:
> I massaged the temporary replication slot patch a bit.  I changed the
> column name in pg_stat_replication_slots from "persistent" to
> "temporary" and flipped the logical sense, so that it is consistent with
> the creation commands.  I also adjusted some comments and removed some
> changes in ReplicationSlotCreate() that didn't seem to do anything
> useful (might have been from a previous patch).
> 
> The attached patch looks good to me.
> 

I think that the removal of changes to ReplicationSlotAcquire() that you
did will result in making it impossible to reacquire temporary slot once
you switched to different one in the session as the if (active_pid != 0)
will always be true for temp slot.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Logical Replication WIP

2016-12-04 Thread Haribabu Kommi
On Sun, Dec 4, 2016 at 12:06 PM, Peter Eisentraut <
peter.eisentr...@2ndquadrant.com> wrote:

> I massaged the temporary replication slot patch a bit.  I changed the
> column name in pg_stat_replication_slots from "persistent" to
> "temporary" and flipped the logical sense, so that it is consistent with
> the creation commands.  I also adjusted some comments and removed some
> changes in ReplicationSlotCreate() that didn't seem to do anything
> useful (might have been from a previous patch).
>
> The attached patch looks good to me.
>

Moved to next CF with "needs review" status.


Regards,
Hari Babu
Fujitsu Australia


Re: [HACKERS] Logical Replication WIP

2016-12-03 Thread Peter Eisentraut
I massaged the temporary replication slot patch a bit.  I changed the
column name in pg_stat_replication_slots from "persistent" to
"temporary" and flipped the logical sense, so that it is consistent with
the creation commands.  I also adjusted some comments and removed some
changes in ReplicationSlotCreate() that didn't seem to do anything
useful (might have been from a previous patch).

The attached patch looks good to me.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From e4a814f7abb86e78daa63bbbd37cb00f2b7d9180 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Sat, 3 Dec 2016 12:00:00 -0500
Subject: [PATCH] Add support for temporary replication slots

This allows creating temporary replication slots that are removed
automatically at the end of the session or on error.
---
 contrib/test_decoding/Makefile  |  2 +-
 contrib/test_decoding/expected/ddl.out  |  4 +--
 contrib/test_decoding/expected/slot.out | 35 ++
 contrib/test_decoding/sql/slot.sql  | 13 +++
 doc/src/sgml/func.sgml  | 16 ++---
 doc/src/sgml/protocol.sgml  | 13 ++-
 src/backend/catalog/system_views.sql| 11 ++
 src/backend/replication/repl_gram.y | 22 
 src/backend/replication/repl_scanner.l  |  1 +
 src/backend/replication/slot.c  | 63 +++--
 src/backend/replication/slotfuncs.c | 24 +
 src/backend/replication/walsender.c | 28 +--
 src/backend/storage/lmgr/proc.c |  3 ++
 src/backend/tcop/postgres.c |  3 ++
 src/include/catalog/pg_proc.h   |  6 ++--
 src/include/nodes/replnodes.h   |  1 +
 src/include/replication/slot.h  |  4 ++-
 src/test/regress/expected/rules.out |  3 +-
 18 files changed, 204 insertions(+), 48 deletions(-)
 create mode 100644 contrib/test_decoding/expected/slot.out
 create mode 100644 contrib/test_decoding/sql/slot.sql

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a6641f5..d2bc8b8 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -39,7 +39,7 @@ submake-test_decoding:
 
 REGRESSCHECKS=ddl xact rewrite toast permissions decoding_in_xact \
 	decoding_into_rel binary prepared replorigin time messages \
-	spill
+	spill slot
 
 regresscheck: | submake-regress submake-test_decoding temp-install
 	$(MKDIR_P) regression_output
diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out
index a9ba615..c104c48 100644
--- a/contrib/test_decoding/expected/ddl.out
+++ b/contrib/test_decoding/expected/ddl.out
@@ -702,7 +702,7 @@ SELECT pg_drop_replication_slot('regression_slot');
 
 /* check that the slot is gone */
 SELECT * FROM pg_replication_slots;
- slot_name | plugin | slot_type | datoid | database | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn 
++---++--+++--+--+-+-
+ slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn 
+---++---++--+---+++--+--+-+-
 (0 rows)
 
diff --git a/contrib/test_decoding/expected/slot.out b/contrib/test_decoding/expected/slot.out
new file mode 100644
index 000..1a8c8df
--- /dev/null
+++ b/contrib/test_decoding/expected/slot.out
@@ -0,0 +1,35 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding');
+ ?column? 
+--
+ init
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);
+ ?column? 
+--
+ init
+(1 row)
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot 
+--
+ 
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding', false);
+ ?column? 
+--
+ init
+(1 row)
+
+-- reconnect to clean temp slots
+\c
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot 
+--
+ 
+(1 row)
+
+-- should fail because the temporary slot was dropped automatically
+SELECT pg_drop_replication_slot('regression_slot_t');
+ERROR:  replication slot "regression_slot_t" does not exist
diff --git a/contrib/test_decoding/sql/slot.sql b/contrib/test_decoding/sql/slot.sql
new file mode 100644
index 000..8728db5
--- /dev/null
+++ b/contrib/test_decoding/sql/slot.sql
@@ -0,0 +1,13 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding');
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);
+
+SELECT 

  1   2   3   >