Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails
Rebased over b663a4136 --- no substantive changes, just keeping the cfbot happy. regards, tom lane diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 6faf499f9a..c38e2419d5 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -320,7 +320,9 @@ foreign_expr_walker(Node *node, /* * If the Var is from the foreign table, we consider its - * collation (if any) safe to use. If it is from another + * collation (if any) safe to use, *unless* it's + * DEFAULT_COLLATION_OID. We treat that as meaning "we don't + * know which collation this is". If it is from another * table, we treat its collation the same way as we would a * Param's collation, ie it's not safe for it to have a * non-default collation. @@ -342,7 +344,12 @@ foreign_expr_walker(Node *node, /* Else check the collation */ collation = var->varcollid; - state = OidIsValid(collation) ? FDW_COLLATE_SAFE : FDW_COLLATE_NONE; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (collation == DEFAULT_COLLATION_OID) + state = FDW_COLLATE_UNSAFE; + else + state = FDW_COLLATE_SAFE; } else { @@ -813,8 +820,24 @@ foreign_expr_walker(Node *node, /* * Now, merge my collation information into my parent's state. + * + * If one branch of an expression derives a non-default collation safely + * (that is, from a foreign Var) and another one derives the same + * collation unsafely, we can consider the expression safe overall. This + * allows cases such as "foreign_var = ('foo' COLLATE x)" where x is the + * same collation the foreign_var has anyway. Note that we will not ship + * any explicit COLLATE clause to the remote, but rely on it to re-derive + * the correct collation based on the foreign_var. 
*/ - if (state > outer_cxt->state) + if (collation == outer_cxt->collation && + ((state == FDW_COLLATE_UNSAFE && + outer_cxt->state == FDW_COLLATE_SAFE) || + (state == FDW_COLLATE_SAFE && + outer_cxt->state == FDW_COLLATE_UNSAFE))) + { + outer_cxt->state = FDW_COLLATE_SAFE; + } + else if (state > outer_cxt->state) { /* Override previous parent state */ outer_cxt->collation = collation; diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 60c7e115d6..05628d8aa7 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -32,29 +32,29 @@ CREATE SCHEMA "S 1"; CREATE TABLE "S 1"."T 1" ( "C 1" int NOT NULL, c2 int NOT NULL, - c3 text, + c3 text collate "C", c4 timestamptz, c5 timestamp, - c6 varchar(10), - c7 char(10), + c6 varchar(10) collate "C", + c7 char(10) collate "C", c8 user_enum, CONSTRAINT t1_pkey PRIMARY KEY ("C 1") ); CREATE TABLE "S 1"."T 2" ( c1 int NOT NULL, - c2 text, + c2 text collate "C", CONSTRAINT t2_pkey PRIMARY KEY (c1) ); CREATE TABLE "S 1"."T 3" ( c1 int NOT NULL, c2 int NOT NULL, - c3 text, + c3 text collate "C", CONSTRAINT t3_pkey PRIMARY KEY (c1) ); CREATE TABLE "S 1"."T 4" ( c1 int NOT NULL, c2 int NOT NULL, - c3 text, + c3 text collate "C", CONSTRAINT t4_pkey PRIMARY KEY (c1) ); -- Disable autovacuum for these tables to avoid unexpected effects of that @@ -94,16 +94,18 @@ ANALYZE "S 1"."T 3"; ANALYZE "S 1"."T 4"; -- === -- create foreign tables +-- Note: to ensure stable regression results, all collatable columns +-- in these tables must have explicitly-specified collations. 
-- === CREATE FOREIGN TABLE ft1 ( c0 int, c1 int NOT NULL, c2 int NOT NULL, - c3 text, + c3 text collate "C", c4 timestamptz, c5 timestamp, - c6 varchar(10), - c7 char(10) default 'ft1', + c6 varchar(10) collate "C", + c7 char(10) default 'ft1' collate "C", c8 user_enum ) SERVER loopback; ALTER FOREIGN TABLE ft1 DROP COLUMN c0; @@ -111,28 +113,28 @@ CREATE FOREIGN TABLE ft2 ( c1 int NOT NULL, c2 int NOT NULL, cx int, - c3 text, + c3 text collate "C", c4 timestamptz, c5 timestamp, - c6 varchar(10), - c7 char(10) default 'ft2', + c6 varchar(10) collate "C", + c7 char(10) default 'ft2' collate "C", c8 user_enum ) SERVER loopback; ALTER FOREIGN TABLE ft2 DROP COLUMN cx; CREATE FOREIGN TABLE ft4 ( c1 int NOT NULL, c2 int NOT NULL, - c3 text + c3 text collate "C" ) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 3'); CREATE FOREIGN TABLE ft5 ( c1 int NOT NULL, c2 int NOT NULL, - c3 text + c3 text collate "C" ) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 4'); CREATE FOREIGN TABLE ft6 ( c1 int NOT NULL, c2 int NOT NULL, - c3 text + c3 text collate "C" ) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4'); CREATE FOREIGN TABLE ft7 ( c1 int NOT NULL, @@ -288,7 +290,7 @@ EXPLAIN
Re: REINDEX backend filtering
On Thu, Jan 21, 2021 at 11:12:56AM +0800, Julien Rouhaud wrote: > > There was a conflict with a3dc926009be8 (Refactor option handling of > CLUSTER, REINDEX and VACUUM), so rebased version attached. No other > changes included yet. New conflict, v3 attached. >From 63afe51453d4413ad7e73c66268e6ff12bfe5436 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Thu, 3 Dec 2020 15:54:42 +0800 Subject: [PATCH v3] Add a new COLLATION option to REINDEX. --- doc/src/sgml/ref/reindex.sgml | 13 + src/backend/catalog/index.c| 59 +- src/backend/commands/indexcmds.c | 13 +++-- src/backend/utils/cache/relcache.c | 43 src/include/catalog/index.h| 4 ++ src/include/utils/relcache.h | 1 + src/test/regress/expected/create_index.out | 10 src/test/regress/sql/create_index.sql | 10 8 files changed, 149 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index 07795b5737..ead2904b67 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -25,6 +25,7 @@ REINDEX [ ( option [, ...] ) ] { IN where option can be one of: +COLLATION [ text ] CONCURRENTLY [ boolean ] TABLESPACE new_tablespace VERBOSE [ boolean ] @@ -169,6 +170,18 @@ REINDEX [ ( option [, ...] ) ] { IN + +COLLATION + + + This option can be used to filter the list of indexes to rebuild. The + only allowed value is 'not_current', which will only + process indexes that depend on a collation version different than the + current one. + + + + CONCURRENTLY diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 1cb9172a5f..b74ee79d38 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -100,6 +100,12 @@ typedef struct Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER]; } SerializedReindexState; +typedef struct +{ + Oid relid; /* targetr index oid */ + bool deprecated;/* depends on at least on deprected collation? 
*/ +} IndexHasDeprecatedColl; + /* non-export function prototypes */ static bool relationHasPrimaryKey(Relation rel); static TupleDesc ConstructTupleDescriptor(Relation heapRelation, @@ -1350,6 +1356,57 @@ index_check_collation_versions(Oid relid) list_free(context.warned_colls); } +/* + * Detect if an index depends on at least one deprecated collation. + * This is a callback for visitDependenciesOf(). + */ +static bool +do_check_index_has_deprecated_collation(const ObjectAddress *otherObject, + const char *version, + char **new_version, + void *data) +{ + IndexHasDeprecatedColl *context = data; + char *current_version; + + /* We only care about dependencies on collations. */ + if (otherObject->classId != CollationRelationId) + return false; + + /* Fast exit if we already found a deprecated collation version. */ + if (context->deprecated) + return false; + + /* Ask the provider for the current version. Give up if unsupported. */ + current_version = get_collation_version_for_oid(otherObject->objectId); + if (!current_version) + return false; + + if (!version || strcmp(version, current_version) != 0) + context->deprecated = true; + + return false; +} + +bool +index_has_deprecated_collation(Oid relid) +{ + ObjectAddress object; + IndexHasDeprecatedColl context; + + object.classId = RelationRelationId; + object.objectId = relid; + object.objectSubId = 0; + + context.relid = relid; + context.deprecated = false; + + visitDependenciesOf(, _check_index_has_deprecated_collation, + ); + + return context.deprecated; +} + /* * Update the version for collations. A callback for visitDependenciesOf(). */ @@ -3930,7 +3987,7 @@ reindex_relation(Oid relid, int flags, ReindexParams *params) * relcache to get this with a sequential scan if ignoring system * indexes.) 
*/ - indexIds = RelationGetIndexList(rel); + indexIds = RelationGetIndexListFiltered(rel, params->options); if (flags & REINDEX_REL_SUPPRESS_INDEX_USE) { diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 127ba7835d..9bf69ff9d7 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -2475,6 +2475,7 @@ ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel) boolconcurrently =
Support tab completion for upper character inputs in psql
Hi Hackers, When using psql I found there's no tab completion for upper character inputs. It's really inconvenient sometimes so I tried to fix this problem in the attached patch. Here are some examples to show what this patch can do. Action: 1. connect the db using psql 2. input SQL command 3. enter TAB key(twice at the very first time) Results: [master] postgres=# set a all allow_system_table_mods application_name array_nulls postgres=# set A postgres=# set A [patched] postgres=# set a all allow_system_table_mods application_name array_nulls postgres=# set A ALL ALLOW_SYSTEM_TABLE_MODS APPLICATION_NAME ARRAY_NULLS postgres=# set A Please take a look at this patch. Any comment is welcome. Regards, Tang 0001-Support-tab-completion-for-upper-character-inputs-in.patch Description: 0001-Support-tab-completion-for-upper-character-inputs-in.patch
Re: GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()
On Sat, Feb 6, 2021 at 7:40 PM Andres Freund wrote: > Looks like a mistake on my part... Probably a rename regex that somehow > went wrong - I went back and forth on those names way too many > times. Want me to push the fix? Yes, please do. I could do it myself, but better that you do it yourself, just in case. -- Peter Geoghegan
Re: GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()
Hi, On 2021-02-06 12:27:30 -0800, Peter Geoghegan wrote: > Why is GlobalVisIsRemovableFullXid() not named > GlobalVisCheckRemovableFullXid() instead? ISTM that that name makes > much more sense, since it is what I'd expect for a function that is > the "Full XID equivalent" of GlobalVisCheckRemovableXid(). > > Note also that GlobalVisIsRemovableFullXid() is the only symbol name > matching "GlobalVisIsRemovable*". Looks like a mistake on my part... Probably a rename regex that somehow went wrong - I went back and forth on those names way too many times. Want me to push the fix? Greetings, Andres Freund
Re: Single transaction in the tablesync worker?
On Sat, Feb 6, 2021 at 2:10 AM Petr Jelinek wrote: > > Hi, > > Some minor comments about code: > > > + else if (res->status == WALRCV_ERROR && missing_ok) > > + { > > + /* WARNING. Error, but missing_ok = true. */ > > + ereport(WARNING, > > I wonder if we need to add error code to the WalRcvExecResult and check > for the appropriate ones here. Because this can for example return error > because of timeout, not because slot is missing. Not sure if it matters > for current callers though (but then maybe don't call the param > missing_ok?). You are right. The way we are using this function has evolved beyond the original intention. Probably renaming the param to something like "error_ok" would be more appropriate now. Kind Regards, Peter Smith. Fujitsu Australia
Re: There doesn't seem to be any case where PQputCopyEnd() returns 0
On Fri, Feb 5, 2021 at 11:01 PM Fujii Masao wrote: > > > > On 2021/02/05 16:52, Kasahara Tatsuhito wrote: > > Hi, > > > > The following is written in the comments of PQputCopyEnd(). > > > > (snip) > > * Returns 1 if successful, 0 if data could not be sent (only possible > > * in nonblock mode), or -1 if an error occurs. > > (snip) > > > > The PQputCopyEnd() section of the manual (libpq.sgml) describes the > > following. > > > > The result is 1 if the termination message was sent; or in > > nonblocking mode, this may only indicate that the termination > > message was successfully queued. (In nonblocking mode, to be > > certain that the data has been sent, you should next wait for > > write-ready and call , repeating > > until it > > returns zero.) Zero indicates that the function could not queue > > the termination message because of full buffers; this will only > > happen in nonblocking mode. (In this case, wait for > > write-ready and try the call > > again.) If a hard error occurs, -1 is returned; you can use > > to retrieve details. > > > > > > These says that 0 may be returned if a non-blocking mode is used, but > > there doesn't seem to be any case where 0 is returned in the code of > > PQputCopyEnd(). > > I found the past discussion [1] about this issue. > > [1] > https://www.postgresql.org/message-id/CA+Tgmobjj+0modbnmjy7ezeBFOBo9d2mAVcSPkzLx4LtZmc==g...@mail.gmail.com Oh, thank you. I understood what was unclear. Best regards, > > Regards, > > -- > Fujii Masao > Advanced Computing Technology Center > Research and Development Headquarters > NTT DATA CORPORATION -- Tatsuhito Kasahara kasahara.tatsuhito _at_ gmail.com
Re: Preserve attstattarget on REINDEX CONCURRENTLY
On Sat, Feb 06, 2021 at 10:39:53PM +0100, Tomas Vondra wrote: > Copying this info in index_concurrently_swap seems a bit strange - we're > copying other stuff there, but this is modifying something we've already > copied before. I understand why we do it there to make this backpatchable, > but maybe it'd be good to mention this in a comment (or at least the commit > message). We could do this in the backbranches only and the "correct" way in > master, but that does not seem worth it. Thanks. > One minor comment - the code says this: > > /* no need for a refresh if both match */ > if (attstattarget == att->attstattarget) > continue; > > Isn't that just a different way to say "attstattarget is not default")? For REINDEX CONCURRENTLY, yes. I was thinking here about the case where this code is used for other purposes in the future, where attstattarget may not be -1. I'll see about applying this stuff after the next version is tagged then. -- Michael signature.asc Description: PGP signature
Re: Bug in query rewriter - hasModifyingCTE not getting set
After poking around a bit more, I notice that the hasRecursive flag really ought to get propagated as well, since that's also an attribute of the CTE list. That omission doesn't seem to have any ill effect today, since nothing in planning or execution looks at that flag, but someday it might. So what I think we should do is as attached. (I re-integrated your example into with.sql, too.) Given the very limited time remaining before the release wrap, I'm going to go ahead and push this. regards, tom lane diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 0672f497c6..ba6f8a0507 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -536,6 +536,9 @@ rewriteRuleAction(Query *parsetree, * * This could possibly be fixed by using some sort of internally * generated ID, instead of names, to link CTE RTEs to their CTEs. + * However, decompiling the results would be quite confusing; note the + * merge of hasRecursive flags below, which could change the apparent + * semantics of such redundantly-named CTEs. */ foreach(lc, parsetree->cteList) { @@ -557,6 +560,9 @@ rewriteRuleAction(Query *parsetree, /* OK, it's safe to combine the CTE lists */ sub_action->cteList = list_concat(sub_action->cteList, copyObject(parsetree->cteList)); + /* ... 
and don't forget about the associated flags */ + sub_action->hasRecursive |= parsetree->hasRecursive; + sub_action->hasModifyingCTE |= parsetree->hasModifyingCTE; } /* diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index c519a61c4f..5e4ea29243 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -2277,6 +2277,33 @@ SELECT * FROM bug6051_2; 3 (3 rows) +-- silly example to verify that hasModifyingCTE flag is propagated +CREATE TEMP TABLE bug6051_3 AS + select a from generate_series(11,13) as a; +CREATE RULE bug6051_3_ins AS ON INSERT TO bug6051_3 DO INSTEAD + SELECT i FROM bug6051_2; +BEGIN; SET LOCAL force_parallel_mode = on; +WITH t1 AS ( DELETE FROM bug6051_3 RETURNING * ) + INSERT INTO bug6051_3 SELECT * FROM t1; + i +--- + 1 + 2 + 3 + 1 + 2 + 3 + 1 + 2 + 3 +(9 rows) + +COMMIT; +SELECT * FROM bug6051_3; + a +--- +(0 rows) + -- a truly recursive CTE in the same list WITH RECURSIVE t(a) AS ( SELECT 0 diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql index f4ba0d8e39..0ffa44afa7 100644 --- a/src/test/regress/sql/with.sql +++ b/src/test/regress/sql/with.sql @@ -1070,6 +1070,22 @@ INSERT INTO bug6051 SELECT * FROM t1; SELECT * FROM bug6051; SELECT * FROM bug6051_2; +-- silly example to verify that hasModifyingCTE flag is propagated +CREATE TEMP TABLE bug6051_3 AS + select a from generate_series(11,13) as a; + +CREATE RULE bug6051_3_ins AS ON INSERT TO bug6051_3 DO INSTEAD + SELECT i FROM bug6051_2; + +BEGIN; SET LOCAL force_parallel_mode = on; + +WITH t1 AS ( DELETE FROM bug6051_3 RETURNING * ) + INSERT INTO bug6051_3 SELECT * FROM t1; + +COMMIT; + +SELECT * FROM bug6051_3; + -- a truly recursive CTE in the same list WITH RECURSIVE t(a) AS ( SELECT 0
Re: Bug in query rewriter - hasModifyingCTE not getting set
Greg Nancarrow writes: > I found a bug in the query rewriter. If a query that has a modifying > CTE is re-written, the hasModifyingCTE flag is not getting set in the > re-written query. Ugh. > I've attached the patch with the suggested fix (reviewed by Amit Langote). I think either the bit about rule_action is unnecessary, or most of the code immediately above this is wrong, because it's only updating flags in sub_action. Why do you think it's necessary to change rule_action in addition to sub_action? regards, tom lane
Bug in query rewriter - hasModifyingCTE not getting set
Hi Hackers, I found a bug in the query rewriter. If a query that has a modifying CTE is re-written, the hasModifyingCTE flag is not getting set in the re-written query. This bug can result in the query being allowed to execute in parallel-mode, which results in an error. I originally found the problem using INSERT (which doesn't actually affect the current Postgres code, as it doesn't support INSERT in parallel mode) but a colleague of mine (Hou, Zhijie) managed to reproduce it using SELECT as well (see example below), and helped to minimize the patch size. I've attached the patch with the suggested fix (reviewed by Amit Langote). The following reproduces the issue (adapted from a test case in the "with" regression tests): drop table if exists test_data1; create table test_data1(a int, b int) ; insert into test_data1 select generate_series(1,1000), generate_series(1,1000); set force_parallel_mode=on; CREATE TEMP TABLE bug6051 AS select i from generate_series(1,3) as i; SELECT * FROM bug6051; CREATE RULE bug6051_ins AS ON INSERT TO bug6051 DO INSTEAD select a as i from test_data1; WITH t1 AS ( DELETE FROM bug6051 RETURNING * ) INSERT INTO bug6051 SELECT * FROM t1; produces the error: ERROR: cannot assign XIDs during a parallel operation Regards, Greg Nancarrow Fujitsu Australia v1-0001-Fix-bug-in-the-query-rewriter-hasModifyingCTE-not-set.patch Description: Binary data
Re: psql \copy from sends a lot of packets
Heikki Linnakangas writes: > I just noticed that if you load a file using psql: > it sends every line as a separate FE/BE protocol CopyData packet. > ... > I'll add this to the next commitfest. There's similar inefficiency in > the server side in COPY TO, but I'll leave that for another patch. The FE/BE protocol documentation is pretty explicit about this: Copy-in mode (data transfer to the server) is initiated when the backend executes a COPY FROM STDIN SQL statement. The backend sends a CopyInResponse message to the frontend. The frontend should then send zero or more CopyData messages, forming a stream of input data. (The message boundaries are not required to have anything to do with row boundaries, although that is often a reasonable choice.) ... Copy-out mode (data transfer from the server) is initiated when the backend executes a COPY TO STDOUT SQL statement. The backend sends a CopyOutResponse message to the frontend, followed by zero or more CopyData messages (always one per row), followed by CopyDone. So while changing psql isn't so much a problem, changing the server is a wire protocol break. Maybe we should do it anyway, but I'm not sure. regards, tom lane
psql \copy from sends a lot of packets
Hi, I just noticed that if you load a file using psql: \copy from it sends every line as a separate FE/BE protocol CopyData packet. That's pretty wasteful if the lines are narrow. The overhead of each CopyData packet is 5 bytes. To demonstrate, I generated a simple test file with the string "foobar" repeated 10 million times: $ perl -le 'for (1..1000) { print "foobar" }' > /tmp/testdata and loaded that into a temp table with psql: create temporary table copytest (t text) on commit delete rows; \copy copytest from '/tmp/testdata'; I repeated and timed the \copy a few times; it takes about about 3 seconds on my laptop: postgres=# \copy copytest from '/tmp/testdata'; COPY 1000 Time: 3039.625 ms (00:03.040) Wireshark says that that involved about 120 MB of network traffic. The size of the file on disk is only 70 MB. The attached patch modifies psql so that it buffers up 8 kB of data into each CopyData message, instead of sending one per line. That makes the operation faster: postgres=# \copy copytest from '/tmp/testdata'; COPY 1000 Time: 2490.268 ms (00:02.490) And wireshark confirms that there's now only a bit over 70 MB of network traffic. I'll add this to the next commitfest. There's similar inefficiency in the server side in COPY TO, but I'll leave that for another patch. - Heikki >From e49334de4eb51f5ba061bf48d1979a7ab8ef0de9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sun, 7 Feb 2021 00:10:05 +0200 Subject: [PATCH 1/1] In psql \copy from, send data to server in larger chunks. Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message. 
--- src/bin/psql/copy.c | 114 +++- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index 78f0dc5a507..8c56f2470e2 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -581,77 +581,101 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) else { bool copydone = false; + int buflen; + bool at_line_begin = true; + if (showprompt) + { + const char *prompt = get_prompt(PROMPT_COPY, NULL); + + fputs(prompt, stdout); + fflush(stdout); + } + + /* + * In text mode, We have to read the input one line at a time, so that + * we can stop reading at the EOF marker (\.). We mustn't read beyond + * the EOF marker, because if the data is inlined in a SQL script, we + * would eat up the commands after the EOF marker. + */ + buflen = 0; while (!copydone) - { /* for each input line ... */ - bool firstload; - bool linedone; + { + int linelen; + char *fgresult; - if (showprompt) + if (at_line_begin && showprompt) { -const char *prompt = get_prompt(PROMPT_COPY, NULL); +const char *prompt; +sigint_interrupt_enabled = false; + +prompt = get_prompt(PROMPT_COPY, NULL); fputs(prompt, stdout); fflush(stdout); - } - firstload = true; - linedone = false; - - while (!linedone) - { /* for each bufferload in line ... */ -int linelen; -char *fgresult; - -/* enable longjmp while waiting for input */ sigint_interrupt_enabled = true; + } -fgresult = fgets(buf, sizeof(buf), copystream); - -sigint_interrupt_enabled = false; + /* enable longjmp while waiting for input */ + sigint_interrupt_enabled = true; -if (!fgresult) -{ - copydone = true; - break; -} + fgresult = fgets([buflen], COPYBUFSIZ - buflen, copystream); -linelen = strlen(buf); + sigint_interrupt_enabled = false; -/* current line is done? 
*/ -if (linelen > 0 && buf[linelen - 1] == '\n') - linedone = true; + if (!fgresult) +copydone = true; + else + { +linelen = strlen(fgresult); +buflen += linelen; -/* check for EOF marker, but not on a partial line */ -if (firstload) +if (buf[buflen - 1] == '\n') { - /* - * This code erroneously assumes '\.' on a line alone - * inside a quoted CSV string terminates the \copy. - * https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org - */ - if (strcmp(buf, "\\.\n") == 0 || - strcmp(buf, "\\.\r\n") == 0) + /* check for EOF marker, but not on a partial line */ + if (at_line_begin) { - copydone = true; - break; + /* + * This code erroneously assumes '\.' on a line alone + * inside a quoted CSV string terminates the \copy. + * https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org + */ + if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) || + (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0)) + { + copydone = true; + } } - firstload =
Re: Preserve attstattarget on REINDEX CONCURRENTLY
On 2/5/21 8:43 AM, Michael Paquier wrote: On Fri, Feb 05, 2021 at 08:22:17AM +0100, Ronan Dunklau wrote: I'm not surprised by this answer, the good news is it's being back-patched. Yes, I have no problem with that. Until this gets fixed, the damage can be limited with an extra ALTER INDEX, that takes a ShareUpdateExclusiveLock so there is no impact on the concurrent activity. Looks good to me ! Thank you. Thanks for looking at it. Tomas, do you have any comments? -- Not really. Copying this info in index_concurrently_swap seems a bit strange - we're copying other stuff there, but this is modifying something we've already copied before. I understand why we do it there to make this backpatchable, but maybe it'd be good to mention this in a comment (or at least the commit message). We could do this in the backbranches only and the "correct" way in master, but that does not seem worth it. One minor comment - the code says this: /* no need for a refresh if both match */ if (attstattarget == att->attstattarget) continue; Isn't that just a different way to say "attstattarget is not default")? regards -- Tomas Vondra EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()
Why is GlobalVisIsRemovableFullXid() not named GlobalVisCheckRemovableFullXid() instead? ISTM that that name makes much more sense, since it is what I'd expect for a function that is the "Full XID equivalent" of GlobalVisCheckRemovableXid(). Note also that GlobalVisIsRemovableFullXid() is the only symbol name matching "GlobalVisIsRemovable*". Have I missed something? -- Peter Geoghegan
Re: CLUSTER on partitioned index
Hi, For v7-0002-Implement-CLUSTER-of-partitioned-table.patch: +* We have to build the list in a different memory context so it will +* survive the cross-transaction processing +*/ + old_context = MemoryContextSwitchTo(cluster_context); cluster_context is not modified within the loop. Can the memory context switching code be moved outside the loop ? Cheers On Sat, Feb 6, 2021 at 6:46 AM Justin Pryzby wrote: > On Mon, Jan 18, 2021 at 12:34:59PM -0600, Justin Pryzby wrote: > > On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote: > > > On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote: > > > > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote: > > > > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote: > > > > > > I'm attaching a counter-proposal to your catalog change, which > preserves > > > > > > indisclustered on children of clustered, partitioned indexes, > and invalidates > > > > > > indisclustered when attaching unclustered indexes. > > > > > > > > > > ..and now propagates CLUSTER ON to child indexes. > > > > > > > > > > I left this as separate patches to show what I mean and what's new > while we > > > > > discuss it. > > > > > > > > This fixes some omissions in the previous patch and error in its > test cases. > > > > > > > > CLUSTER ON recurses to children, since I think a clustered parent > index means > > > > that all its child indexes are clustered. "SET WITHOUT CLUSTER" > doesn't have > > > > to recurse to children, but I did it like that for consistency and > it avoids > > > > the need to special case InvalidOid. > > > > > > The previous patch failed pg_upgrade when restoring a clustered, > parent index, > > > since it's marked INVALID until indexes have been built on all child > tables, so > > > CLUSTER ON was rejected on invalid index. 
> > > > > > So I think CLUSTER ON needs to be a separate pg_dump object, to allow > attaching > > > the child index (thereby making the parent "valid") to happen before > SET > > > CLUSTER on the parent index. > > > > Rebased on b5913f612 and now a3dc92600. > > This resolves ORDER BY test failure with COLLATE "C". > > -- > Justin >
Re: TRUNCATE on foreign table
Hi, + if (strcmp(defel->defname, "truncatable") == 0) + server_truncatable = defGetBoolean(defel); Looks like we can break out of the loop when the condition is met. + /* ExecForeignTruncate() is invoked for each server */ The method name in the comment is slightly different from the actual method name. + if (strcmp(defel->defname, "truncatable") == 0) + truncatable = defGetBoolean(defel); We can break out of the loop when the condition is met. Cheers On Sat, Feb 6, 2021 at 5:11 AM Kazutaka Onishi wrote: > Hello, > > The attached patch is for supporting "TRUNCATE" on foreign tables. > > This patch includes: > * Adding "ExecForeignTruncate" function into FdwRoutine. > * Enabling "postgres_fdw" to use TRUNCATE. > > This patch was proposed by Kaigai-san in March 2020, > but it was returned because it can't be applied to the latest source codes. > > Please refer to the discussion. > > https://www.postgresql.org/message-id/flat/CAOP8fzb-t3WVNLjGMC%2B4sV4AZa9S%3DMAQ7Q6pQoADMCf_1jp4ew%40mail.gmail.com#3b6c6ff85ff5c722b36c7a09b2dd7165 > > I have fixed the patch due to submit it to Commit Fest 2021-03. > > regards, > > -- > -- > Kazutaka Onishi > (oni...@heterodb.com) >
Re: CLUSTER on partitioned index
On Mon, Jan 18, 2021 at 12:34:59PM -0600, Justin Pryzby wrote: > On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote: > > On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote: > > > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote: > > > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote: > > > > > I'm attaching a counter-proposal to your catalog change, which > > > > > preserves > > > > > indisclustered on children of clustered, partitioned indexes, and > > > > > invalidates > > > > > indisclustered when attaching unclustered indexes. > > > > > > > > ..and now propagates CLUSTER ON to child indexes. > > > > > > > > I left this as separate patches to show what I mean and what's new > > > > while we > > > > discuss it. > > > > > > This fixes some omissions in the previous patch and error in its test > > > cases. > > > > > > CLUSTER ON recurses to children, since I think a clustered parent index > > > means > > > that all its child indexes are clustered. "SET WITHOUT CLUSTER" doesn't > > > have > > > to recurse to children, but I did it like that for consistency and it > > > avoids > > > the need to special case InvalidOid. > > > > The previous patch failed pg_upgrade when restoring a clustered, parent > > index, > > since it's marked INVALID until indexes have been built on all child > > tables, so > > CLUSTER ON was rejected on invalid index. > > > > So I think CLUSTER ON needs to be a separate pg_dump object, to allow > > attaching > > the child index (thereby making the parent "valid") to happen before SET > > CLUSTER on the parent index. > > Rebased on b5913f612 and now a3dc92600. This resolves ORDER BY test failure with COLLATE "C". -- Justin >From 1101d6b8a44f5cc170816f305476750352dc06de Mon Sep 17 00:00:00 2001 From: Justin Pryzby Date: Thu, 26 Nov 2020 14:37:08 -0600 Subject: [PATCH v7 1/7] pg_dump: make CLUSTER ON a separate dump object.. 
..since it needs to be restored after any child indexes are restored *and attached*. The order needs to be: 1) restore child and parent index (order doesn't matter); 2) attach child index; 3) set cluster on child and parent index (order doesn't matter); --- src/bin/pg_dump/pg_dump.c | 86 ++ src/bin/pg_dump/pg_dump.h | 8 src/bin/pg_dump/pg_dump_sort.c | 8 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index d99b61e621..8dc8a42964 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -208,6 +208,7 @@ static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); static void dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo); +static void dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo); static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); @@ -7092,6 +7093,11 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_inddependcollversions; int ntups; + int ncluster = 0; + IndexClusterInfo *clusterinfo; + clusterinfo = (IndexClusterInfo *) + pg_malloc0(numTables * sizeof(IndexClusterInfo)); + for (i = 0; i < numTables; i++) { TableInfo *tbinfo = [i]; @@ -7471,6 +7477,27 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) /* Plain secondary index */ indxinfo[j].indexconstraint = 0; } + + /* Record each table's CLUSTERed index, if any */ + if (indxinfo[j].indisclustered) + { +IndxInfo *index = [j]; +IndexClusterInfo *cluster = [ncluster]; + +cluster->dobj.objType = DO_INDEX_CLUSTER_ON; +cluster->dobj.catId.tableoid = 0; +cluster->dobj.catId.oid = 0; +AssignDumpId(>dobj); +cluster->dobj.name = pg_strdup(index->dobj.name); +cluster->dobj.namespace = 
index->indextable->dobj.namespace; +cluster->index = index; +cluster->indextable = [i]; + +/* The CLUSTER ON depends on its index.. */ +addObjectDependency(>dobj, index->dobj.dumpId); + +ncluster++; + } } PQclear(res); @@ -10323,6 +10350,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_SUBSCRIPTION: dumpSubscription(fout, (SubscriptionInfo *) dobj); break; + case DO_INDEX_CLUSTER_ON: + dumpIndexClusterOn(fout, (IndexClusterInfo *) dobj); + break; case DO_PRE_DATA_BOUNDARY: case DO_POST_DATA_BOUNDARY: /* never dumped, nothing to do */ @@ -16543,6 +16573,41 @@ getAttrName(int attrnum, TableInfo *tblInfo) return NULL;/* keep compiler quiet */ } +/* + * dumpIndexClusterOn + * record that the index is clustered. + */ +static void +dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo) +{ + DumpOptions *dopt =
Re: Fuzz testing COPY FROM parsing
On 2/5/21 2:50 PM, Heikki Linnakangas wrote: > On 05/02/2021 21:16, Andrew Dunstan wrote: >> >> On 2/5/21 10:54 AM, Stephen Frost wrote: >>> * Heikki Linnakangas (hlinn...@iki.fi) wrote: I ran it for about 2 h on my laptop with the patch I was working on [2]. It didn't find any crashes, but it generated about 1300 input files that it considered "interesting" based on code coverage analysis. When I took those generated inputs, and ran them against unpatched and patched server, some inputs produced different results. So that revealed a couple of bugs in the patch. (I'll post a fixed patched version on that thread soon.) I hope others find this useful, too. >>> Nice! I wonder if there's a way to have a buildfarm member or other >>> system doing this automatically on new commits and perhaps adding >>> coverage for other things like the JSON code.. >> >> Not easily in the buildfarm as it is today. We can easily create modules >> for extensions and other things that don't require modification of core >> code, but things that require patching core code are a whole different >> story. > > It might be possible to call the fuzzer's HF_ITER() function from a C > extension instead. So you would run a query like "SELECT > next_fuzz_iter()" in a loop, and next_fuzz_iter() would be a C > function that calls HF_ITER(), and executes the actual query with SPI. > > That said, I don't think it's important to run the fuzzer in the > buildfarm. It should be enough to do that every once in a while, when > you modify the COPY FROM code (or something else that you want to fuzz > test). But we could easily include the test inputs generated by the > fuzzer in the regular tests. We've usually been very frugal in adding > tests, though, to keep the time it takes to run all the tests short. > > This strikes me as a better design in any case. cheers andrew -- Andrew Dunstan EDB: https://www.enterprisedb.com
TRUNCATE on foreign table
Hello, The attached patch is for supporting "TRUNCATE" on foreign tables. This patch includes: * Adding "ExecForeignTruncate" function into FdwRoutine. * Enabling "postgres_fdw" to use TRUNCATE. This patch was proposed by Kaigai-san in March 2020, but it was returned because it can't be applied to the latest source codes. Please refer to the discussion. https://www.postgresql.org/message-id/flat/CAOP8fzb-t3WVNLjGMC%2B4sV4AZa9S%3DMAQ7Q6pQoADMCf_1jp4ew%40mail.gmail.com#3b6c6ff85ff5c722b36c7a09b2dd7165 I have fixed the patch due to submit it to Commit Fest 2021-03. regards, -- -- Kazutaka Onishi (oni...@heterodb.com) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 6faf499f9a..a9ce323a67 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) deparseRelation(buf, rel); } +/* + * Construct a simple "TRUNCATE rel" statement + */ +void +deparseTruncateSql(StringInfo buf, + List *frels_list, + List *frels_extra, + DropBehavior behavior, + bool restart_seqs) +{ + ListCell *lc1, *lc2; + + appendStringInfoString(buf, "TRUNCATE "); + forboth (lc1, frels_list, + lc2, frels_extra) + { + Relation frel = lfirst(lc1); + int extra = lfirst_int(lc2); + + if (lc1 != list_head(frels_list)) + appendStringInfoString(buf, ", "); + if (extra != 0) + appendStringInfoString(buf, "ONLY "); + deparseRelation(buf, frel); + } + appendStringInfo(buf, " %s IDENTITY", + restart_seqs ? "RESTART" : "CONTINUE"); + switch (behavior) + { + case DROP_RESTRICT: + appendStringInfoString(buf, " RESTRICT"); + break; + case DROP_CASCADE: + appendStringInfoString(buf, " CASCADE"); + break; + } +} + /* * Construct name to use for given column, and emit it into buf. * If it has a column_name FDW option, use that instead of attribute name. 
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index b09dce63f5..17e0f250c8 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -8229,6 +8229,239 @@ select * from rem3; drop foreign table rem3; drop table loc3; +-- === +-- test for TRUNCATE +-- === +CREATE TABLE tru_rtable0 (id int primary key, x text); +CREATE TABLE tru_rtable1 (id int primary key, y text); +CREATE FOREIGN TABLE tru_ftable (id int, x text) + SERVER loopback OPTIONS (table_name 'tru_rtable0'); +INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x); +CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id); +CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable +FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable +FOR VALUES WITH (MODULUS 2, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'tru_rtable1'); +INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x); +CREATE TABLE tru_pk_table(id int primary key, z text); +CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id)); +INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x); +INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x); +CREATE FOREIGN TABLE tru_pk_ftable (id int, z text) + SERVER loopback OPTIONS (table_name 'tru_pk_table'); +CREATE TABLE tru_rtable_parent (id int, a text); +CREATE TABLE tru_rtable_child (id int, a text); +CREATE FOREIGN TABLE tru_ftable_parent (id int, a text) + SERVER loopback OPTIONS (table_name 'tru_rtable_parent'); +CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent) + SERVER loopback OPTIONS (table_name 'tru_rtable_child'); +INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x); +INSERT INTO tru_rtable_child (SELECT x, md5(x::text) FROM generate_series(10, 18) x); +-- normal truncate 
+SELECT * FROM tru_ftable; + id |x ++-- + 1 | c4ca4238a0b923820dcc509a6f75849b + 2 | c81e728d9d4c2f636f067f89cc14862c + 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 + 4 | a87ff679a2f3e71d9181a67b7542122c + 5 | e4da3b7fbbce2345d7772b0674a318d5 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 7 | 8f14e45fceea167a5a36dedd4bea2543 + 8 | c9f0f895fb98ab9159f51fd0297e236d + 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 10 | d3d9446802a44259755d38e6d163e820 +(10 rows) + +TRUNCATE tru_ftable; +SELECT * FROM tru_rtable0; -- empty + id | x ++--- +(0 rows) + +SELECT * FROM tru_ftable; -- empty + id | x ++--- +(0 rows) + +-- 'truncatable' option +ALTER SERVER loopback OPTIONS (ADD truncatable 'false'); +TRUNCATE tru_ftable; -- error +ERROR:
Re: pg_replication_origin_drop API potential race condition
On Sat, Feb 6, 2021 at 3:26 PM Petr Jelinek wrote: > > On 06/02/2021 07:29, Amit Kapila wrote: > > On Fri, Feb 5, 2021 at 6:45 PM Euler Taveira wrote: > >> - replorigin_drop(roident, true); > >> + replorigin_drop_by_name(name, false /* missing_ok */ , true /* nowait */ > >> ); > >> > >> A modern IDE would certainly show you the function definition that allows > >> you > >> to check what each parameter value is without having to go back and forth. > >> I > >> saw a few occurrences of this pattern in the source code and IMO it could > >> be > >> used when it is not obvious what that value means. Booleans are easier to > >> figure out, however, sometimes integer and text are not. > >> > > Fair enough, removed in the attached patch. > > > To be fair the logical replication framework is full of these comments > so it's pretty natural to add them to new code as well, but I agree with > Euler that it's unnecessary with any reasonable development tooling. > > The patch as posted looks good to me, > Thanks, but today again testing this API, I observed that we can still get "tuple concurrently deleted" because we are releasing the lock on ReplicationOriginRelationId at the end of API replorigin_drop_by_name. So there is no guarantee that invalidation reaches other backend doing the same operation. I think we need to keep the lock till the end of xact as we do in other drop operations (see DropTableSpace, dropdb). -- With Regards, Amit Kapila.
Re: pg_replication_origin_drop API potential race condition
On 06/02/2021 07:29, Amit Kapila wrote: On Fri, Feb 5, 2021 at 6:45 PM Euler Taveira wrote: On Fri, Feb 5, 2021, at 4:01 AM, Amit Kapila wrote: I am not completely sure whether we should retire replorigin_drop or just keep it for backward compatibility? What do you think? Anybody else has any opinion? We could certainly keep some code for backward compatibility, however, we have to consider if it is (a) an exposed API and/or (b) a critical path. We break several extensions every release due to Postgres extensibility. For (a), it is not an exposed function, I mean, we are not changing `pg_replication_origin_drop`. Hence, there is no need to keep it. In (b), we could risk slowing down some critical paths if we decide to keep the old function and create a new one that contains additional features. It is not the case for this function. It is rare that an extension does not have a few #ifdef if it supports multiple Postgres versions. IMO we should keep as little code as possible in the core in favor of maintainability. Yeah, that makes sense. I was a bit worried about pglogical but I think they can easily update it if required, so removed as per your suggestion. Petr, any opinion on this matter? I am planning to push this early next week (by Tuesday) unless you or someone else think it is not a good idea. - replorigin_drop(roident, true); + replorigin_drop_by_name(name, false /* missing_ok */ , true /* nowait */ ); A modern IDE would certainly show you the function definition that allows you to check what each parameter value is without having to go back and forth. I saw a few occurrences of this pattern in the source code and IMO it could be used when it is not obvious what that value means. Booleans are easier to figure out, however, sometimes integer and text are not. Fair enough, removed in the attached patch. 
To be fair the logical replication framework is full of these comments so it's pretty natural to add them to new code as well, but I agree with Euler that it's unnecessary with any reasonable development tooling. The patch as posted looks good to me, as an extension author I normally have origin cached by id, so the api change means I have to do name lookup now, but given this is just for drop, it does not really matter. -- Petr
Re: Single transaction in the tablesync worker?
On 06/02/2021 06:07, Amit Kapila wrote: On Sat, Feb 6, 2021 at 6:22 AM Peter Smith wrote: On Sat, Feb 6, 2021 at 2:10 AM Petr Jelinek wrote: +ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char syncslotname[NAMEDATALEN]) +{ + if (syncslotname) + sprintf(syncslotname, "pg_%u_sync_%u", suboid, relid); + else + syncslotname = psprintf("pg_%u_sync_%u", suboid, relid); + + return syncslotname; +} Given that we are now explicitly dropping slots, what happens here if we have 2 different downstreams that happen to get same suboid and reloid, will one of them drop the slot of the other one? Previously with the cleanup being left to temp slot we'd at maximum got error when creating it but with the new logic in LogicalRepSyncTableStart it feels like we could get into a situation where 2 downstreams are fighting over slot no? I think so. See, if the alternative suggested below works or if you have any other suggestions for the same? The PG docs [1] say "there is only one copy of pg_subscription per cluster, not one per database". IIUC that means it is not possible for 2 different subscriptions to have the same suboid. I think he is talking about two different clusters having separate subscriptions but pointing to the same publisher. In different clusters, we can get the same subid/relid. I think we need a cluster-wide unique identifier to distinguish among different subscribers. How about using the system_identifier stored in the control file (we can use GetSystemIdentifier to retrieve it). I think one concern could be that adding that to slot name could exceed the max length of slot (NAMEDATALEN -1) but I don't think that is the case here (pg_%u_sync_%u_UINT64_FORMAT (3 + 10 + 6 + 10 + 20 + '\0')). Note last is system_identifier in this scheme. Yep that's what I mean and system_identifier seems like a good choice to me. Do you guys think that works or let me know if you have any other better idea? 
Petr, is there a reason why such an identifier is not considered originally, is there any risk in it? Originally it was not considered, likely because it's all based on pglogical/BDR work where ids are hashes of stuff that's unique across a group of instances, not counter based like Oids in PostgreSQL and I simply didn't realize it could be a problem until reading this patch :) -- Petr Jelinek