Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

2021-02-06 Thread Tom Lane
Rebased over b663a4136 --- no substantive changes, just keeping
the cfbot happy.

regards, tom lane

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..c38e2419d5 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -320,7 +320,9 @@ foreign_expr_walker(Node *node,
 
 /*
  * If the Var is from the foreign table, we consider its
- * collation (if any) safe to use.  If it is from another
+ * collation (if any) safe to use, *unless* it's
+ * DEFAULT_COLLATION_OID.  We treat that as meaning "we don't
+ * know which collation this is".  If it is from another
  * table, we treat its collation the same way as we would a
  * Param's collation, ie it's not safe for it to have a
  * non-default collation.
@@ -342,7 +344,12 @@ foreign_expr_walker(Node *node,
 
 	/* Else check the collation */
 	collation = var->varcollid;
-	state = OidIsValid(collation) ? FDW_COLLATE_SAFE : FDW_COLLATE_NONE;
+	if (collation == InvalidOid)
+		state = FDW_COLLATE_NONE;
+	else if (collation == DEFAULT_COLLATION_OID)
+		state = FDW_COLLATE_UNSAFE;
+	else
+		state = FDW_COLLATE_SAFE;
 }
 else
 {
@@ -813,8 +820,24 @@ foreign_expr_walker(Node *node,
 
 	/*
 	 * Now, merge my collation information into my parent's state.
+	 *
+	 * If one branch of an expression derives a non-default collation safely
+	 * (that is, from a foreign Var) and another one derives the same
+	 * collation unsafely, we can consider the expression safe overall.  This
+	 * allows cases such as "foreign_var = ('foo' COLLATE x)" where x is the
+	 * same collation the foreign_var has anyway.  Note that we will not ship
+	 * any explicit COLLATE clause to the remote, but rely on it to re-derive
+	 * the correct collation based on the foreign_var.
 	 */
-	if (state > outer_cxt->state)
+	if (collation == outer_cxt->collation &&
+		((state == FDW_COLLATE_UNSAFE &&
+		  outer_cxt->state == FDW_COLLATE_SAFE) ||
+		 (state == FDW_COLLATE_SAFE &&
+		  outer_cxt->state == FDW_COLLATE_UNSAFE)))
+	{
+		outer_cxt->state = FDW_COLLATE_SAFE;
+	}
+	else if (state > outer_cxt->state)
 	{
 		/* Override previous parent state */
 		outer_cxt->collation = collation;
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 60c7e115d6..05628d8aa7 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -32,29 +32,29 @@ CREATE SCHEMA "S 1";
 CREATE TABLE "S 1"."T 1" (
 	"C 1" int NOT NULL,
 	c2 int NOT NULL,
-	c3 text,
+	c3 text collate "C",
 	c4 timestamptz,
 	c5 timestamp,
-	c6 varchar(10),
-	c7 char(10),
+	c6 varchar(10) collate "C",
+	c7 char(10) collate "C",
 	c8 user_enum,
 	CONSTRAINT t1_pkey PRIMARY KEY ("C 1")
 );
 CREATE TABLE "S 1"."T 2" (
 	c1 int NOT NULL,
-	c2 text,
+	c2 text collate "C",
 	CONSTRAINT t2_pkey PRIMARY KEY (c1)
 );
 CREATE TABLE "S 1"."T 3" (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text,
+	c3 text collate "C",
 	CONSTRAINT t3_pkey PRIMARY KEY (c1)
 );
 CREATE TABLE "S 1"."T 4" (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text,
+	c3 text collate "C",
 	CONSTRAINT t4_pkey PRIMARY KEY (c1)
 );
 -- Disable autovacuum for these tables to avoid unexpected effects of that
@@ -94,16 +94,18 @@ ANALYZE "S 1"."T 3";
 ANALYZE "S 1"."T 4";
 -- ===
 -- create foreign tables
+-- Note: to ensure stable regression results, all collatable columns
+-- in these tables must have explicitly-specified collations.
 -- ===
 CREATE FOREIGN TABLE ft1 (
 	c0 int,
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text,
+	c3 text collate "C",
 	c4 timestamptz,
 	c5 timestamp,
-	c6 varchar(10),
-	c7 char(10) default 'ft1',
+	c6 varchar(10) collate "C",
+	c7 char(10) default 'ft1' collate "C",
 	c8 user_enum
 ) SERVER loopback;
 ALTER FOREIGN TABLE ft1 DROP COLUMN c0;
@@ -111,28 +113,28 @@ CREATE FOREIGN TABLE ft2 (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
 	cx int,
-	c3 text,
+	c3 text collate "C",
 	c4 timestamptz,
 	c5 timestamp,
-	c6 varchar(10),
-	c7 char(10) default 'ft2',
+	c6 varchar(10) collate "C",
+	c7 char(10) default 'ft2' collate "C",
 	c8 user_enum
 ) SERVER loopback;
 ALTER FOREIGN TABLE ft2 DROP COLUMN cx;
 CREATE FOREIGN TABLE ft4 (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text
+	c3 text collate "C"
 ) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 3');
 CREATE FOREIGN TABLE ft5 (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text
+	c3 text collate "C"
 ) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 4');
 CREATE FOREIGN TABLE ft6 (
 	c1 int NOT NULL,
 	c2 int NOT NULL,
-	c3 text
+	c3 text collate "C"
 ) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4');
 CREATE FOREIGN TABLE ft7 (
 	c1 int NOT NULL,
@@ -288,7 +290,7 @@ EXPLAIN 

Re: REINDEX backend filtering

2021-02-06 Thread Julien Rouhaud
On Thu, Jan 21, 2021 at 11:12:56AM +0800, Julien Rouhaud wrote:
> 
> There was a conflict with a3dc926009be8 (Refactor option handling of
> CLUSTER, REINDEX and VACUUM), so rebased version attached.  No other
> changes included yet.

New conflict, v3 attached.
From 63afe51453d4413ad7e73c66268e6ff12bfe5436 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud 
Date: Thu, 3 Dec 2020 15:54:42 +0800
Subject: [PATCH v3] Add a new COLLATION option to REINDEX.

---
 doc/src/sgml/ref/reindex.sgml  | 13 +
 src/backend/catalog/index.c| 59 +-
 src/backend/commands/indexcmds.c   | 13 +++--
 src/backend/utils/cache/relcache.c | 43 
 src/include/catalog/index.h|  4 ++
 src/include/utils/relcache.h   |  1 +
 src/test/regress/expected/create_index.out | 10 
 src/test/regress/sql/create_index.sql  | 10 
 8 files changed, 149 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml
index 07795b5737..ead2904b67 100644
--- a/doc/src/sgml/ref/reindex.sgml
+++ b/doc/src/sgml/ref/reindex.sgml
@@ -25,6 +25,7 @@ REINDEX [ ( option [, ...] ) ] { IN
 
where option can be one of:
 
+COLLATION [ text ]
 CONCURRENTLY [ boolean ]
 TABLESPACE new_tablespace
 VERBOSE [ boolean ]
@@ -169,6 +170,18 @@ REINDEX [ ( option [, ...] ) ] { IN
 

 
+   
+COLLATION
+
+ 
+  This option can be used to filter the list of indexes to rebuild.  The
+  only allowed value is 'not_current', which will only
+  process indexes that depend on a collation version different than the
+  current one.
+ 
+
+   
+

 CONCURRENTLY
 
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1cb9172a5f..b74ee79d38 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -100,6 +100,12 @@ typedef struct
Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
 } SerializedReindexState;
 
+typedef struct
+{
+   Oid relid;  /* target index oid */
+   bool deprecated;/* depends on at least one deprecated collation? */
+} IndexHasDeprecatedColl;
+
 /* non-export function prototypes */
 static bool relationHasPrimaryKey(Relation rel);
 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
@@ -1350,6 +1356,57 @@ index_check_collation_versions(Oid relid)
list_free(context.warned_colls);
 }
 
+/*
+ * Detect if an index depends on at least one deprecated collation.
+ * This is a callback for visitDependenciesOf().
+ */
+static bool
+do_check_index_has_deprecated_collation(const ObjectAddress *otherObject,
+   const char *version,
+   char **new_version,
+   void *data)
+{
+   IndexHasDeprecatedColl *context = data;
+   char *current_version;
+
+   /* We only care about dependencies on collations. */
+   if (otherObject->classId != CollationRelationId)
+   return false;
+
+   /* Fast exit if we already found a deprecated collation version. */
+   if (context->deprecated)
+   return false;
+
+   /* Ask the provider for the current version.  Give up if unsupported. */
+   current_version = get_collation_version_for_oid(otherObject->objectId);
+   if (!current_version)
+   return false;
+
+   if (!version || strcmp(version, current_version) != 0)
+   context->deprecated = true;
+
+   return false;
+}
+
+bool
+index_has_deprecated_collation(Oid relid)
+{
+   ObjectAddress object;
+   IndexHasDeprecatedColl context;
+
+   object.classId = RelationRelationId;
+   object.objectId = relid;
+   object.objectSubId = 0;
+
+   context.relid = relid;
+   context.deprecated = false;
+
+   visitDependenciesOf(&object, do_check_index_has_deprecated_collation,
+   &context);
+
+   return context.deprecated;
+}
+
 /*
  * Update the version for collations.  A callback for visitDependenciesOf().
  */
@@ -3930,7 +3987,7 @@ reindex_relation(Oid relid, int flags, ReindexParams *params)
 * relcache to get this with a sequential scan if ignoring system
 * indexes.)
 */
-   indexIds = RelationGetIndexList(rel);
+   indexIds = RelationGetIndexListFiltered(rel, params->options);
 
if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
{
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 127ba7835d..9bf69ff9d7 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -2475,6 +2475,7 @@ ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
 	bool		concurrently = 

Support tab completion for upper character inputs in psql

2021-02-06 Thread Tang, Haiying
Hi Hackers,

While using psql I found there's no tab completion for upper-case inputs.
That's really inconvenient sometimes, so I've tried to fix the problem in the
attached patch.

Here are examples showing what this patch can do.
Steps:
1. Connect to the database using psql.
2. Type a SQL command prefix.
3. Press the TAB key (twice the very first time).

Results:
[master]
postgres=# set a
all  allow_system_table_mods  application_name  array_nulls
postgres=# set A

postgres=# set A

[patched]
postgres=# set a
all  allow_system_table_mods  application_name  array_nulls
postgres=# set A
ALL  ALLOW_SYSTEM_TABLE_MODS  APPLICATION_NAME  ARRAY_NULLS
postgres=# set A
Please take a look at this patch. Any comments are welcome.

Regards,
Tang 




0001-Support-tab-completion-for-upper-character-inputs-in.patch
Description: 0001-Support-tab-completion-for-upper-character-inputs-in.patch


Re: GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()

2021-02-06 Thread Peter Geoghegan
On Sat, Feb 6, 2021 at 7:40 PM Andres Freund  wrote:
> Looks like a mistake on my part... Probably a rename regex that somehow
> went wrong - I went back and forth on those names way too many
> times. Want me to push the fix?

Yes, please do. I could do it myself, but better that you do it
yourself, just in case.

-- 
Peter Geoghegan




Re: GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()

2021-02-06 Thread Andres Freund
Hi,

On 2021-02-06 12:27:30 -0800, Peter Geoghegan wrote:
> Why is GlobalVisIsRemovableFullXid() not named
> GlobalVisCheckRemovableFullXid() instead? ISTM that that name makes
> much more sense, since it is what I'd expect for a function that is
> the "Full XID equivalent" of GlobalVisCheckRemovableXid().
> 
> Note also that GlobalVisIsRemovableFullXid() is the only symbol name
> matching "GlobalVisIsRemovable*".

Looks like a mistake on my part... Probably a rename regex that somehow
went wrong - I went back and forth on those names way too many
times. Want me to push the fix?

Greetings,

Andres Freund




Re: Single transaction in the tablesync worker?

2021-02-06 Thread Peter Smith
On Sat, Feb 6, 2021 at 2:10 AM Petr Jelinek wrote:
>
> Hi,
>
> Some minor comments about code:
>
> > + else if (res->status == WALRCV_ERROR && missing_ok)
> > + {
> > + /* WARNING. Error, but missing_ok = true. */
> > + ereport(WARNING,
>
> I wonder if we need to add an error code to the WalRcvExecResult and check
> for the appropriate ones here. Because this can for example return an error
> because of a timeout, not because the slot is missing. Not sure if it
> matters for current callers though (but then maybe don't call the param
> missing_ok?).

You are right. The way we are using this function has evolved beyond
the original intention.
Probably renaming the param to something like "error_ok" would be more
appropriate now.
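
To sketch Petr's idea (a rough illustration only; the "sqlstate" field on
WalRcvExecResult is assumed here and does not exist yet, and "slotname" is
a stand-in for the caller's variable):

	if (res->status == WALRCV_ERROR &&
		missing_ok &&
		res->sqlstate == ERRCODE_UNDEFINED_OBJECT)
	{
		/* The slot is really absent; warn instead of erroring out. */
		ereport(WARNING,
				(errmsg("could not drop replication slot \"%s\" on publisher: %s",
						slotname, res->err)));
	}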


Kind Regards,
Peter Smith.
Fujitsu Australia




Re: There doesn't seem to be any case where PQputCopyEnd() returns 0

2021-02-06 Thread Kasahara Tatsuhito
On Fri, Feb 5, 2021 at 11:01 PM Fujii Masao  wrote:
>
>
>
> On 2021/02/05 16:52, Kasahara Tatsuhito wrote:
> > Hi,
> >
> > The following is written in the comments of PQputCopyEnd().
> >
> >   (snip)
> >   * Returns 1 if successful, 0 if data could not be sent (only possible
> >   * in nonblock mode), or -1 if an error occurs.
> >   (snip)
> >
> > The PQputCopyEnd() section of the manual (libpq.sgml) describes the 
> > following.
> >
> > The result is 1 if the termination message was sent; or in
> > nonblocking mode, this may only indicate that the termination
> > message was successfully queued.  (In nonblocking mode, to be
> > certain that the data has been sent, you should next wait for
> > write-ready and call PQflush, repeating until it
> > returns zero.)  Zero indicates that the function could not queue
> > the termination message because of full buffers; this will only
> > happen in nonblocking mode.  (In this case, wait for
> > write-ready and try the PQputCopyEnd call
> > again.)  If a hard error occurs, -1 is returned; you can use
> > PQerrorMessage to retrieve details.
> >
> >
> > These say that 0 may be returned if nonblocking mode is used, but
> > there doesn't seem to be any case where 0 is actually returned in the
> > code of PQputCopyEnd().
>
> I found the past discussion [1] about this issue.
>
> [1]
> https://www.postgresql.org/message-id/CA+Tgmobjj+0modbnmjy7ezeBFOBo9d2mAVcSPkzLx4LtZmc==g...@mail.gmail.com
Oh, thank you.
Now I understand what was unclear.
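
For reference, the calling sequence the documentation describes would look
roughly like the sketch below (wait_for_write_ready() is a hypothetical
helper that waits on PQsocket(conn) with select() or poll()):

	#include <libpq-fe.h>

	extern void wait_for_write_ready(PGconn *conn);	/* hypothetical */

	static int
	end_copy_nonblocking(PGconn *conn)
	{
		int			r;

		/* 0 would mean "could not queue because of full buffers": retry */
		while ((r = PQputCopyEnd(conn, NULL)) == 0)
			wait_for_write_ready(conn);
		if (r < 0)
			return -1;		/* hard error; see PQerrorMessage(conn) */

		/* termination message queued; now drain libpq's output buffer */
		while ((r = PQflush(conn)) == 1)
			wait_for_write_ready(conn);
		return (r < 0) ? -1 : 0;
	}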

Best regards,

>
> Regards,
>
> --
> Fujii Masao
> Advanced Computing Technology Center
> Research and Development Headquarters
> NTT DATA CORPORATION



-- 
Tatsuhito Kasahara
kasahara.tatsuhito _at_ gmail.com




Re: Preserve attstattarget on REINDEX CONCURRENTLY

2021-02-06 Thread Michael Paquier
On Sat, Feb 06, 2021 at 10:39:53PM +0100, Tomas Vondra wrote:
> Copying this info in index_concurrently_swap seems a bit strange - we're
> copying other stuff there, but this is modifying something we've already
> copied before. I understand why we do it there to make this backpatchable,
> but maybe it'd be good to mention this in a comment (or at least the commit
> message). We could do this in the backbranches only and the "correct" way in
> master, but that does not seem worth it.

Thanks.

> One minor comment - the code says this:
> 
> /* no need for a refresh if both match */
> if (attstattarget == att->attstattarget)
> continue;
> 
> Isn't that just a different way to say "attstattarget is not default"?

For REINDEX CONCURRENTLY, yes.  I was thinking here about the case
where this code is used for other purposes in the future, where
attstattarget may not be -1.

I'll see about applying this stuff after the next version is tagged
then.
--
Michael


signature.asc
Description: PGP signature


Re: Bug in query rewriter - hasModifyingCTE not getting set

2021-02-06 Thread Tom Lane
After poking around a bit more, I notice that the hasRecursive flag
really ought to get propagated as well, since that's also an attribute
of the CTE list.  That omission doesn't seem to have any ill effect
today, since nothing in planning or execution looks at that flag, but
someday it might.  So what I think we should do is as attached.
(I re-integrated your example into with.sql, too.)

Given the very limited time remaining before the release wrap, I'm
going to go ahead and push this.

regards, tom lane

diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 0672f497c6..ba6f8a0507 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -536,6 +536,9 @@ rewriteRuleAction(Query *parsetree,
 		 *
 		 * This could possibly be fixed by using some sort of internally
 		 * generated ID, instead of names, to link CTE RTEs to their CTEs.
+		 * However, decompiling the results would be quite confusing; note the
+		 * merge of hasRecursive flags below, which could change the apparent
+		 * semantics of such redundantly-named CTEs.
 		 */
 		foreach(lc, parsetree->cteList)
 		{
@@ -557,6 +560,9 @@ rewriteRuleAction(Query *parsetree,
 		/* OK, it's safe to combine the CTE lists */
 		sub_action->cteList = list_concat(sub_action->cteList,
 		  copyObject(parsetree->cteList));
+		/* ... and don't forget about the associated flags */
+		sub_action->hasRecursive |= parsetree->hasRecursive;
+		sub_action->hasModifyingCTE |= parsetree->hasModifyingCTE;
 	}
 
 	/*
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index c519a61c4f..5e4ea29243 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -2277,6 +2277,33 @@ SELECT * FROM bug6051_2;
  3
 (3 rows)
 
+-- silly example to verify that hasModifyingCTE flag is propagated
+CREATE TEMP TABLE bug6051_3 AS
+  select a from generate_series(11,13) as a;
+CREATE RULE bug6051_3_ins AS ON INSERT TO bug6051_3 DO INSTEAD
+  SELECT i FROM bug6051_2;
+BEGIN; SET LOCAL force_parallel_mode = on;
+WITH t1 AS ( DELETE FROM bug6051_3 RETURNING * )
+  INSERT INTO bug6051_3 SELECT * FROM t1;
+ i 
+---
+ 1
+ 2
+ 3
+ 1
+ 2
+ 3
+ 1
+ 2
+ 3
+(9 rows)
+
+COMMIT;
+SELECT * FROM bug6051_3;
+ a 
+---
+(0 rows)
+
 -- a truly recursive CTE in the same list
 WITH RECURSIVE t(a) AS (
 	SELECT 0
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index f4ba0d8e39..0ffa44afa7 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -1070,6 +1070,22 @@ INSERT INTO bug6051 SELECT * FROM t1;
 SELECT * FROM bug6051;
 SELECT * FROM bug6051_2;
 
+-- silly example to verify that hasModifyingCTE flag is propagated
+CREATE TEMP TABLE bug6051_3 AS
+  select a from generate_series(11,13) as a;
+
+CREATE RULE bug6051_3_ins AS ON INSERT TO bug6051_3 DO INSTEAD
+  SELECT i FROM bug6051_2;
+
+BEGIN; SET LOCAL force_parallel_mode = on;
+
+WITH t1 AS ( DELETE FROM bug6051_3 RETURNING * )
+  INSERT INTO bug6051_3 SELECT * FROM t1;
+
+COMMIT;
+
+SELECT * FROM bug6051_3;
+
 -- a truly recursive CTE in the same list
 WITH RECURSIVE t(a) AS (
 	SELECT 0


Re: Bug in query rewriter - hasModifyingCTE not getting set

2021-02-06 Thread Tom Lane
Greg Nancarrow  writes:
> I found a bug in the query rewriter. If a query that has a modifying
> CTE is re-written, the hasModifyingCTE flag is not getting set in the
> re-written query.

Ugh.

> I've attached the patch with the suggested fix (reviewed by Amit Langote).

I think either the bit about rule_action is unnecessary, or most of
the code immediately above this is wrong, because it's only updating
flags in sub_action.  Why do you think it's necessary to change
rule_action in addition to sub_action?

regards, tom lane




Bug in query rewriter - hasModifyingCTE not getting set

2021-02-06 Thread Greg Nancarrow
Hi Hackers,

I found a bug in the query rewriter. If a query that has a modifying
CTE is re-written, the hasModifyingCTE flag is not getting set in the
re-written query. This bug can result in the query being allowed to
execute in parallel-mode, which results in an error.

I originally found the problem using INSERT (which doesn't actually
affect the current Postgres code, as it doesn't support INSERT in
parallel mode) but a colleague of mine (Hou, Zhijie) managed to
reproduce it using SELECT as well (see example below), and helped to
minimize the patch size.

I've attached the patch with the suggested fix (reviewed by Amit Langote).


The following reproduces the issue (adapted from a test case in the
"with" regression tests):

drop table if exists test_data1;
create table test_data1(a int, b int) ;
insert into test_data1 select generate_series(1,1000), generate_series(1,1000);
set force_parallel_mode=on;
CREATE TEMP TABLE bug6051 AS
select i from generate_series(1,3) as i;
SELECT * FROM bug6051;
CREATE RULE bug6051_ins AS ON INSERT TO bug6051 DO INSTEAD select a as
i from test_data1;
WITH t1 AS ( DELETE FROM bug6051 RETURNING * ) INSERT INTO bug6051
SELECT * FROM t1;

produces the error:

ERROR:  cannot assign XIDs during a parallel operation


Regards,
Greg Nancarrow
Fujitsu Australia


v1-0001-Fix-bug-in-the-query-rewriter-hasModifyingCTE-not-set.patch
Description: Binary data


Re: psql \copy from sends a lot of packets

2021-02-06 Thread Tom Lane
Heikki Linnakangas  writes:
> I just noticed that if you load a file using psql:
> it sends every line as a separate FE/BE protocol CopyData packet.
> ...
> I'll add this to the next commitfest. There's similar inefficiency in 
> the server side in COPY TO, but I'll leave that for another patch.

The FE/BE protocol documentation is pretty explicit about this:

Copy-in mode (data transfer to the server) is initiated when the
backend executes a COPY FROM STDIN SQL statement. The backend sends a
CopyInResponse message to the frontend. The frontend should then send
zero or more CopyData messages, forming a stream of input data. (The
message boundaries are not required to have anything to do with row
boundaries, although that is often a reasonable choice.)
...
Copy-out mode (data transfer from the server) is initiated when the
backend executes a COPY TO STDOUT SQL statement. The backend sends a
CopyOutResponse message to the frontend, followed by zero or more
CopyData messages (always one per row), followed by CopyDone.

So while changing psql isn't so much a problem, changing the server
is a wire protocol break.  Maybe we should do it anyway, but I'm
not sure.

regards, tom lane




psql \copy from sends a lot of packets

2021-02-06 Thread Heikki Linnakangas

Hi,

I just noticed that if you load a file using psql:

\copy <table> from <file>

it sends every line as a separate FE/BE protocol CopyData packet. That's 
pretty wasteful if the lines are narrow. The overhead of each CopyData 
packet is 5 bytes.


To demonstrate, I generated a simple test file with the string "foobar" 
repeated 10 million times:


$ perl -le 'for (1..10000000) { print "foobar" }' > /tmp/testdata

and loaded that into a temp table with psql:

create temporary table copytest (t text) on commit delete rows;
\copy copytest from '/tmp/testdata';

I repeated and timed the \copy a few times; it takes about 3
seconds on my laptop:


postgres=# \copy copytest from '/tmp/testdata';
COPY 10000000
Time: 3039.625 ms (00:03.040)

Wireshark says that that involved about 120 MB of network traffic. The 
size of the file on disk is only 70 MB.


The attached patch modifies psql so that it buffers up 8 kB of data into 
each CopyData message, instead of sending one per line. That makes the 
operation faster:


postgres=# \copy copytest from '/tmp/testdata';
COPY 10000000
Time: 2490.268 ms (00:02.490)

And wireshark confirms that there's now only a bit over 70 MB of network 
traffic.


I'll add this to the next commitfest. There's similar inefficiency in 
the server side in COPY TO, but I'll leave that for another patch.


- Heikki
From e49334de4eb51f5ba061bf48d1979a7ab8ef0de9 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Sun, 7 Feb 2021 00:10:05 +0200
Subject: [PATCH 1/1] In psql \copy from, send data to server in larger chunks.

Previously, we would send each line as a separate CopyData message.
That's pretty wasteful if the table is narrow, as each CopyData message
has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input
data into each CopyData message.
---
 src/bin/psql/copy.c | 114 +++-
 1 file changed, 69 insertions(+), 45 deletions(-)

diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index 78f0dc5a507..8c56f2470e2 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -581,77 +581,101 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 	else
 	{
 		bool		copydone = false;
+		int			buflen;
+		bool		at_line_begin = true;
 
+		if (showprompt)
+		{
+			const char *prompt = get_prompt(PROMPT_COPY, NULL);
+
+			fputs(prompt, stdout);
+			fflush(stdout);
+		}
+
+		/*
+		 * In text mode, we have to read the input one line at a time, so that
+		 * we can stop reading at the EOF marker (\.).  We mustn't read beyond
+		 * the EOF marker, because if the data is inlined in a SQL script, we
+		 * would eat up the commands after the EOF marker.
+		 */
+		buflen = 0;
 		while (!copydone)
-		{		/* for each input line ... */
-			bool		firstload;
-			bool		linedone;
+		{
+			int			linelen;
+			char	   *fgresult;
 
-			if (showprompt)
+			if (at_line_begin && showprompt)
 			{
-const char *prompt = get_prompt(PROMPT_COPY, NULL);
+const char *prompt;
 
+sigint_interrupt_enabled = false;
+
+prompt = get_prompt(PROMPT_COPY, NULL);
 fputs(prompt, stdout);
 fflush(stdout);
-			}
 
-			firstload = true;
-			linedone = false;
-
-			while (!linedone)
-			{	/* for each bufferload in line ... */
-int			linelen;
-char	   *fgresult;
-
-/* enable longjmp while waiting for input */
 sigint_interrupt_enabled = true;
+			}
 
-fgresult = fgets(buf, sizeof(buf), copystream);
-
-sigint_interrupt_enabled = false;
+			/* enable longjmp while waiting for input */
+			sigint_interrupt_enabled = true;
 
-if (!fgresult)
-{
-	copydone = true;
-	break;
-}
+			fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
 
-linelen = strlen(buf);
+			sigint_interrupt_enabled = false;
 
-/* current line is done? */
-if (linelen > 0 && buf[linelen - 1] == '\n')
-	linedone = true;
+			if (!fgresult)
+copydone = true;
+			else
+			{
+linelen = strlen(fgresult);
+buflen += linelen;
 
-/* check for EOF marker, but not on a partial line */
-if (firstload)
+if (buf[buflen - 1] == '\n')
 {
-	/*
-	 * This code erroneously assumes '\.' on a line alone
-	 * inside a quoted CSV string terminates the \copy.
-	 * https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org
-	 */
-	if (strcmp(buf, "\\.\n") == 0 ||
-		strcmp(buf, "\\.\r\n") == 0)
+	/* check for EOF marker, but not on a partial line */
+	if (at_line_begin)
 	{
-		copydone = true;
-		break;
+		/*
+		 * This code erroneously assumes '\.' on a line alone
+		 * inside a quoted CSV string terminates the \copy.
+		 * https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org
+		 */
+		if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
+			(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
+		{
+			copydone = true;
+		}
 	}
 
-	firstload = 

Re: Preserve attstattarget on REINDEX CONCURRENTLY

2021-02-06 Thread Tomas Vondra

On 2/5/21 8:43 AM, Michael Paquier wrote:

On Fri, Feb 05, 2021 at 08:22:17AM +0100, Ronan Dunklau wrote:

I'm not surprised by this answer, the good news is it's being back-patched.


Yes, I have no problem with that.  Until this gets fixed, the damage
can be limited with an extra ALTER INDEX, that takes a
ShareUpdateExclusiveLock so there is no impact on the concurrent
activity.


Looks good to me! Thank you.


Thanks for looking at it.  Tomas, do you have any comments?
--


Not really.

Copying this info in index_concurrently_swap seems a bit strange - we're 
copying other stuff there, but this is modifying something we've already 
copied before. I understand why we do it there to make this 
backpatchable, but maybe it'd be good to mention this in a comment (or 
at least the commit message). We could do this in the backbranches only 
and the "correct" way in master, but that does not seem worth it.


One minor comment - the code says this:

/* no need for a refresh if both match */
if (attstattarget == att->attstattarget)
continue;

Isn't that just a different way to say "attstattarget is not default"?


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




GlobalVisIsRemovableFullXid() vs GlobalVisCheckRemovableXid()

2021-02-06 Thread Peter Geoghegan
Why is GlobalVisIsRemovableFullXid() not named
GlobalVisCheckRemovableFullXid() instead? ISTM that that name makes
much more sense, since it is what I'd expect for a function that is
the "Full XID equivalent" of GlobalVisCheckRemovableXid().

Note also that GlobalVisIsRemovableFullXid() is the only symbol name
matching "GlobalVisIsRemovable*".

Have I missed something?

-- 
Peter Geoghegan




Re: CLUSTER on partitioned index

2021-02-06 Thread Zhihong Yu
Hi,
For v7-0002-Implement-CLUSTER-of-partitioned-table.patch:

+* We have to build the list in a different memory context so it will
+* survive the cross-transaction processing
+*/
+   old_context = MemoryContextSwitchTo(cluster_context);

cluster_context is not modified within the loop. Can the memory context
switching code be moved outside the loop?
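
For what it's worth, the hoisted version would look something like this
(a sketch; "tables" and "rtcs" are illustrative names, not the actual patch):

	MemoryContext old_context;
	List	   *rtcs = NIL;		/* stand-in for the list being built */
	ListCell   *lc;

	/* Switch once, before the loop, instead of on every iteration. */
	old_context = MemoryContextSwitchTo(cluster_context);
	foreach(lc, tables)			/* "tables" stands in for the input list */
	{
		RelToCluster *rtc = (RelToCluster *) lfirst(lc);

		/* every allocation here survives the cross-transaction processing */
		rtcs = lappend(rtcs, rtc);
	}
	MemoryContextSwitchTo(old_context);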

Cheers

On Sat, Feb 6, 2021 at 6:46 AM Justin Pryzby  wrote:

> On Mon, Jan 18, 2021 at 12:34:59PM -0600, Justin Pryzby wrote:
> > On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote:
> > > On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote:
> > > > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote:
> > > > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote:
> > > > > > I'm attaching a counter-proposal to your catalog change, which
> preserves
> > > > > > indisclustered on children of clustered, partitioned indexes,
> and invalidates
> > > > > > indisclustered when attaching unclustered indexes.
> > > > >
> > > > > ..and now propagates CLUSTER ON to child indexes.
> > > > >
> > > > > I left this as separate patches to show what I mean and what's new
> while we
> > > > > discuss it.
> > > >
> > > > This fixes some omissions in the previous patch and error in its
> test cases.
> > > >
> > > > CLUSTER ON recurses to children, since I think a clustered parent
> index means
> > > > that all its child indexes are clustered.  "SET WITHOUT CLUSTER"
> doesn't have
> > > > to recurse to children, but I did it like that for consistency and
> it avoids
> > > > the need to special case InvalidOid.
> > >
> > > The previous patch failed pg_upgrade when restoring a clustered,
> parent index,
> > > since it's marked INVALID until indexes have been built on all child
> tables, so
> > > CLUSTER ON was rejected on invalid index.
> > >
> > > So I think CLUSTER ON needs to be a separate pg_dump object, to allow
> attaching
> > > the child index (thereby making the parent "valid") to happen before
> SET
> > > CLUSTER on the parent index.
> >
> > Rebased on b5913f612 and now a3dc92600.
>
> This resolves ORDER BY test failure with COLLATE "C".
>
> --
> Justin
>


Re: TRUNCATE on foreign table

2021-02-06 Thread Zhihong Yu
Hi,
+   if (strcmp(defel->defname, "truncatable") == 0)
+   server_truncatable = defGetBoolean(defel);

Looks like we can break out of the loop when the condition is met.

+   /* ExecForeignTruncate() is invoked for each server */

The method name in the comment is slightly different from the actual method
name.

+   if (strcmp(defel->defname, "truncatable") == 0)
+   truncatable = defGetBoolean(defel);

We can break out of the loop when the condition is met.
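
For illustration, the early exit would look roughly like this (a sketch;
"server" is assumed to be the ForeignServer whose options are scanned, and
server_truncatable is the variable from the quoted hunk):

	ListCell   *cell;

	foreach(cell, server->options)
	{
		DefElem    *defel = (DefElem *) lfirst(cell);

		if (strcmp(defel->defname, "truncatable") == 0)
		{
			server_truncatable = defGetBoolean(defel);
			break;		/* each option occurs at most once */
		}
	}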

Cheers

On Sat, Feb 6, 2021 at 5:11 AM Kazutaka Onishi  wrote:

> Hello,
>
> The attached patch is for supporting "TRUNCATE" on foreign tables.
>
> This patch includes:
> * Adding "ExecForeignTruncate" function into FdwRoutine.
> * Enabling "postgres_fdw" to use TRUNCATE.
>
> This patch was proposed by Kaigai-san in March 2020,
> but it was returned because it can't be applied to the latest source codes.
>
> Please refer to the discussion.
>
> https://www.postgresql.org/message-id/flat/CAOP8fzb-t3WVNLjGMC%2B4sV4AZa9S%3DMAQ7Q6pQoADMCf_1jp4ew%40mail.gmail.com#3b6c6ff85ff5c722b36c7a09b2dd7165
>
> I have fixed the patch in order to submit it to Commit Fest 2021-03.
>
> regards,
>
> --
> --
> Kazutaka Onishi
> (oni...@heterodb.com)
>


Re: CLUSTER on partitioned index

2021-02-06 Thread Justin Pryzby
On Mon, Jan 18, 2021 at 12:34:59PM -0600, Justin Pryzby wrote:
> On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote:
> > On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote:
> > > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote:
> > > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote:
> > > > > I'm attaching a counter-proposal to your catalog change, which 
> > > > > preserves
> > > > > indisclustered on children of clustered, partitioned indexes, and 
> > > > > invalidates
> > > > > indisclustered when attaching unclustered indexes.
> > > > 
> > > > ..and now propagates CLUSTER ON to child indexes.
> > > > 
> > > > I left this as separate patches to show what I mean and what's new 
> > > > while we
> > > > discuss it.
> > > 
> > > This fixes some omissions in the previous patch and error in its test 
> > > cases.
> > > 
> > > CLUSTER ON recurses to children, since I think a clustered parent index 
> > > means
> > > that all its child indexes are clustered.  "SET WITHOUT CLUSTER" doesn't 
> > > have
> > > to recurse to children, but I did it like that for consistency and it 
> > > avoids
> > > the need to special case InvalidOid.
> > 
> > The previous patch failed pg_upgrade when restoring a clustered, parent 
> > index,
> > since it's marked INVALID until indexes have been built on all child 
> > tables, so
> > CLUSTER ON was rejected on invalid index.
> > 
> > So I think CLUSTER ON needs to be a separate pg_dump object, to allow 
> > attaching
> > the child index (thereby making the parent "valid") to happen before SET
> > CLUSTER on the parent index.
> 
> Rebased on b5913f612 and now a3dc92600.

This resolves ORDER BY test failure with COLLATE "C".

-- 
Justin
From 1101d6b8a44f5cc170816f305476750352dc06de Mon Sep 17 00:00:00 2001
From: Justin Pryzby 
Date: Thu, 26 Nov 2020 14:37:08 -0600
Subject: [PATCH v7 1/7] pg_dump: make CLUSTER ON a separate dump object..

..since it needs to be restored after any child indexes are restored *and
attached*.  The order needs to be:

1) restore child and parent index (order doesn't matter);
2) attach child index;
3) set cluster on child and parent index (order doesn't matter);
---
 src/bin/pg_dump/pg_dump.c  | 86 ++
 src/bin/pg_dump/pg_dump.h  |  8 
 src/bin/pg_dump/pg_dump_sort.c |  8 
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d99b61e621..8dc8a42964 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -208,6 +208,7 @@ static void dumpSequence(Archive *fout, TableInfo *tbinfo);
 static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo);
+static void dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo);
 static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
@@ -7092,6 +7093,11 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 i_inddependcollversions;
 	int			ntups;
 
+	int	ncluster = 0;
+	IndexClusterInfo *clusterinfo;
+	clusterinfo = (IndexClusterInfo *)
+		pg_malloc0(numTables * sizeof(IndexClusterInfo));
+
 	for (i = 0; i < numTables; i++)
 	{
		TableInfo  *tbinfo = &tblinfo[i];
@@ -7471,6 +7477,27 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 /* Plain secondary index */
 indxinfo[j].indexconstraint = 0;
 			}
+
+			/* Record each table's CLUSTERed index, if any */
+			if (indxinfo[j].indisclustered)
+			{
+IndxInfo   *index = &indxinfo[j];
+IndexClusterInfo *cluster = &clusterinfo[ncluster];
+
+cluster->dobj.objType = DO_INDEX_CLUSTER_ON;
+cluster->dobj.catId.tableoid = 0;
+cluster->dobj.catId.oid = 0;
+AssignDumpId(&cluster->dobj);
+cluster->dobj.name = pg_strdup(index->dobj.name);
+cluster->dobj.namespace = index->indextable->dobj.namespace;
+cluster->index = index;
+cluster->indextable = [i];
+
+/* The CLUSTER ON depends on its index. */
+addObjectDependency(&cluster->dobj, index->dobj.dumpId);
+
+ncluster++;
+			}
 		}
 
 		PQclear(res);
@@ -10323,6 +10350,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 		case DO_SUBSCRIPTION:
 			dumpSubscription(fout, (SubscriptionInfo *) dobj);
 			break;
+		case DO_INDEX_CLUSTER_ON:
+			dumpIndexClusterOn(fout, (IndexClusterInfo *) dobj);
+			break;
 		case DO_PRE_DATA_BOUNDARY:
 		case DO_POST_DATA_BOUNDARY:
 			/* never dumped, nothing to do */
@@ -16543,6 +16573,41 @@ getAttrName(int attrnum, TableInfo *tblInfo)
 	return NULL;/* keep compiler quiet */
 }
 
+/*
+ * dumpIndexClusterOn
+ *	  record that the index is clustered.
+ */
+static void
+dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo)
+{
+	DumpOptions *dopt = 

Re: Fuzz testing COPY FROM parsing

2021-02-06 Thread Andrew Dunstan


On 2/5/21 2:50 PM, Heikki Linnakangas wrote:
> On 05/02/2021 21:16, Andrew Dunstan wrote:
>>
>> On 2/5/21 10:54 AM, Stephen Frost wrote:
>>> * Heikki Linnakangas (hlinn...@iki.fi) wrote:
 I ran it for about 2 h on my laptop with the patch I was working on
 [2]. It
 didn't find any crashes, but it generated about 1300 input files
 that it
 considered "interesting" based on code coverage analysis. When I
 took those
 generated inputs, and ran them against unpatched and patched
 server, some
 inputs produced different results. So that revealed a couple of
 bugs in the
 patch. (I'll post a fixed patched version on that thread soon.)

 I hope others find this useful, too.
>>> Nice!  I wonder if there's a way to have a buildfarm member or other
>>> system doing this automatically on new commits and perhaps adding
>>> coverage for other things like the JSON code..
>>
>> Not easily in the buildfarm as it is today. We can easily create modules
>> for extensions and other things that don't require modification of core
>> code, but things that require patching core code are a whole different
>> story.
>
> It might be possible to call the fuzzer's HF_ITER() function from a C
> extension instead. So you would run a query like "SELECT
> next_fuzz_iter()" in a loop, and next_fuzz_iter() would be a C
> function that calls HF_ITER(), and executes the actual query with SPI.
>
> That said, I don't think it's important to run the fuzzer in the
> buildfarm. It should be enough to do that every once in a while, when
> you modify the COPY FROM code (or something else that you want to fuzz
> test). But we could easily include the test inputs generated by the
> fuzzer in the regular tests. We've usually been very frugal in adding
> tests, though, to keep the time it takes to run all the tests short.
>
>

This strikes me as a better design in any case.
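
For concreteness, the entry point Heikki describes might look roughly like
this (a sketch; HF_ITER() and the SPI calls are real APIs, everything else
is an assumption):

	#include "postgres.h"
	#include "fmgr.h"
	#include "executor/spi.h"
	#include <libhfuzz/libhfuzz.h>		/* honggfuzz persistent mode */

	PG_MODULE_MAGIC;

	PG_FUNCTION_INFO_V1(next_fuzz_iter);

	Datum
	next_fuzz_iter(PG_FUNCTION_ARGS)
	{
		const uint8_t *fuzz_buf;
		size_t		fuzz_len;
		char	   *query;

		/* Block until honggfuzz hands over the next generated input. */
		HF_ITER(&fuzz_buf, &fuzz_len);

		query = palloc(fuzz_len + 1);
		memcpy(query, fuzz_buf, fuzz_len);
		query[fuzz_len] = '\0';

		/* Run the fuzzer-generated statement through SPI. */
		SPI_connect();
		SPI_execute(query, false, 0);
		SPI_finish();

		PG_RETURN_VOID();
	}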


cheers


andrew


--
Andrew Dunstan
EDB: https://www.enterprisedb.com





TRUNCATE on foreign table

2021-02-06 Thread Kazutaka Onishi
Hello,

The attached patch is for supporting "TRUNCATE" on foreign tables.

This patch includes:
* Adding "ExecForeignTruncate" function into FdwRoutine.
* Enabling "postgres_fdw" to use TRUNCATE.

This patch was proposed by Kaigai-san in March 2020,
but it was returned because it can't be applied to the latest source codes.

Please refer to the discussion.
https://www.postgresql.org/message-id/flat/CAOP8fzb-t3WVNLjGMC%2B4sV4AZa9S%3DMAQ7Q6pQoADMCf_1jp4ew%40mail.gmail.com#3b6c6ff85ff5c722b36c7a09b2dd7165

I have fixed the patch in order to submit it to Commit Fest 2021-03.

regards,

--
--
Kazutaka Onishi
(oni...@heterodb.com)
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+   List *frels_list,
+   List *frels_extra,
+   DropBehavior behavior,
+   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+	 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index b09dce63f5..17e0f250c8 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8229,6 +8229,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===
+-- test for TRUNCATE
+-- ===
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+   SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+   SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+   SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+   SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+   SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |x 
++--
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
++---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
++---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  

Re: pg_replication_origin_drop API potential race condition

2021-02-06 Thread Amit Kapila
On Sat, Feb 6, 2021 at 3:26 PM Petr Jelinek  wrote:
>
> On 06/02/2021 07:29, Amit Kapila wrote:
> > On Fri, Feb 5, 2021 at 6:45 PM Euler Taveira  wrote:
> >> - replorigin_drop(roident, true);
> > + replorigin_drop_by_name(name, false /* missing_ok */ , true /* nowait */ );
> >>
> >> A modern IDE would certainly show you the function definition that allows 
> >> you
> >> to check what each parameter value is without having to go back and forth. 
> >> I
> >> saw a few occurrences of this pattern in the source code and IMO it could 
> >> be
> >> used when it is not obvious what that value means. Booleans are easier to
> >> figure out, however, sometimes integer and text are not.
> >>
> > Fair enough, removed in the attached patch.
>
>
> To be fair the logical replication framework is full of these comments
> so it's pretty natural to add them to new code as well, but I agree with
> Euler that it's unnecessary with any reasonable development tooling.
>
> The patch as posted looks good to me,
>

Thanks, but today, while testing this API again, I observed that we can still
get "tuple concurrently deleted" because we release the lock on
ReplicationOriginRelationId at the end of the replorigin_drop_by_name API.
So there is no guarantee that the invalidation reaches another backend doing
the same operation. I think we need to keep the lock till the end of the
xact, as we do in other drop operations (see DropTableSpace, dropdb).
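
A sketch of the pattern being suggested (table_open/table_close are the
real APIs; the elided middle is the existing drop logic):

	rel = table_open(ReplicationOriginRelationId, ExclusiveLock);

	/* ... look up and delete the origin tuple, reset in-memory state ... */

	/*
	 * Keep the lock until end of transaction, as DropTableSpace and dropdb
	 * do, so that a concurrent drop sees our invalidation first.
	 */
	table_close(rel, NoLock);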

-- 
With Regards,
Amit Kapila.




Re: pg_replication_origin_drop API potential race condition

2021-02-06 Thread Petr Jelinek

On 06/02/2021 07:29, Amit Kapila wrote:

On Fri, Feb 5, 2021 at 6:45 PM Euler Taveira  wrote:

On Fri, Feb 5, 2021, at 4:01 AM, Amit Kapila wrote:

I am not completely sure whether we should retire replorigin_drop or just
keep it for backward compatibility? What do you think? Anybody else
has any opinion?

We could certainly keep some code for backward compatibility, however, we have
to consider if it is (a) an exposed API and/or (b) a critical path. We break
several extensions every release due to Postgres extensibility. For (a), it is
not an exposed function, I mean, we are not changing
`pg_replication_origin_drop`. Hence, there is no need to keep it. In (b), we
could risk slowing down some critical paths that we decide to keep the old
function and create a new one that contains additional features. It is not the
case for this function. It is rare that an extension does not have a few #ifdef
if it supports multiple Postgres versions. IMO we should keep as little code as
possible in the core in favor of maintainability.


Yeah, that makes sense. I was a bit worried about pglogical but I think they
can easily update it if required, so removed as per your suggestion.
Petr, any opinion on this matter? I am planning to push this early
next week (by Tuesday) unless you or someone else think it is not a
good idea.


- replorigin_drop(roident, true);
+ replorigin_drop_by_name(name, false /* missing_ok */ , true /* nowait */ );

A modern IDE would certainly show you the function definition that allows you
to check what each parameter value is without having to go back and forth. I
saw a few occurrences of this pattern in the source code and IMO it could be
used when it is not obvious what that value means. Booleans are easier to
figure out, however, sometimes integer and text are not.


Fair enough, removed in the attached patch.



To be fair the logical replication framework is full of these comments 
so it's pretty natural to add them to new code as well, but I agree with 
Euler that it's unnecessary with any reasonable development tooling.


The patch as posted looks good to me; as an extension author I normally
have the origin cached by id, so the API change means I have to do a name
lookup now, but given this is just for drop, it does not really matter.


--
Petr





Re: Single transaction in the tablesync worker?

2021-02-06 Thread Petr Jelinek



On 06/02/2021 06:07, Amit Kapila wrote:

On Sat, Feb 6, 2021 at 6:22 AM Peter Smith  wrote:

On Sat, Feb 6, 2021 at 2:10 AM Petr Jelinek wrote:

+ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char syncslotname[NAMEDATALEN])
+{
+ if (syncslotname)
+ sprintf(syncslotname, "pg_%u_sync_%u", suboid, relid);
+ else
+ syncslotname = psprintf("pg_%u_sync_%u", suboid, relid);
+
+ return syncslotname;
+}

Given that we are now explicitly dropping slots, what happens here if we
have 2 different downstreams that happen to get the same suboid and reloid;
will one of them drop the slot of the other one? Previously, with the
cleanup being left to the temp slot, we'd at most have gotten an error when
creating it, but with the new logic in LogicalRepSyncTableStart it feels
like we could get into a situation where 2 downstreams are fighting over a
slot, no?


I think so. See if the alternative suggested below works, or if you
have any other suggestions for the same.


The PG docs [1] say "there is only one copy of pg_subscription per
cluster, not one per database". IIUC that means it is not possible for
2 different subscriptions to have the same suboid.


I think he is talking about two different clusters having separate
subscriptions that point to the same publisher. In different clusters,
we can get the same subid/relid. I think we need a cluster-wide unique
identifier to distinguish among different subscribers. How about using
the system_identifier stored in the control file (we can use
GetSystemIdentifier to retrieve it)?  I think one concern could be
that adding that to the slot name could exceed the max length of a slot
name (NAMEDATALEN - 1), but I don't think that is the case here
(pg_%u_sync_%u_UINT64_FORMAT (3 + 10 + 6 + 10 + 20 + '\0')). Note the
last part is the system_identifier in this scheme.



Yep, that's what I mean, and system_identifier seems like a good choice to me.



Do you guys think that works, or let me know if you have any other
better idea? Petr, is there a reason why such an identifier was not
considered originally; is there any risk in it?



Originally it was not considered, likely because it's all based on
pglogical/BDR work where ids are hashes of stuff that's unique across a
group of instances, not counter-based like Oids in PostgreSQL, and I
simply didn't realize it could be a problem until reading this patch :)
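
For reference, the construction being discussed might look roughly like
this (a sketch; GetSystemIdentifier() and UINT64_FORMAT are existing
PostgreSQL facilities, the surrounding names follow the quoted function):

	char		syncslotname[NAMEDATALEN];

	snprintf(syncslotname, sizeof(syncslotname),
			 "pg_%u_sync_%u_" UINT64_FORMAT,
			 suboid, relid, GetSystemIdentifier());
	/* worst case 3 + 10 + 6 + 10 + 1 + 20 + 1 = 51 bytes, under NAMEDATALEN */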



--
Petr Jelinek