On Sat, Jan 26, 2019 at 2:14 PM Amit Kapila <amit.kapil...@gmail.com> wrote:
>
> On Sat, Jan 26, 2019 at 5:05 AM John Naylor <john.nay...@2ndquadrant.com> 
> wrote:
> >
> > So, in v19 we check pg_class.relpages and if it's
> > a heap and less than or equal the threshold we call stat on the 0th
> > segment to verify.
> >
>
> Okay, but the way logic is implemented appears clumsy to me.

> The function transfer_relfile has no clue about skipping of FSM stuff,
> but it contains comments about it.

Yeah, I wasn't entirely happy with how that turned out.

> I think there is some value in using the information from
> this function to skip fsm files, but the code doesn't appear to fit
> well, how about moving this check to new function
> new_cluster_needs_fsm()?

For v21, new_cluster_needs_fsm() has all responsibility for obtaining
the info it needs. I think this is much cleaner, but there is a small
bit of code duplication since it now has to form the file name. One
thing we could do is form the the base old/new file names in
transfer_single_new_db() and pass those to transfer_relfile(), which
will only add suffixes and segment numbers. We could then pass the
base old file name to new_cluster_needs_fsm() and use it as is. Not
sure if that's worthwhile, though.

> The order in which relkind and relpages is used in the above code is
> different from the order in which it is mentioned in the query, it
> won't matter, but keeping in order will make look code consistent.  I
> have made this and some more minor code adjustments in the attached
> patch.  If you like those, you can include them in the next version of
> your patch.

Okay, done.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 2b70b65f43c05234b81e939b0ed49e34b97d5667 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Sun, 27 Jan 2019 21:42:07 +0100
Subject: [PATCH v21 3/3] During pg_upgrade, conditionally skip transfer of
 FSMs.

If a heap on the old cluster has 4 pages or fewer, and the old cluster
was PG v11 or earlier, don't copy or link the FSM. This will shrink
space usage for installations with large numbers of small tables.
---
 src/bin/pg_upgrade/info.c        | 16 +++++++--
 src/bin/pg_upgrade/pg_upgrade.h  |  6 ++++
 src/bin/pg_upgrade/relfilenode.c | 58 ++++++++++++++++++++++++++++++--
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 2f925f086c..902bfc647e 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
+	map->relpages = old_rel->relpages;
+	map->relkind = old_rel->relkind;
 
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
+	char	   *relkind = NULL;
 	int			i_spclocation,
 				i_nspname,
 				i_relname,
@@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 				i_indtable,
 				i_toastheap,
 				i_relfilenode,
-				i_reltablespace;
+				i_reltablespace,
+				i_relpages,
+				i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, %s "
+			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "      UNION ALL "
 			 "      SELECT * FROM toast_heap "
@@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relname = PQfnumber(res, "relname");
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
+	i_relpages = PQfnumber(res, "relpages");
+	i_relkind = PQfnumber(res, "relkind");
 	i_spclocation = PQfnumber(res, "spclocation");
 
 	for (relnum = 0; relnum < ntups; relnum++)
@@ -556,6 +563,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		curr->relname = pg_strdup(relname);
 
 		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
+		curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages));
+
+		relkind = PQgetvalue(res, relnum, i_relkind);
+		curr->relkind = relkind[0];
+
 		curr->tblsp_alloc = false;
 
 		/* Is the tablespace oid non-default? */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 2f67eee22b..baeb8ff0f8 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -147,6 +147,8 @@ typedef struct
 	char	   *tablespace;		/* tablespace path; "" for cluster default */
 	bool		nsp_alloc;		/* should nspname be freed? */
 	bool		tblsp_alloc;	/* should tablespace be freed? */
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
 } RelInfo;
 
 typedef struct
@@ -173,6 +175,10 @@ typedef struct
 	 */
 	Oid			old_relfilenode;
 	Oid			new_relfilenode;
+
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
+
 	/* the rest are used only for logging and error reporting */
 	char	   *nspname;		/* namespaces */
 	char	   *relname;
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index 0c78073f0e..ce9f3ecf83 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -14,10 +14,12 @@
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
+#include "storage/freespace.h"
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static bool new_cluster_needs_fsm(FileNameMap *map);
 
 
 /*
@@ -172,9 +174,13 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
 			{
 				/*
-				 * Copy/link any fsm and vm files, if they exist
+				 * Transfer any FSM files if they would be created in the
+				 * new cluster.
 				 */
-				transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+				if (new_cluster_needs_fsm(&maps[mapnum]))
+					transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+
+				/* Transfer any VM files if we can trust their contents. */
 				if (vm_crashsafe_match)
 					transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
 			}
@@ -278,3 +284,51 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 			}
 	}
 }
+
+/*
+ * Return false for small heaps if we're upgrading across version 12,
+ * the first where small heap relations don't have FSMs by default.
+ */
+static bool
+new_cluster_needs_fsm(FileNameMap *map)
+{
+	char		old_primary_file[MAXPGPATH];
+	struct stat statbuf;
+
+	if (!(GET_MAJOR_VERSION(old_cluster.major_version) <= 1100 &&
+		GET_MAJOR_VERSION(new_cluster.major_version) >= 1200))
+		return true;
+
+	/* Always transfer FSMs of non-heap relations. */
+	if (map->relkind != RELKIND_RELATION &&
+		map->relkind != RELKIND_TOASTVALUE)
+		return true;
+
+	/*
+	 * If pg_class.relpages falsely reports that the heap is above the
+	 * threshold, we will transfer a FSM when we don't need to, but this
+	 * is harmless.
+	 */
+	if (map->relpages > HEAP_FSM_CREATION_THRESHOLD)
+		return true;
+
+	/* Determine file name of the first segment of the main fork. */
+	snprintf(old_primary_file, sizeof(old_primary_file), "%s%s/%u/%u",
+			 map->old_tablespace,
+			 map->old_tablespace_suffix,
+			 map->old_db_oid,
+			 map->old_relfilenode);
+
+	/*
+	 * If pg_class.relpages falsely reports that the heap is below the
+	 * threshold, a FSM would be skipped when we actually need it.  To guard
+	 * against this, we verify the size of the first segment.
+	 */
+	if (stat(old_primary_file, &statbuf) != 0)
+		pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\"): %s\n",
+				 map->nspname, map->relname, old_primary_file, strerror(errno));
+	else if (statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ)
+		return true;
+	else
+		return false;
+}
-- 
2.17.1

Reply via email to