On Mon, Sep  9, 2013 at 07:39:00PM -0400, Bruce Momjian wrote:
> > In the case of tablespaces, I should have thought you could keep a
> > hash table of the names and just store an entry id in the table
> > structure. But that's just my speculation without actually looking
> > at the code, so don't take my word for it :-)
> 
> Yes, please feel free to improve the code.  I improved pg_upgrade CPU
> usage for a lerge number of objects, but never thought to look at memory
> usage.  It would be a big win to just palloc/pfree the memory, rather
> than allocate tones of memory.  If you don't get to it, I will in a few
> weeks.

Thanks you for pointing out this problem.  I have researched the cause
and the major problem was that I was allocating the maximum path length
in a struct rather than allocating just the length I needed, and was not
reusing string pointers that I knew were not going to change.

The updated attached patch significantly decreases memory consumption:

        tables          orig      patch     % decrease
        ----
        1            27,168 kB  27,168 kB        0
        1k           46,136 kB  27,920 kB       39
        2k           65,224 kB  28,796 kB       56
        4k          103,276 kB  30,472 kB       70
        8k          179,512 kB  33,900 kB       81
        16k         331,860 kB  40,788 kB       88
        32k         636,544 kB  54,572 kB       91
        64k       1,245,920 kB  81,876 kB       93

As you can see, a database with 64k tables shows a 93% decrease in
memory use.  I plan to apply this for PG 9.4.

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + Everyone has their own god. +
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
new file mode 100644
index f04707f..acf8660
*** a/contrib/pg_upgrade/check.c
--- b/contrib/pg_upgrade/check.c
*************** create_script_for_old_cluster_deletion(c
*** 707,726 ****
  			fprintf(script, "\n");
  			/* remove PG_VERSION? */
  			if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
! 				fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
  						fix_path_separator(os_info.old_tablespaces[tblnum]),
- 						fix_path_separator(old_cluster.tablespace_suffix),
  						PATH_SEPARATOR);
  
  			for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
! 			{
! 				fprintf(script, RMDIR_CMD " %s%s%c%d\n",
  						fix_path_separator(os_info.old_tablespaces[tblnum]),
- 						fix_path_separator(old_cluster.tablespace_suffix),
  						PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid);
- 			}
  		}
  		else
  
  			/*
  			 * Simply delete the tablespace directory, which might be ".old"
--- 707,724 ----
  			fprintf(script, "\n");
  			/* remove PG_VERSION? */
  			if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
! 				fprintf(script, RM_CMD " %s%cPG_VERSION\n",
  						fix_path_separator(os_info.old_tablespaces[tblnum]),
  						PATH_SEPARATOR);
  
  			for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
! 				fprintf(script, RMDIR_CMD " %s%c%d\n",
  						fix_path_separator(os_info.old_tablespaces[tblnum]),
  						PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid);
  		}
  		else
+ 		{
+ 			char	*suffix_path = pg_strdup(old_cluster.tablespace_suffix);
  
  			/*
  			 * Simply delete the tablespace directory, which might be ".old"
*************** create_script_for_old_cluster_deletion(c
*** 728,734 ****
  			 */
  			fprintf(script, RMDIR_CMD " %s%s\n",
  					fix_path_separator(os_info.old_tablespaces[tblnum]),
! 					fix_path_separator(old_cluster.tablespace_suffix));
  	}
  
  	fclose(script);
--- 726,734 ----
  			 */
  			fprintf(script, RMDIR_CMD " %s%s\n",
  					fix_path_separator(os_info.old_tablespaces[tblnum]),
! 					fix_path_separator(suffix_path));
! 			pfree(suffix_path);
! 		}
  	}
  
  	fclose(script);
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
new file mode 100644
index 3d0dd1a..fd083de
*** a/contrib/pg_upgrade/info.c
--- b/contrib/pg_upgrade/info.c
*************** create_rel_filename_map(const char *old_
*** 108,127 ****
  		 * relation belongs to the default tablespace, hence relfiles should
  		 * exist in the data directories.
  		 */
! 		strlcpy(map->old_tablespace, old_data, sizeof(map->old_tablespace));
! 		strlcpy(map->new_tablespace, new_data, sizeof(map->new_tablespace));
! 		strlcpy(map->old_tablespace_suffix, "/base", sizeof(map->old_tablespace_suffix));
! 		strlcpy(map->new_tablespace_suffix, "/base", sizeof(map->new_tablespace_suffix));
  	}
  	else
  	{
  		/* relation belongs to a tablespace, so use the tablespace location */
! 		strlcpy(map->old_tablespace, old_rel->tablespace, sizeof(map->old_tablespace));
! 		strlcpy(map->new_tablespace, new_rel->tablespace, sizeof(map->new_tablespace));
! 		strlcpy(map->old_tablespace_suffix, old_cluster.tablespace_suffix,
! 				sizeof(map->old_tablespace_suffix));
! 		strlcpy(map->new_tablespace_suffix, new_cluster.tablespace_suffix,
! 				sizeof(map->new_tablespace_suffix));
  	}
  
  	map->old_db_oid = old_db->db_oid;
--- 108,125 ----
  		 * relation belongs to the default tablespace, hence relfiles should
  		 * exist in the data directories.
  		 */
! 		map->old_tablespace = old_data;
! 		map->new_tablespace = new_data;
! 		map->old_tablespace_suffix = "/base";
! 		map->new_tablespace_suffix = "/base";
  	}
  	else
  	{
  		/* relation belongs to a tablespace, so use the tablespace location */
! 		map->old_tablespace = old_rel->tablespace;
! 		map->new_tablespace = new_rel->tablespace;
! 		map->old_tablespace_suffix = old_cluster.tablespace_suffix;
! 		map->new_tablespace_suffix = new_cluster.tablespace_suffix;
  	}
  
  	map->old_db_oid = old_db->db_oid;
*************** get_db_infos(ClusterInfo *cluster)
*** 231,237 ****
  	{
  		dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
  		dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
! 		snprintf(dbinfos[tupnum].db_tblspace, sizeof(dbinfos[tupnum].db_tblspace), "%s",
  				 PQgetvalue(res, tupnum, i_spclocation));
  	}
  	PQclear(res);
--- 229,235 ----
  	{
  		dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
  		dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
! 		snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
  				 PQgetvalue(res, tupnum, i_spclocation));
  	}
  	PQclear(res);
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 264,269 ****
--- 262,268 ----
  	int			num_rels = 0;
  	char	   *nspname = NULL;
  	char	   *relname = NULL;
+ 	char	   *tablespace = NULL;
  	int			i_spclocation,
  				i_nspname,
  				i_relname,
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 271,276 ****
--- 270,276 ----
  				i_relfilenode,
  				i_reltablespace;
  	char		query[QUERY_ALLOC];
+ 	char	   *last_namespace = NULL, *last_tablespace = NULL;
  
  	/*
  	 * pg_largeobject contains user data that does not appear in pg_dumpall
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 366,391 ****
  	for (relnum = 0; relnum < ntups; relnum++)
  	{
  		RelInfo    *curr = &relinfos[num_rels++];
- 		const char *tblspace;
  
  		curr->reloid = atooid(PQgetvalue(res, relnum, i_oid));
  
  		nspname = PQgetvalue(res, relnum, i_nspname);
! 		curr->nspname = pg_strdup(nspname);
  
  		relname = PQgetvalue(res, relnum, i_relname);
  		curr->relname = pg_strdup(relname);
  
  		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
  
  		if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0)
! 			/* Might be "", meaning the cluster default location. */
! 			tblspace = PQgetvalue(res, relnum, i_spclocation);
! 		else
! 			/* A zero reltablespace indicates the database tablespace. */
! 			tblspace = dbinfo->db_tblspace;
  
! 		strlcpy(curr->tablespace, tblspace, sizeof(curr->tablespace));
  	}
  	PQclear(res);
  
--- 366,418 ----
  	for (relnum = 0; relnum < ntups; relnum++)
  	{
  		RelInfo    *curr = &relinfos[num_rels++];
  
  		curr->reloid = atooid(PQgetvalue(res, relnum, i_oid));
  
  		nspname = PQgetvalue(res, relnum, i_nspname);
! 		curr->nsp_alloc = false;
! 
! 		/*
! 		 * Many of the namespace and tablespace strings are identical,
! 		 * so we try to reuse the allocated string pointers where possible
! 		 * to reduce memory consumption.
! 		 */
! 		/* Can we reuse the previous string allocation? */
! 		if (last_namespace && strcmp(nspname, last_namespace) == 0)
! 			curr->nspname = last_namespace;
! 		else
! 		{
! 			last_namespace = curr->nspname = pg_strdup(nspname);
! 			curr->nsp_alloc = true;
! 		}
  
  		relname = PQgetvalue(res, relnum, i_relname);
  		curr->relname = pg_strdup(relname);
  
  		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
+ 		curr->tblsp_alloc = false;
  
+ 		/* Is the tablespace oid non-zero? */
  		if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0)
! 		{
! 			/*
! 			 * The tablespace location might be "", meaning the cluster
! 			 * default location, i.e. pg_default or pg_global.
! 			 */
! 			tablespace = PQgetvalue(res, relnum, i_spclocation);
  
! 			/* Can we reuse the previous string allocation? */
! 			if (last_tablespace && strcmp(tablespace, last_tablespace) == 0)
! 				curr->tablespace = last_tablespace;
! 			else
! 			{
! 				last_tablespace = curr->tablespace = pg_strdup(tablespace);
! 				curr->tblsp_alloc = true;
! 			}
! 		}
! 		else
! 			/* A zero reltablespace oid indicates the database tablespace. */
! 			curr->tablespace = dbinfo->db_tablespace;
  	}
  	PQclear(res);
  
*************** free_rel_infos(RelInfoArr *rel_arr)
*** 419,426 ****
  
  	for (relnum = 0; relnum < rel_arr->nrels; relnum++)
  	{
! 		pg_free(rel_arr->rels[relnum].nspname);
  		pg_free(rel_arr->rels[relnum].relname);
  	}
  	pg_free(rel_arr->rels);
  	rel_arr->nrels = 0;
--- 446,456 ----
  
  	for (relnum = 0; relnum < rel_arr->nrels; relnum++)
  	{
! 		if (rel_arr->rels[relnum].nsp_alloc)
! 			pg_free(rel_arr->rels[relnum].nspname);
  		pg_free(rel_arr->rels[relnum].relname);
+ 		if (rel_arr->rels[relnum].tblsp_alloc)
+ 			pg_free(rel_arr->rels[relnum].tablespace);
  	}
  	pg_free(rel_arr->rels);
  	rel_arr->nrels = 0;
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
new file mode 100644
index de487bc..389ec93
*** a/contrib/pg_upgrade/pg_upgrade.h
--- b/contrib/pg_upgrade/pg_upgrade.h
*************** typedef struct
*** 127,133 ****
  	Oid			reloid;			/* relation oid */
  	Oid			relfilenode;	/* relation relfile node */
  	/* relation tablespace path, or "" for the cluster default */
! 	char		tablespace[MAXPGPATH];
  } RelInfo;
  
  typedef struct
--- 127,135 ----
  	Oid			reloid;			/* relation oid */
  	Oid			relfilenode;	/* relation relfile node */
  	/* relation tablespace path, or "" for the cluster default */
! 	char	   *tablespace;
! 	bool		nsp_alloc;
! 	bool		tblsp_alloc;
  } RelInfo;
  
  typedef struct
*************** typedef struct
*** 141,150 ****
   */
  typedef struct
  {
! 	char		old_tablespace[MAXPGPATH];
! 	char		new_tablespace[MAXPGPATH];
! 	char		old_tablespace_suffix[MAXPGPATH];
! 	char		new_tablespace_suffix[MAXPGPATH];
  	Oid			old_db_oid;
  	Oid			new_db_oid;
  
--- 143,152 ----
   */
  typedef struct
  {
! 	const char		*old_tablespace;
! 	const char		*new_tablespace;
! 	const char		*old_tablespace_suffix;
! 	const char		*new_tablespace_suffix;
  	Oid			old_db_oid;
  	Oid			new_db_oid;
  
*************** typedef struct
*** 166,172 ****
  {
  	Oid			db_oid;			/* oid of the database */
  	char	   *db_name;		/* database name */
! 	char		db_tblspace[MAXPGPATH]; /* database default tablespace path */
  	RelInfoArr	rel_arr;		/* array of all user relinfos */
  } DbInfo;
  
--- 168,174 ----
  {
  	Oid			db_oid;			/* oid of the database */
  	char	   *db_name;		/* database name */
! 	char		db_tablespace[MAXPGPATH]; /* database default tablespace path */
  	RelInfoArr	rel_arr;		/* array of all user relinfos */
  } DbInfo;
  
*************** typedef struct
*** 256,262 ****
  	Oid			pg_database_oid;	/* OID of pg_database relation */
  	Oid			install_role_oid;		/* OID of connected role */
  	Oid			role_count;		/* number of roles defined in the cluster */
! 	char	   *tablespace_suffix;		/* directory specification */
  } ClusterInfo;
  
  
--- 258,264 ----
  	Oid			pg_database_oid;	/* OID of pg_database relation */
  	Oid			install_role_oid;		/* OID of connected role */
  	Oid			role_count;		/* number of roles defined in the cluster */
! 	const char *tablespace_suffix;		/* directory specification */
  } ClusterInfo;
  
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to