Currently, initdb parses locale -a output to populate pg_collation.  If
additional collations are installed in the operating system, it is not
possible to repeat this process, only by doing each step manually.  So I
propose to move this to a backend function that can be called
separately, and have initdb call that.  Running this logic in the
backend instead of initdb also makes the code simpler.  If we add other
collation providers such as ICU, initdb doesn't need to know about that
at all, because all the logic would be contained in the backend.

Here is an example:

    select pg_import_system_collations(if_not_exists => false, schema =>
'test');

(Specifying the schema also allows testing this without overwriting
pg_catalog.)

I thought about making this a top-level command (IMPORT COLLATIONS ...
?) but decided against it for now, to keep it simple.  Right now, this
is more of a refactoring.  Documentation could be added if we decide so.

-- 
Peter Eisentraut              http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 97fb7f992b95d2ca8725011dc141dad88051a8cd Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pete...@gmx.net>
Date: Thu, 13 Oct 2016 12:00:00 -0400
Subject: [PATCH] Add function to import operation system collations

Move this logic out of initdb into a user-callable function.  This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.
---
 src/backend/catalog/pg_collation.c    |  18 +++-
 src/backend/commands/collationcmds.c  | 146 +++++++++++++++++++++++++++++-
 src/bin/initdb/initdb.c               | 164 +---------------------------------
 src/include/catalog/pg_collation_fn.h |   3 +-
 src/include/catalog/pg_proc.h         |   3 +
 5 files changed, 167 insertions(+), 167 deletions(-)

diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index f37cf37..cda64c4 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -41,7 +41,8 @@ Oid
 CollationCreate(const char *collname, Oid collnamespace,
 				Oid collowner,
 				int32 collencoding,
-				const char *collcollate, const char *collctype)
+				const char *collcollate, const char *collctype,
+				bool if_not_exists)
 {
 	Relation	rel;
 	TupleDesc	tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
 							  PointerGetDatum(collname),
 							  Int32GetDatum(collencoding),
 							  ObjectIdGetDatum(collnamespace)))
-		ereport(ERROR,
+	{
+		if (if_not_exists)
+		{
+			ereport(NOTICE,
 				(errcode(ERRCODE_DUPLICATE_OBJECT),
-				 errmsg("collation \"%s\" for encoding \"%s\" already exists",
+				 errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
 						collname, pg_encoding_to_char(collencoding))));
+			return InvalidOid;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("collation \"%s\" for encoding \"%s\" already exists",
+							collname, pg_encoding_to_char(collencoding))));
+	}
 
 	/*
 	 * Also forbid matching an any-encoding entry.  This test of course is not
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 9bba748..062e3b6 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
 							 GetUserId(),
 							 GetDatabaseEncoding(),
 							 collcollate,
-							 collctype);
+							 collctype,
+							 false);
+
+	if (!newoid)
+		return InvalidObjectAddress;
 
 	ObjectAddressSet(address, CollationRelationId, newoid);
 
@@ -177,3 +181,143 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
 				 errmsg("collation \"%s\" already exists in schema \"%s\"",
 						collname, get_namespace_name(nspOid))));
 }
+
+
+/*
+ * "Normalize" a locale name, stripping off encoding tags such as
+ * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
+ * -> "br_FR@euro").  Return true if a new, different name was
+ * generated.
+ */
+static bool
+normalize_locale_name(char *new, const char *old)
+{
+	char	   *n = new;
+	const char *o = old;
+	bool		changed = false;
+
+	while (*o)
+	{
+		if (*o == '.')
+		{
+			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
+			o++;
+			while ((*o >= 'A' && *o <= 'Z')
+				   || (*o >= 'a' && *o <= 'z')
+				   || (*o >= '0' && *o <= '9')
+				   || (*o == '-'))
+				o++;
+			changed = true;
+		}
+		else
+			*n++ = *o++;
+	}
+	*n = '\0';
+
+	return changed;
+}
+
+
+Datum pg_import_system_collations(PG_FUNCTION_ARGS);
+
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+	bool		if_not_exists = PG_GETARG_BOOL(0);
+	Oid         nspid = PG_GETARG_OID(1);
+
+	FILE	   *locale_a_handle;
+	char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
+	int			count = 0;
+
+	locale_a_handle = OpenPipeStream("locale -a", "r");
+	if (locale_a_handle == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not execute command \"%s\": %m",
+						"locale -a")));
+
+	while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+	{
+		int			i;
+		size_t		len;
+		int			enc;
+		bool		skip;
+		char		alias[NAMEDATALEN];
+
+		len = strlen(localebuf);
+
+		if (len == 0 || localebuf[len - 1] != '\n')
+		{
+			elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
+			continue;
+		}
+		localebuf[len - 1] = '\0';
+
+		/*
+		 * Some systems have locale names that don't consist entirely of ASCII
+		 * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
+		 * pretty silly, since we need the locale itself to interpret the
+		 * non-ASCII characters. We can't do much with those, so we filter
+		 * them out.
+		 */
+		skip = false;
+		for (i = 0; i < len; i++)
+		{
+			if (IS_HIGHBIT_SET(localebuf[i]))
+			{
+				skip = true;
+				break;
+			}
+		}
+		if (skip)
+		{
+			elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
+			continue;
+		}
+
+		enc = pg_get_encoding_from_locale(localebuf, false);
+		if (enc < 0)
+		{
+			/* error message printed by pg_get_encoding_from_locale() */
+			continue;
+		}
+		if (!PG_VALID_BE_ENCODING(enc))
+			continue;			/* ignore locales for client-only encodings */
+		if (enc == PG_SQL_ASCII)
+			continue;			/* C/POSIX are already in the catalog */
+
+		count++;
+
+		CollationCreate(localebuf, nspid, GetUserId(), enc,
+						localebuf, localebuf, if_not_exists);
+
+		CommandCounterIncrement();
+
+		/*
+		 * Generate aliases such as "en_US" in addition to "en_US.utf8" for
+		 * ease of use.  Note that collation names are unique per encoding
+		 * only, so this doesn't clash with "en_US" for LATIN1, say.
+		 *
+		 * This always runs in "if not exists" mode, to skip aliases that
+		 * conflict with an existing locale name for the same encoding.  For
+		 * example, "br_FR.iso88591" is normalized to "br_FR", both for
+		 * encoding LATIN1.  But the unnormalized locale "br_FR" already
+		 * exists for LATIN1.
+		 */
+		if (normalize_locale_name(alias, localebuf))
+		{
+			CollationCreate(alias, nspid, GetUserId(), enc,
+							localebuf, localebuf, true);
+			CommandCounterIncrement();
+		}
+	}
+
+	ClosePipeStream(locale_a_handle);
+
+	if (count == 0)
+		ereport(ERROR,
+				(errmsg("no usable system locales were found")));
+
+	PG_RETURN_VOID();
+}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index c8a8c52..498729c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1587,42 +1587,6 @@ setup_description(FILE *cmdfd)
 	PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
 }
 
-#ifdef HAVE_LOCALE_T
-/*
- * "Normalize" a locale name, stripping off encoding tags such as
- * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
- * -> "br_FR@euro").  Return true if a new, different name was
- * generated.
- */
-static bool
-normalize_locale_name(char *new, const char *old)
-{
-	char	   *n = new;
-	const char *o = old;
-	bool		changed = false;
-
-	while (*o)
-	{
-		if (*o == '.')
-		{
-			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
-			o++;
-			while ((*o >= 'A' && *o <= 'Z')
-				   || (*o >= 'a' && *o <= 'z')
-				   || (*o >= '0' && *o <= '9')
-				   || (*o == '-'))
-				o++;
-			changed = true;
-		}
-		else
-			*n++ = *o++;
-	}
-	*n = '\0';
-
-	return changed;
-}
-#endif   /* HAVE_LOCALE_T */
-
 /*
  * populate pg_collation
  */
@@ -1630,134 +1594,10 @@ static void
 setup_collation(FILE *cmdfd)
 {
 #if defined(HAVE_LOCALE_T) && !defined(WIN32)
-	int			i;
-	FILE	   *locale_a_handle;
-	char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
-	int			count = 0;
-
-	locale_a_handle = popen_check("locale -a", "r");
-	if (!locale_a_handle)
-		return;					/* complaint already printed */
-
-	PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
-				"	collname name, "
-				"	locale name, "
-				"	encoding int) WITHOUT OIDS;\n\n");
-
-	while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
-	{
-		size_t		len;
-		int			enc;
-		bool		skip;
-		char	   *quoted_locale;
-		char		alias[NAMEDATALEN];
-
-		len = strlen(localebuf);
-
-		if (len == 0 || localebuf[len - 1] != '\n')
-		{
-			if (debug)
-				fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
-						progname, localebuf);
-			continue;
-		}
-		localebuf[len - 1] = '\0';
-
-		/*
-		 * Some systems have locale names that don't consist entirely of ASCII
-		 * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
-		 * pretty silly, since we need the locale itself to interpret the
-		 * non-ASCII characters. We can't do much with those, so we filter
-		 * them out.
-		 */
-		skip = false;
-		for (i = 0; i < len; i++)
-		{
-			if (IS_HIGHBIT_SET(localebuf[i]))
-			{
-				skip = true;
-				break;
-			}
-		}
-		if (skip)
-		{
-			if (debug)
-				fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
-						progname, localebuf);
-			continue;
-		}
-
-		enc = pg_get_encoding_from_locale(localebuf, debug);
-		if (enc < 0)
-		{
-			/* error message printed by pg_get_encoding_from_locale() */
-			continue;
-		}
-		if (!PG_VALID_BE_ENCODING(enc))
-			continue;			/* ignore locales for client-only encodings */
-		if (enc == PG_SQL_ASCII)
-			continue;			/* C/POSIX are already in the catalog */
-
-		count++;
-
-		quoted_locale = escape_quotes(localebuf);
-
-		PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-					   quoted_locale, quoted_locale, enc);
-
-		/*
-		 * Generate aliases such as "en_US" in addition to "en_US.utf8" for
-		 * ease of use.  Note that collation names are unique per encoding
-		 * only, so this doesn't clash with "en_US" for LATIN1, say.
-		 */
-		if (normalize_locale_name(alias, localebuf))
-		{
-			char	   *quoted_alias = escape_quotes(alias);
-
-			PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-						   quoted_alias, quoted_locale, enc);
-			free(quoted_alias);
-		}
-		free(quoted_locale);
-	}
+	PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
 
 	/* Add an SQL-standard name */
-	PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
-
-	/*
-	 * When copying collations to the final location, eliminate aliases that
-	 * conflict with an existing locale name for the same encoding.  For
-	 * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
-	 * LATIN1.  But the unnormalized locale "br_FR" already exists for LATIN1.
-	 * Prefer the alias that matches the OS locale name, else the first locale
-	 * name by sort order (arbitrary choice to be deterministic).
-	 *
-	 * Also, eliminate any aliases that conflict with pg_collation's
-	 * hard-wired entries for "C" etc.
-	 */
-	PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
-				" SELECT DISTINCT ON (collname, encoding)"
-				"   collname, "
-				"   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
-				"   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
-				"   encoding, locale, locale "
-				"  FROM tmp_pg_collation"
-				"  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
-	 "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
-
-	/*
-	 * Even though the table is temp, drop it explicitly so it doesn't get
-	 * copied into template0/postgres databases.
-	 */
-	PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
-
-	pclose(locale_a_handle);
-
-	if (count == 0 && !debug)
-	{
-		printf(_("No usable system locales were found.\n"));
-		printf(_("Use the option \"--debug\" to see details.\n"));
-	}
+	PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
 #endif   /* not HAVE_LOCALE_T  && not WIN32 */
 }
 
diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h
index 574b288..ac1a81d 100644
--- a/src/include/catalog/pg_collation_fn.h
+++ b/src/include/catalog/pg_collation_fn.h
@@ -17,7 +17,8 @@
 extern Oid CollationCreate(const char *collname, Oid collnamespace,
 				Oid collowner,
 				int32 collencoding,
-				const char *collcollate, const char *collctype);
+				const char *collcollate, const char *collctype,
+				bool if_not_exists);
 extern void RemoveCollationById(Oid collationOid);
 
 #endif   /* PG_COLLATION_FN_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 17ec71d..0283310 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5341,6 +5341,9 @@ DESCR("pg_controldata recovery state information as a function");
 DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
 DESCR("pg_controldata init state information as a function");
 
+DATA(insert OID = 4032 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
+DESCR("import collations from operating system");
+
 /*
  * Symbolic values for provolatile column: these indicate whether the result
  * of a function is dependent *only* on the values of its explicit arguments,
-- 
2.10.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to