From 90fad2d7524620dca35d667aa3266cdb8fb16cac Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 7 Oct 2019 12:21:36 +0200
Subject: [PATCH v2] Add option to use ICU as global collation provider

This adds the option to use ICU as the default collation provider for
either the whole cluster or a database.  New options for initdb,
createdb, and CREATE DATABASE are used to select this.
---
 doc/src/sgml/ref/createdb.sgml       |   9 ++
 doc/src/sgml/ref/initdb.sgml         |  23 ++++
 src/backend/access/hash/hashfunc.c   |  18 ++-
 src/backend/commands/dbcommands.c    |  52 ++++++++-
 src/backend/regex/regc_pg_locale.c   |   7 +-
 src/backend/utils/adt/formatting.c   |   6 +
 src/backend/utils/adt/like.c         |  20 +++-
 src/backend/utils/adt/like_support.c |   2 +
 src/backend/utils/adt/pg_locale.c    | 164 ++++++++++++++++-----------
 src/backend/utils/adt/varchar.c      |  22 +++-
 src/backend/utils/adt/varlena.c      |  26 ++++-
 src/backend/utils/init/postinit.c    |  21 ++++
 src/bin/initdb/Makefile              |   2 +
 src/bin/initdb/initdb.c              |  63 ++++++++--
 src/bin/initdb/t/001_initdb.pl       |  18 ++-
 src/bin/pg_dump/pg_dump.c            |  16 +++
 src/bin/psql/describe.c              |   8 ++
 src/bin/scripts/Makefile             |   2 +
 src/bin/scripts/createdb.c           |   9 ++
 src/bin/scripts/t/020_createdb.pl    |  19 +++-
 src/include/catalog/pg_database.dat  |   2 +-
 src/include/catalog/pg_database.h    |   3 +
 src/include/utils/pg_locale.h        |   6 +
 23 files changed, 413 insertions(+), 105 deletions(-)

diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml
index 8fc8128bf9..5b73afad91 100644
--- a/doc/src/sgml/ref/createdb.sgml
+++ b/doc/src/sgml/ref/createdb.sgml
@@ -85,6 +85,15 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--collation-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+      <listitem>
+       <para>
+        Specifies the collation provider for the database.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-D <replaceable class="parameter">tablespace</replaceable></option></term>
       <term><option>--tablespace=<replaceable class="parameter">tablespace</replaceable></option></term>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index da5c8f5307..9ad7b2e112 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -165,6 +165,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--collation-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+      <listitem>
+       <para>
+        This option sets the collation provider for databases created in the
+        new cluster.  It can be overridden in the <command>CREATE
+        DATABASE</command> command when new databases are subsequently
+        created.  The default is <literal>libc</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-D <replaceable class="parameter">directory</replaceable></option></term>
       <term><option>--pgdata=<replaceable class="parameter">directory</replaceable></option></term>
@@ -209,6 +221,17 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--icu-locale=<replaceable>locale</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the ICU locale if the ICU collation provider is used.  If not
+        specified, the <option>--locale</option> value is used, or else the
+        <literal>LC_COLLATE</literal> locale minus any encoding suffix.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-data-checksums" xreflabel="data checksums">
       <term><option>-k</option></term>
       <term><option>--data-checksums</option></term>
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 6ec1ec3df3..2f8f220549 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -255,8 +255,13 @@ hashtext(PG_FUNCTION_ARGS)
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (!mylocale || mylocale->deterministic)
 	{
@@ -311,8 +316,13 @@ hashtextextended(PG_FUNCTION_ARGS)
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (!mylocale || mylocale->deterministic)
 	{
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 01d66212e9..e068c02d18 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -36,6 +36,7 @@
 #include "catalog/indexing.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_authid.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_db_role_setting.h"
 #include "catalog/pg_subscription.h"
@@ -87,7 +88,8 @@ static bool get_db_info(const char *name, LOCKMODE lockmode,
 						int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
 						Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
 						MultiXactId *dbMinMultiP,
-						Oid *dbTablespace, char **dbCollate, char **dbCtype);
+						Oid *dbTablespace, char **dbCollate, char **dbCtype,
+						char *dbCollProvider);
 static bool have_createdb_privilege(void);
 static void remove_dbtablespaces(Oid db_id);
 static bool check_db_file_conflict(Oid db_id);
@@ -107,6 +109,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	int			src_encoding = -1;
 	char	   *src_collate = NULL;
 	char	   *src_ctype = NULL;
+	char		src_collprovider;
 	bool		src_istemplate;
 	bool		src_allowconn;
 	Oid			src_lastsysoid = InvalidOid;
@@ -128,6 +131,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	DefElem    *dlocale = NULL;
 	DefElem    *dcollate = NULL;
 	DefElem    *dctype = NULL;
+	DefElem	   *dcollprovider = NULL;
 	DefElem    *distemplate = NULL;
 	DefElem    *dallowconnections = NULL;
 	DefElem    *dconnlimit = NULL;
@@ -136,6 +140,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	const char *dbtemplate = NULL;
 	char	   *dbcollate = NULL;
 	char	   *dbctype = NULL;
+	char		dbcollprovider = '\0';
 	char	   *canonname;
 	int			encoding = -1;
 	bool		dbistemplate = false;
@@ -213,6 +218,15 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 						 parser_errposition(pstate, defel->location)));
 			dctype = defel;
 		}
+		else if (strcmp(defel->defname, "collation_provider") == 0)
+		{
+			if (dcollprovider)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options"),
+						 parser_errposition(pstate, defel->location)));
+			dcollprovider = defel;
+		}
 		else if (strcmp(defel->defname, "is_template") == 0)
 		{
 			if (distemplate)
@@ -302,6 +316,23 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 		dbcollate = defGetString(dcollate);
 	if (dctype && dctype->arg)
 		dbctype = defGetString(dctype);
+	if (dcollprovider && dcollprovider->arg)
+	{
+		char	   *collproviderstr = defGetString(dcollprovider);
+
+#ifdef USE_ICU
+		if (pg_strcasecmp(collproviderstr, "icu") == 0)
+			dbcollprovider = COLLPROVIDER_ICU;
+		else
+#endif
+		if (pg_strcasecmp(collproviderstr, "libc") == 0)
+			dbcollprovider = COLLPROVIDER_LIBC;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("unrecognized collation provider: %s",
+							collproviderstr)));
+	}
 	if (distemplate && distemplate->arg)
 		dbistemplate = defGetBoolean(distemplate);
 	if (dallowconnections && dallowconnections->arg)
@@ -351,7 +382,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 					 &src_dboid, &src_owner, &src_encoding,
 					 &src_istemplate, &src_allowconn, &src_lastsysoid,
 					 &src_frozenxid, &src_minmxid, &src_deftablespace,
-					 &src_collate, &src_ctype))
+					 &src_collate, &src_ctype, &src_collprovider))
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_DATABASE),
 				 errmsg("template database \"%s\" does not exist",
@@ -377,6 +408,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 		dbcollate = src_collate;
 	if (dbctype == NULL)
 		dbctype = src_ctype;
+	if (dbcollprovider == '\0')
+		dbcollprovider = src_collprovider;
 
 	/* Some encodings are client only */
 	if (!PG_VALID_BE_ENCODING(encoding))
@@ -384,6 +417,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("invalid server encoding %d", encoding)));
 
+	if (dbcollprovider == COLLPROVIDER_LIBC)
+	{
 	/* Check that the chosen locales are valid, and get canonical spellings */
 	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
 		ereport(ERROR,
@@ -397,6 +432,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	dbctype = canonname;
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
+	}
 
 	/*
 	 * Check that the new encoding and locale settings match the source
@@ -560,6 +596,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 		DirectFunctionCall1(namein, CStringGetDatum(dbcollate));
 	new_record[Anum_pg_database_datctype - 1] =
 		DirectFunctionCall1(namein, CStringGetDatum(dbctype));
+	new_record[Anum_pg_database_datcollprovider - 1] = CharGetDatum(dbcollprovider);
 	new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
 	new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
 	new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
@@ -833,7 +870,7 @@ dropdb(const char *dbname, bool missing_ok)
 	pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
 
 	if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
-					 &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+					 &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
 	{
 		if (!missing_ok)
 		{
@@ -1024,7 +1061,7 @@ RenameDatabase(const char *oldname, const char *newname)
 	rel = table_open(DatabaseRelationId, RowExclusiveLock);
 
 	if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
-					 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+					 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_DATABASE),
 				 errmsg("database \"%s\" does not exist", oldname)));
@@ -1137,7 +1174,7 @@ movedb(const char *dbname, const char *tblspcname)
 	pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
 
 	if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
-					 NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
+					 NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL))
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_DATABASE),
 				 errmsg("database \"%s\" does not exist", dbname)));
@@ -1769,7 +1806,8 @@ get_db_info(const char *name, LOCKMODE lockmode,
 			int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
 			Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
 			MultiXactId *dbMinMultiP,
-			Oid *dbTablespace, char **dbCollate, char **dbCtype)
+			Oid *dbTablespace, char **dbCollate, char **dbCtype,
+			char *dbCollProvider)
 {
 	bool		result = false;
 	Relation	relation;
@@ -1866,6 +1904,8 @@ get_db_info(const char *name, LOCKMODE lockmode,
 					*dbCollate = pstrdup(NameStr(dbform->datcollate));
 				if (dbCtype)
 					*dbCtype = pstrdup(NameStr(dbform->datctype));
+				if (dbCollProvider)
+					*dbCollProvider = dbform->datcollprovider;
 				ReleaseSysCache(tuple);
 				result = true;
 				break;
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 4a808b7606..510bd71371 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -241,7 +241,12 @@ pg_set_regex_collation(Oid collation)
 	else
 	{
 		if (collation == DEFAULT_COLLATION_OID)
-			pg_regex_locale = 0;
+		{
+			if (global_locale.provider == COLLPROVIDER_ICU)
+				pg_regex_locale = &global_locale;
+			else
+				pg_regex_locale = 0;
+		}
 		else if (OidIsValid(collation))
 		{
 			/*
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index f7175df8da..dca8ca566f 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1674,6 +1674,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 			}
 			mylocale = pg_newlocale_from_collation(collid);
 		}
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
 
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1798,6 +1800,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 			}
 			mylocale = pg_newlocale_from_collation(collid);
 		}
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
 
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1923,6 +1927,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 			}
 			mylocale = pg_newlocale_from_collation(collid);
 		}
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
 
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 13d5cb083c..57dd3fe59d 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -150,9 +150,14 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 static inline int
 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
 {
-	if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
+	if (collation && !lc_ctype_is_c(collation))
 	{
-		pg_locale_t locale = pg_newlocale_from_collation(collation);
+		pg_locale_t locale = 0;
+
+		if (collation != DEFAULT_COLLATION_OID)
+			locale = pg_newlocale_from_collation(collation);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			locale = &global_locale;
 
 		if (locale && !locale->deterministic)
 			ereport(ERROR,
@@ -195,11 +200,14 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 		}
 		locale = pg_newlocale_from_collation(collation);
 
-		if (locale && !locale->deterministic)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("nondeterministic collations are not supported for ILIKE")));
 	}
+	else if (global_locale.provider == COLLPROVIDER_ICU)
+		locale = &global_locale;
+
+	if (locale && !locale->deterministic)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("nondeterministic collations are not supported for ILIKE")));
 
 	/*
 	 * For efficiency reasons, in the single byte case we don't call lower()
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index c8fec7863f..09a28aab8e 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -966,6 +966,8 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 			}
 			locale = pg_newlocale_from_collation(collation);
 		}
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			locale = &global_locale;
 	}
 
 	if (typeid != BYTEAOID)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index fcdbaae37b..5a2cbc7dfb 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1191,6 +1191,9 @@ lc_collate_is_c(Oid collation)
 		static int	result = -1;
 		char	   *localeptr;
 
+		if (global_locale.provider == COLLPROVIDER_ICU)
+			return false;
+
 		if (result >= 0)
 			return (bool) result;
 		localeptr = setlocale(LC_COLLATE, NULL);
@@ -1241,6 +1244,9 @@ lc_ctype_is_c(Oid collation)
 		static int	result = -1;
 		char	   *localeptr;
 
+		if (global_locale.provider == COLLPROVIDER_ICU)
+			return false;
+
 		if (result >= 0)
 			return (bool) result;
 		localeptr = setlocale(LC_CTYPE, NULL);
@@ -1269,6 +1275,89 @@ lc_ctype_is_c(Oid collation)
 	return (lookup_collation_cache(collation, true))->ctype_is_c;
 }
 
+struct pg_locale_struct global_locale;	/* database default locale, filled in by CheckMyDatabase() */
+
+/* Open the ICU collator for collcollate and save it in resultp->info.icu. */
+void
+make_icu_collator(const char *collcollate, const char *collctype,
+				  struct pg_locale_struct *resultp)
+{
+#ifdef USE_ICU
+	UCollator  *collator;
+	UErrorCode	status = U_ZERO_ERROR;
+
+	if (strcmp(collcollate, collctype) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("collations with different collate and ctype values are not supported by ICU")));
+
+	collator = ucol_open(collcollate, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("could not open collator for locale \"%s\": %s",
+						collcollate, u_errorName(status))));
+
+	if (U_ICU_VERSION_MAJOR_NUM < 54)
+		icu_set_collation_attributes(collator, collcollate);
+
+	/* This string is leaked if our caller errors out afterwards :-( */
+	resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext,
+												   collcollate);
+	resultp->info.icu.ucol = collator;
+#else							/* not USE_ICU */
+	/* could get here if a collation was created by a build with ICU */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("ICU is not supported in this build"),
+			 errhint("You need to rebuild PostgreSQL using --with-icu.")));
+#endif							/* not USE_ICU */
+}
+
+/* Warn if colltuple's recorded collversion differs from the actual version. */
+void
+check_collation_version(HeapTuple colltuple)
+{
+	Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(colltuple);
+	Datum		stored;
+	bool		stored_isnull;
+	char	   *actual_str;
+	char	   *stored_str;
+
+	stored = SysCacheGetAttr(COLLOID, colltuple,
+							 Anum_pg_collation_collversion, &stored_isnull);
+
+	/* No version on record: there is nothing to compare against. */
+	if (stored_isnull)
+		return;
+
+	actual_str = get_collation_actual_version(coll->collprovider,
+											  NameStr(coll->collcollate));
+
+	/*
+	 * A recorded version without an actual one can come from CREATE COLLATION
+	 * with a version for a libc locale, or from hand-edited catalogs.
+	 */
+	if (actual_str == NULL)
+		ereport(ERROR,
+				(errmsg("collation \"%s\" has no actual version, but a version was specified",
+						NameStr(coll->collname))));
+
+	stored_str = TextDatumGetCString(stored);
+	if (strcmp(actual_str, stored_str) == 0)
+		return;
+
+	ereport(WARNING,
+			(errmsg("collation \"%s\" has version mismatch",
+					NameStr(coll->collname)),
+			 errdetail("The collation in the database was created using version %s, "
+					   "but the operating system provides version %s.",
+					   stored_str, actual_str),
+			 errhint("Rebuild all objects affected by this collation and run "
+					 "ALTER COLLATION %s REFRESH VERSION, "
+					 "or build PostgreSQL with the right library version.",
+					 quote_qualified_identifier(get_namespace_name(coll->collnamespace),
+												NameStr(coll->collname)))));
+}
 
 /* simple subroutine for reporting errors from newlocale() */
 #ifdef HAVE_LOCALE_T
@@ -1342,8 +1431,6 @@ pg_newlocale_from_collation(Oid collid)
 		const char *collctype pg_attribute_unused();
 		struct pg_locale_struct result;
 		pg_locale_t resultp;
-		Datum		collversion;
-		bool		isnull;
 
 		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
 		if (!HeapTupleIsValid(tp))
@@ -1413,72 +1500,10 @@ pg_newlocale_from_collation(Oid collid)
 		}
 		else if (collform->collprovider == COLLPROVIDER_ICU)
 		{
-#ifdef USE_ICU
-			UCollator  *collator;
-			UErrorCode	status;
-
-			if (strcmp(collcollate, collctype) != 0)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("collations with different collate and ctype values are not supported by ICU")));
-
-			status = U_ZERO_ERROR;
-			collator = ucol_open(collcollate, &status);
-			if (U_FAILURE(status))
-				ereport(ERROR,
-						(errmsg("could not open collator for locale \"%s\": %s",
-								collcollate, u_errorName(status))));
-
-			if (U_ICU_VERSION_MAJOR_NUM < 54)
-				icu_set_collation_attributes(collator, collcollate);
-
-			/* We will leak this string if we get an error below :-( */
-			result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
-														 collcollate);
-			result.info.icu.ucol = collator;
-#else							/* not USE_ICU */
-			/* could get here if a collation was created by a build with ICU */
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("ICU is not supported in this build"), \
-					 errhint("You need to rebuild PostgreSQL using --with-icu.")));
-#endif							/* not USE_ICU */
+			make_icu_collator(collcollate, collctype, &result);
 		}
 
-		collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
-									  &isnull);
-		if (!isnull)
-		{
-			char	   *actual_versionstr;
-			char	   *collversionstr;
-
-			actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
-			if (!actual_versionstr)
-			{
-				/*
-				 * This could happen when specifying a version in CREATE
-				 * COLLATION for a libc locale, or manually creating a mess in
-				 * the catalogs.
-				 */
-				ereport(ERROR,
-						(errmsg("collation \"%s\" has no actual version, but a version was specified",
-								NameStr(collform->collname))));
-			}
-			collversionstr = TextDatumGetCString(collversion);
-
-			if (strcmp(actual_versionstr, collversionstr) != 0)
-				ereport(WARNING,
-						(errmsg("collation \"%s\" has version mismatch",
-								NameStr(collform->collname)),
-						 errdetail("The collation in the database was created using version %s, "
-								   "but the operating system provides version %s.",
-								   collversionstr, actual_versionstr),
-						 errhint("Rebuild all objects affected by this collation and run "
-								 "ALTER COLLATION %s REFRESH VERSION, "
-								 "or build PostgreSQL with the right library version.",
-								 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
-															NameStr(collform->collname)))));
-		}
+		check_collation_version(tp);
 
 		ReleaseSysCache(tp);
 
@@ -1505,6 +1530,13 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 {
 	char	   *collversion = NULL;
 
+	if (collprovider == COLLPROVIDER_DEFAULT)
+	{
+		/* Recurse to the real provider. */
+		collversion = get_collation_actual_version(global_locale.provider,
+												   global_locale.info.icu.locale);
+	}
+	else
 #ifdef USE_ICU
 	if (collprovider == COLLPROVIDER_ICU)
 	{
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index e63a4e553b..d1b6ccc6a3 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -751,7 +751,7 @@ bpchareq(PG_FUNCTION_ARGS)
 	len2 = bcTruelen(arg2);
 
 	if (lc_collate_is_c(collid) ||
-		collid == DEFAULT_COLLATION_OID ||
+		(collid == DEFAULT_COLLATION_OID && global_locale.deterministic) ||
 		pg_newlocale_from_collation(collid)->deterministic)
 	{
 		/*
@@ -789,7 +789,7 @@ bpcharne(PG_FUNCTION_ARGS)
 	len2 = bcTruelen(arg2);
 
 	if (lc_collate_is_c(collid) ||
-		collid == DEFAULT_COLLATION_OID ||
+		(collid == DEFAULT_COLLATION_OID && global_locale.deterministic) ||
 		pg_newlocale_from_collation(collid)->deterministic)
 	{
 		/*
@@ -995,8 +995,13 @@ hashbpchar(PG_FUNCTION_ARGS)
 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (!mylocale || mylocale->deterministic)
 	{
@@ -1055,8 +1060,13 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (!mylocale || mylocale->deterministic)
 	{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 722b2c722d..0543a0688c 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1156,8 +1156,13 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
 
 	check_collation_set(collid);
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (mylocale && !mylocale->deterministic)
 		ereport(ERROR,
@@ -1499,6 +1504,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 
 		if (collid != DEFAULT_COLLATION_OID)
 			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
 
 		/*
 		 * memcmp() can't tell us which of two unequal strings sorts first,
@@ -1720,7 +1727,7 @@ texteq(PG_FUNCTION_ARGS)
 	check_collation_set(collid);
 
 	if (lc_collate_is_c(collid) ||
-		collid == DEFAULT_COLLATION_OID ||
+		(collid == DEFAULT_COLLATION_OID && global_locale.deterministic) ||
 		pg_newlocale_from_collation(collid)->deterministic)
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
@@ -1774,7 +1781,7 @@ textne(PG_FUNCTION_ARGS)
 	check_collation_set(collid);
 
 	if (lc_collate_is_c(collid) ||
-		collid == DEFAULT_COLLATION_OID ||
+		(collid == DEFAULT_COLLATION_OID && global_locale.deterministic) ||
 		pg_newlocale_from_collation(collid)->deterministic)
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
@@ -1886,8 +1893,13 @@ text_starts_with(PG_FUNCTION_ARGS)
 
 	check_collation_set(collid);
 
-	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	if (!lc_collate_is_c(collid))
+	{
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			mylocale = &global_locale;
+	}
 
 	if (mylocale && !mylocale->deterministic)
 		ereport(ERROR,
@@ -2002,6 +2014,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 		 */
 		if (collid != DEFAULT_COLLATION_OID)
 			locale = pg_newlocale_from_collation(collid);
+		else if (global_locale.provider == COLLPROVIDER_ICU)
+			locale = &global_locale;
 
 		/*
 		 * There is a further exception on Windows.  When the database
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 29c5ec7b58..1a91b42798 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -31,6 +31,7 @@
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_authid.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_db_role_setting.h"
 #include "catalog/pg_tablespace.h"
@@ -404,6 +405,8 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 	collate = NameStr(dbform->datcollate);
 	ctype = NameStr(dbform->datctype);
 
+	if (dbform->datcollprovider == COLLPROVIDER_LIBC)
+	{
 	if (pg_perm_setlocale(LC_COLLATE, collate) == NULL)
 		ereport(FATAL,
 				(errmsg("database locale is incompatible with operating system"),
@@ -417,6 +420,24 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 				 errdetail("The database was initialized with LC_CTYPE \"%s\", "
 						   " which is not recognized by setlocale().", ctype),
 				 errhint("Recreate the database with another locale or install the missing locale.")));
+	}
+	else if (dbform->datcollprovider == COLLPROVIDER_ICU)
+	{
+		make_icu_collator(collate, ctype, &global_locale);
+	}
+
+	global_locale.provider = dbform->datcollprovider;
+	global_locale.deterministic = true;	// TODO
+
+	{
+		HeapTuple	tp;
+
+		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(DEFAULT_COLLATION_OID));
+		if (!HeapTupleIsValid(tp))
+			elog(ERROR, "cache lookup failed for collation %u", DEFAULT_COLLATION_OID);
+		check_collation_version(tp);
+		ReleaseSysCache(tp);
+	}
 
 	/* Make the locale settings visible as GUC variables, too */
 	SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE);
diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile
index 7c404430a9..a9335a8ba6 100644
--- a/src/bin/initdb/Makefile
+++ b/src/bin/initdb/Makefile
@@ -61,6 +61,8 @@ clean distclean maintainer-clean:
 # ensure that changes in datadir propagate into object file
 initdb.o: initdb.c $(top_builddir)/src/Makefile.global
 
+export with_icu
+
 check:
 	$(prove_check)
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 88a261d9bd..62c310040a 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -129,6 +129,8 @@ static char *lc_monetary = NULL;
 static char *lc_numeric = NULL;
 static char *lc_time = NULL;
 static char *lc_messages = NULL;
+static char collation_provider[] = {COLLPROVIDER_LIBC, '\0'};
+static char *icu_locale = NULL;
 static const char *default_text_search_config = NULL;
 static char *username = NULL;
 static bool pwprompt = false;
@@ -1412,10 +1414,13 @@ bootstrap_template1(void)
 							  encodingid_to_string(encodingid));
 
 	bki_lines = replace_token(bki_lines, "LC_COLLATE",
-							  escape_quotes_bki(lc_collate));
+							  escape_quotes_bki(collation_provider[0] == COLLPROVIDER_ICU ? icu_locale : lc_collate));
 
 	bki_lines = replace_token(bki_lines, "LC_CTYPE",
-							  escape_quotes_bki(lc_ctype));
+							  escape_quotes_bki(collation_provider[0] == COLLPROVIDER_ICU ? icu_locale : lc_ctype));
+
+	bki_lines = replace_token(bki_lines, "COLLPROVIDER",
+							  collation_provider);
 
 	/* Also ensure backend isn't confused by this environment var: */
 	unsetenv("PGCLIENTENCODING");
@@ -1707,6 +1712,12 @@ setup_description(FILE *cmdfd)
 static void
 setup_collation(FILE *cmdfd)
 {
+	/*
+	 * Set version of the default collation.
+	 */
+	PG_CMD_PRINTF("UPDATE pg_collation SET collversion = pg_collation_actual_version(oid) WHERE oid = %d;\n\n",
+				  DEFAULT_COLLATION_OID);
+
 	/*
 	 * Add an SQL-standard name.  We don't want to pin this, so it doesn't go
 	 * in pg_collation.h.  But add it before reading system collations, so
@@ -1995,8 +2006,6 @@ make_template0(FILE *cmdfd)
 {
 	const char *const *line;
 	static const char *const template0_setup[] = {
-		"CREATE DATABASE template0 IS_TEMPLATE = true ALLOW_CONNECTIONS = false;\n\n",
-
 		/*
 		 * We use the OID of template0 to determine datlastsysoid
 		 */
@@ -2021,6 +2030,9 @@ make_template0(FILE *cmdfd)
 		NULL
 	};
 
+	PG_CMD_PRINTF("CREATE DATABASE template0 IS_TEMPLATE = true ALLOW_CONNECTIONS = false COLLATION_PROVIDER = %s;\n\n",
+				  collation_provider[0] == COLLPROVIDER_ICU ? "icu" : "libc");
+
 	for (line = template0_setup; *line; line++)
 		PG_CMD_PUTS(*line);
 }
@@ -2293,13 +2305,14 @@ setlocales(void)
 			lc_monetary = locale;
 		if (!lc_messages)
 			lc_messages = locale;
+		if (!icu_locale)
+			icu_locale = locale;
 	}
 
 	/*
 	 * canonicalize locale names, and obtain any missing values from our
 	 * current environment
 	 */
-
 	check_locale_name(LC_CTYPE, lc_ctype, &canonname);
 	lc_ctype = canonname;
 	check_locale_name(LC_COLLATE, lc_collate, &canonname);
@@ -2318,6 +2331,18 @@ setlocales(void)
 	check_locale_name(LC_CTYPE, lc_messages, &canonname);
 	lc_messages = canonname;
 #endif
+
+	/*
+	 * If ICU is selected but no ICU locale has been given, take the
+	 * lc_collate locale and chop off any encoding suffix.  This should give
+	 * the user a configuration that resembles their operating system's locale
+	 * setup.
+	 */
+	if (collation_provider[0] == COLLPROVIDER_ICU && !icu_locale)
+	{
+		icu_locale = pg_strdup(lc_collate);
+		icu_locale[strcspn(icu_locale, ".")] = '\0';
+	}
 }
 
 /*
@@ -2333,9 +2358,12 @@ usage(const char *progname)
 	printf(_("  -A, --auth=METHOD         default authentication method for local connections\n"));
 	printf(_("      --auth-host=METHOD    default authentication method for local TCP/IP connections\n"));
 	printf(_("      --auth-local=METHOD   default authentication method for local-socket connections\n"));
+	printf(_("      --collation-provider={libc|icu}\n"
+			 "                            set default collation provider for new databases\n"));
 	printf(_(" [-D, --pgdata=]DATADIR     location for this database cluster\n"));
 	printf(_("  -E, --encoding=ENCODING   set default encoding for new databases\n"));
 	printf(_("  -g, --allow-group-access  allow group read/execute on data directory\n"));
+	printf(_("      --icu-locale=LOCALE   set ICU locale for new databases\n"));
 	printf(_("      --locale=LOCALE       set default locale for new databases\n"));
 	printf(_("      --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n"
 			 "      --lc-monetary=, --lc-numeric=, --lc-time=LOCALE\n"
@@ -2510,7 +2538,8 @@ setup_locale_encoding(void)
 		strcmp(lc_ctype, lc_time) == 0 &&
 		strcmp(lc_ctype, lc_numeric) == 0 &&
 		strcmp(lc_ctype, lc_monetary) == 0 &&
-		strcmp(lc_ctype, lc_messages) == 0)
+		strcmp(lc_ctype, lc_messages) == 0 &&
+		(!icu_locale || strcmp(lc_ctype, icu_locale) == 0))
 		printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype);
 	else
 	{
@@ -2527,9 +2556,13 @@ setup_locale_encoding(void)
 			   lc_monetary,
 			   lc_numeric,
 			   lc_time);
+		if (icu_locale)
+			printf(_("  ICU:      %s\n"), icu_locale);
 	}
 
-	if (!encoding)
+	if (!encoding && collation_provider[0] == COLLPROVIDER_ICU)
+		encodingid = PG_UTF8;
+	else if (!encoding)
 	{
 		int			ctype_enc;
 
@@ -3029,6 +3062,8 @@ main(int argc, char *argv[])
 		{"wal-segsize", required_argument, NULL, 12},
 		{"data-checksums", no_argument, NULL, 'k'},
 		{"allow-group-access", no_argument, NULL, 'g'},
+		{"collation-provider", required_argument, NULL, 13},
+		{"icu-locale", required_argument, NULL, 14},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3167,6 +3202,20 @@ main(int argc, char *argv[])
 			case 'g':
 				SetDataDirectoryCreatePerm(PG_DIR_MODE_GROUP);
 				break;
+			case 13:
+				if (strcmp(optarg, "icu") == 0)
+					collation_provider[0] = COLLPROVIDER_ICU;
+				else if (strcmp(optarg, "libc") == 0)
+					collation_provider[0] = COLLPROVIDER_LIBC;
+				else
+				{
+					pg_log_error("unrecognized collation provider: %s", optarg);
+					exit(1);
+				}
+				break;
+			case 14:
+				icu_locale = pg_strdup(optarg);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 8387b945d3..90f6fc8f14 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -8,7 +8,7 @@ use Fcntl ':mode';
 use File::stat qw{lstat};
 use PostgresNode;
 use TestLib;
-use Test::More tests => 22;
+use Test::More tests => 24;
 
 my $tempdir = TestLib::tempdir;
 my $xlogdir = "$tempdir/pgxlog";
@@ -89,3 +89,19 @@ SKIP:
 	ok(check_mode_recursive($datadir_group, 0750, 0640),
 		'check PGDATA permissions');
 }
+
+# Collation provider tests
+
+if ($ENV{with_icu} eq 'yes')
+{
+	command_ok(['initdb', '--no-sync', '--collation-provider=icu', "$tempdir/data2"],
+			   'collation provider ICU');
+}
+else
+{
+	command_fails(['initdb', '--no-sync', '--collation-provider=icu', "$tempdir/data2"],
+				  'collation provider ICU fails since no ICU support');
+}
+
+command_fails(['initdb', '--no-sync', '--collation-provider=xyz', "$tempdir/dataX"],
+			  'fails for invalid collation provider');
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index f01fea5b91..9d7842583b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2638,6 +2638,7 @@ dumpDatabase(Archive *fout)
 				i_datname,
 				i_dba,
 				i_encoding,
+				i_datcollprovider,
 				i_collate,
 				i_ctype,
 				i_frozenxid,
@@ -2652,6 +2653,7 @@ dumpDatabase(Archive *fout)
 	const char *datname,
 			   *dba,
 			   *encoding,
+			   *datcollprovider,
 			   *collate,
 			   *ctype,
 			   *datacl,
@@ -2680,6 +2682,7 @@ dumpDatabase(Archive *fout)
 		appendPQExpBuffer(dbQry, "SELECT tableoid, oid, datname, "
 						  "(%s datdba) AS dba, "
 						  "pg_encoding_to_char(encoding) AS encoding, "
+						  "datcollprovider, "
 						  "datcollate, datctype, datfrozenxid, datminmxid, "
 						  "(SELECT array_agg(acl ORDER BY row_n) FROM "
 						  "  (SELECT acl, row_n FROM "
@@ -2772,6 +2775,7 @@ dumpDatabase(Archive *fout)
 	i_datname = PQfnumber(res, "datname");
 	i_dba = PQfnumber(res, "dba");
 	i_encoding = PQfnumber(res, "encoding");
+	i_datcollprovider = PQfnumber(res, "datcollprovider");
 	i_collate = PQfnumber(res, "datcollate");
 	i_ctype = PQfnumber(res, "datctype");
 	i_frozenxid = PQfnumber(res, "datfrozenxid");
@@ -2787,6 +2791,7 @@ dumpDatabase(Archive *fout)
 	datname = PQgetvalue(res, 0, i_datname);
 	dba = PQgetvalue(res, 0, i_dba);
 	encoding = PQgetvalue(res, 0, i_encoding);
+	datcollprovider = PQgetvalue(res, 0, i_datcollprovider);
 	collate = PQgetvalue(res, 0, i_collate);
 	ctype = PQgetvalue(res, 0, i_ctype);
 	frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid));
@@ -2812,6 +2817,17 @@ dumpDatabase(Archive *fout)
 		appendPQExpBufferStr(creaQry, " ENCODING = ");
 		appendStringLiteralAH(creaQry, encoding, fout);
 	}
+	/* NULL if the query did not return a datcollprovider column */
+	if (datcollprovider != NULL && datcollprovider[0] != '\0')
+	{
+		appendPQExpBufferStr(creaQry, " COLLATION_PROVIDER = ");
+		if (datcollprovider[0] == 'c')
+			appendPQExpBufferStr(creaQry, "libc");
+		else if (datcollprovider[0] == 'i')
+			appendPQExpBufferStr(creaQry, "icu");
+		else
+			fatal("unrecognized collation provider: %s", datcollprovider);
+	}
 	if (strlen(collate) > 0 && strcmp(collate, ctype) == 0)
 	{
 		appendPQExpBufferStr(creaQry, " LOCALE = ");
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index d7c0fc0c1e..23fdbb92ae 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -880,6 +880,14 @@ listAllDbs(const char *pattern, bool verbose)
 						  "       d.datctype as \"%s\",\n",
 						  gettext_noop("Collate"),
 						  gettext_noop("Ctype"));
+	if (pset.sversion >= 130000)
+		appendPQExpBuffer(&buf,
+						  "       CASE d.datcollprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
+						  gettext_noop("Provider"));
+	else
+		appendPQExpBuffer(&buf,
+						  "       'libc' AS \"%s\",\n",
+						  gettext_noop("Provider"));
 	appendPQExpBufferStr(&buf, "       ");
 	printACLColumn(&buf, "d.datacl");
 	if (verbose && pset.sversion >= 80200)
diff --git a/src/bin/scripts/Makefile b/src/bin/scripts/Makefile
index ede665090f..ef4f8593dc 100644
--- a/src/bin/scripts/Makefile
+++ b/src/bin/scripts/Makefile
@@ -53,6 +53,8 @@ clean distclean maintainer-clean:
 	rm -f common.o scripts_parallel.o $(WIN32RES)
 	rm -rf tmp_check
 
+export with_icu
+
 check:
 	$(prove_check)
 
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 64bcc20cb4..5944fd3f63 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -37,6 +37,7 @@ main(int argc, char *argv[])
 		{"lc-ctype", required_argument, NULL, 2},
 		{"locale", required_argument, NULL, 'l'},
 		{"maintenance-db", required_argument, NULL, 3},
+		{"collation-provider", required_argument, NULL, 4},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -59,6 +60,7 @@ main(int argc, char *argv[])
 	char	   *lc_collate = NULL;
 	char	   *lc_ctype = NULL;
 	char	   *locale = NULL;
+	char	   *collation_provider = NULL;
 
 	PQExpBufferData sql;
 
@@ -117,6 +119,9 @@ main(int argc, char *argv[])
 			case 3:
 				maintenance_db = pg_strdup(optarg);
 				break;
+			case 4:
+				collation_provider = pg_strdup(optarg);
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -193,6 +198,8 @@ main(int argc, char *argv[])
 		appendPQExpBuffer(&sql, " LC_COLLATE '%s'", lc_collate);
 	if (lc_ctype)
 		appendPQExpBuffer(&sql, " LC_CTYPE '%s'", lc_ctype);
+	if (collation_provider)
+		appendPQExpBuffer(&sql, " COLLATION_PROVIDER %s", collation_provider);
 
 	appendPQExpBufferChar(&sql, ';');
 
@@ -250,6 +257,8 @@ help(const char *progname)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]... [DBNAME] [DESCRIPTION]\n"), progname);
 	printf(_("\nOptions:\n"));
+	printf(_("      --collation-provider={libc|icu}\n"
+			 "                               collation provider for the database\n"));
 	printf(_("  -D, --tablespace=TABLESPACE  default tablespace for the database\n"));
 	printf(_("  -e, --echo                   show the commands being sent to the server\n"));
 	printf(_("  -E, --encoding=ENCODING      encoding for the database\n"));
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index c0f6067a92..9e8220335f 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -3,7 +3,7 @@ use warnings;
 
 use PostgresNode;
 use TestLib;
-use Test::More tests => 13;
+use Test::More tests => 16;
 
 program_help_ok('createdb');
 program_version_ok('createdb');
@@ -22,5 +22,22 @@ $node->issues_sql_like(
 	qr/statement: CREATE DATABASE foobar2 ENCODING 'LATIN1'/,
 	'create database with encoding');
 
+if ($ENV{with_icu} eq 'yes')
+{
+	$node->issues_sql_like(
+		[ 'createdb', '-T', 'template0', '--collation-provider=icu', 'foobar3' ],
+		qr/statement: CREATE DATABASE foobar3 .* COLLATION_PROVIDER icu/,
+		'create database with ICU');
+}
+else
+{
+	$node->command_fails(
+		[ 'createdb', '-T', 'template0', '--collation-provider=icu', 'foobar3' ],
+		'create database with ICU fails since no ICU support');
+	pass;
+}
+
 $node->command_fails([ 'createdb', 'foobar1' ],
 	'fails if database already exists');
+$node->command_fails([ 'createdb', '-T', 'template0', '--collation-provider=xyz', 'foobarX' ],
+	'fails for invalid collation provider');
diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat
index 89bd75d024..f261cdd838 100644
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -15,7 +15,7 @@
 { oid => '1', oid_symbol => 'TemplateDbOid',
   descr => 'default template for new databases',
   datname => 'template1', encoding => 'ENCODING', datcollate => 'LC_COLLATE',
-  datctype => 'LC_CTYPE', datistemplate => 't', datallowconn => 't',
+  datctype => 'LC_CTYPE', datcollprovider => 'COLLPROVIDER', datistemplate => 't', datallowconn => 't',
   datconnlimit => '-1', datlastsysoid => '0', datfrozenxid => '0',
   datminmxid => '1', dattablespace => 'pg_default', datacl => '_null_' },
 
diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h
index 06fea45f53..ab3c0951df 100644
--- a/src/include/catalog/pg_database.h
+++ b/src/include/catalog/pg_database.h
@@ -46,6 +46,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID
 	/* LC_CTYPE setting */
 	NameData	datctype;
 
+	/* see pg_collation.collprovider */
+	char		datcollprovider;
+
 	/* allowed as CREATE DATABASE template? */
 	bool		datistemplate;
 
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index b4b3aa5843..17fcee1e89 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -101,6 +101,12 @@ struct pg_locale_struct
 
 typedef struct pg_locale_struct *pg_locale_t;
 
+extern struct pg_locale_struct global_locale;
+
+extern void make_icu_collator(const char *collcollate, const char *collctype,
+							  struct pg_locale_struct *resultp);
+extern void check_collation_version(HeapTuple colltuple);
+
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
-- 
2.20.1

