Currently, initdb parses locale -a output to populate pg_collation. If additional collations are installed in the operating system later, this process cannot be repeated except by doing each step manually. So I propose to move this to a backend function that can be called separately, and have initdb call that. Running this logic in the backend instead of initdb also makes the code simpler. If we add other collation providers such as ICU, initdb doesn't need to know about that at all, because all the logic would be contained in the backend.
Here is an example: select pg_import_system_collations(if_not_exists => false, schema => 'test'); (Specifying the schema also allows testing this without overwriting pg_catalog.) I thought about making this a top-level command (IMPORT COLLATIONS ... ?) but decided against it for now, to keep it simple. Right now, this is more of a refactoring. Documentation could be added if we decide so. -- Peter Eisentraut http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 97fb7f992b95d2ca8725011dc141dad88051a8cd Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Thu, 13 Oct 2016 12:00:00 -0400 Subject: [PATCH] Add function to import operating system collations Move this logic out of initdb into a user-callable function. This simplifies the code and makes it possible to update the standard collations later on if additional operating system collations appear. --- src/backend/catalog/pg_collation.c | 18 +++- src/backend/commands/collationcmds.c | 146 +++++++++++++++++++++++++++++- src/bin/initdb/initdb.c | 164 +--------------------------------- src/include/catalog/pg_collation_fn.h | 3 +- src/include/catalog/pg_proc.h | 3 + 5 files changed, 167 insertions(+), 167 deletions(-) diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index f37cf37..cda64c4 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -41,7 +41,8 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype) + const char *collcollate, const char *collctype, + bool if_not_exists) { Relation rel; TupleDesc tupDesc; @@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(collencoding), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("collation \"%s\" for encoding \"%s\" already exists", + errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping", collname, pg_encoding_to_char(collencoding)))); + return InvalidOid; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists", + collname, pg_encoding_to_char(collencoding)))); + } /* * Also forbid matching an any-encoding entry. 
This test of course is not diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 9bba748..062e3b6 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters) GetUserId(), GetDatabaseEncoding(), collcollate, - collctype); + collctype, + false); + + if (!newoid) + return InvalidObjectAddress; ObjectAddressSet(address, CollationRelationId, newoid); @@ -177,3 +181,143 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid) errmsg("collation \"%s\" already exists in schema \"%s\"", collname, get_namespace_name(nspOid)))); } + + +/* + * "Normalize" a locale name, stripping off encoding tags such as + * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" + * -> "br_FR@euro"). Return true if a new, different name was + * generated. + */ +static bool +normalize_locale_name(char *new, const char *old) +{ + char *n = new; + const char *o = old; + bool changed = false; + + while (*o) + { + if (*o == '.') + { + /* skip over encoding tag such as ".utf8" or ".UTF-8" */ + o++; + while ((*o >= 'A' && *o <= 'Z') + || (*o >= 'a' && *o <= 'z') + || (*o >= '0' && *o <= '9') + || (*o == '-')) + o++; + changed = true; + } + else + *n++ = *o++; + } + *n = '\0'; + + return changed; +} + + +Datum pg_import_system_collations(PG_FUNCTION_ARGS); + +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ + bool if_not_exists = PG_GETARG_BOOL(0); + Oid nspid = PG_GETARG_OID(1); + + FILE *locale_a_handle; + char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ + int count = 0; + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + int i; + size_t len; + int enc; + bool skip; + char 
alias[NAMEDATALEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of ASCII + * letters (such as "bokmål" or "français"). This is + * pretty silly, since we need the locale itself to interpret the + * non-ASCII characters. We can't do much with those, so we filter + * them out. + */ + skip = false; + for (i = 0; i < len; i++) + { + if (IS_HIGHBIT_SET(localebuf[i])) + { + skip = true; + break; + } + } + if (skip) + { + elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + /* error message printed by pg_get_encoding_from_locale() */ + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + continue; /* ignore locales for client-only encodings */ + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + count++; + + CollationCreate(localebuf, nspid, GetUserId(), enc, + localebuf, localebuf, if_not_exists); + + CommandCounterIncrement(); + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" for + * ease of use. Note that collation names are unique per encoding + * only, so this doesn't clash with "en_US" for LATIN1, say. + * + * This always runs in "if not exists" mode, to skip aliases that + * conflict with an existing locale name for the same encoding. For + * example, "br_FR.iso88591" is normalized to "br_FR", both for + * encoding LATIN1. But the unnormalized locale "br_FR" already + * exists for LATIN1. 
+ */ + if (normalize_locale_name(alias, localebuf)) + { + CollationCreate(alias, nspid, GetUserId(), enc, + localebuf, localebuf, true); + CommandCounterIncrement(); + } + } + + ClosePipeStream(locale_a_handle); + + if (count == 0) + ereport(ERROR, + (errmsg("no usable system locales were found"))); + + PG_RETURN_VOID(); +} diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index c8a8c52..498729c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1587,42 +1587,6 @@ setup_description(FILE *cmdfd) PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n"); } -#ifdef HAVE_LOCALE_T -/* - * "Normalize" a locale name, stripping off encoding tags such as - * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" - * -> "br_FR@euro"). Return true if a new, different name was - * generated. - */ -static bool -normalize_locale_name(char *new, const char *old) -{ - char *n = new; - const char *o = old; - bool changed = false; - - while (*o) - { - if (*o == '.') - { - /* skip over encoding tag such as ".utf8" or ".UTF-8" */ - o++; - while ((*o >= 'A' && *o <= 'Z') - || (*o >= 'a' && *o <= 'z') - || (*o >= '0' && *o <= '9') - || (*o == '-')) - o++; - changed = true; - } - else - *n++ = *o++; - } - *n = '\0'; - - return changed; -} -#endif /* HAVE_LOCALE_T */ - /* * populate pg_collation */ @@ -1630,134 +1594,10 @@ static void setup_collation(FILE *cmdfd) { #if defined(HAVE_LOCALE_T) && !defined(WIN32) - int i; - FILE *locale_a_handle; - char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ - int count = 0; - - locale_a_handle = popen_check("locale -a", "r"); - if (!locale_a_handle) - return; /* complaint already printed */ - - PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( " - " collname name, " - " locale name, " - " encoding int) WITHOUT OIDS;\n\n"); - - while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) - { - size_t len; - int enc; - bool skip; - char *quoted_locale; - char alias[NAMEDATALEN]; - - len = 
strlen(localebuf); - - if (len == 0 || localebuf[len - 1] != '\n') - { - if (debug) - fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - localebuf[len - 1] = '\0'; - - /* - * Some systems have locale names that don't consist entirely of ASCII - * letters (such as "bokmål" or "français"). This is - * pretty silly, since we need the locale itself to interpret the - * non-ASCII characters. We can't do much with those, so we filter - * them out. - */ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) - { - skip = true; - break; - } - } - if (skip) - { - if (debug) - fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - - enc = pg_get_encoding_from_locale(localebuf, debug); - if (enc < 0) - { - /* error message printed by pg_get_encoding_from_locale() */ - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - continue; /* ignore locales for client-only encodings */ - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - count++; - - quoted_locale = escape_quotes(localebuf); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_locale, quoted_locale, enc); - - /* - * Generate aliases such as "en_US" in addition to "en_US.utf8" for - * ease of use. Note that collation names are unique per encoding - * only, so this doesn't clash with "en_US" for LATIN1, say. 
- */ - if (normalize_locale_name(alias, localebuf)) - { - char *quoted_alias = escape_quotes(alias); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_alias, quoted_locale, enc); - free(quoted_alias); - } - free(quoted_locale); - } + PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n"); /* Add an SQL-standard name */ - PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8); - - /* - * When copying collations to the final location, eliminate aliases that - * conflict with an existing locale name for the same encoding. For - * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding - * LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1. - * Prefer the alias that matches the OS locale name, else the first locale - * name by sort order (arbitrary choice to be deterministic). - * - * Also, eliminate any aliases that conflict with pg_collation's - * hard-wired entries for "C" etc. - */ - PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) " - " SELECT DISTINCT ON (collname, encoding)" - " collname, " - " (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, " - " (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, " - " encoding, locale, locale " - " FROM tmp_pg_collation" - " WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)" - " ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n"); - - /* - * Even though the table is temp, drop it explicitly so it doesn't get - * copied into template0/postgres databases. 
- */ - PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n"); - - pclose(locale_a_handle); - - if (count == 0 && !debug) - { - printf(_("No usable system locales were found.\n")); - printf(_("Use the option \"--debug\" to see details.\n")); - } + PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8); #endif /* not HAVE_LOCALE_T && not WIN32 */ } diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 574b288..ac1a81d 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -17,7 +17,8 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype); + const char *collcollate, const char *collctype, + bool if_not_exists); extern void RemoveCollationById(Oid collationOid); #endif /* PG_COLLATION_FN_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 17ec71d..0283310 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5341,6 +5341,9 @@ DESCR("pg_controldata recovery state information as a function"); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DESCR("pg_controldata init state information as a function"); +DATA(insert OID = 4032 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ 
_null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); +DESCR("import collations from operating system"); + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, -- 2.10.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers