On Fri, Mar 18, 2022 at 03:09:59PM -0700, Andres Freund wrote:
> Hi,
> 
> On 2022-03-18 20:28:58 +0100, Peter Eisentraut wrote:
> > Why does your patch introduce a function check_icu_locale() that is only
> > called once?  Did you have further plans for that?
> 
> I like that it moves ICU code out of dbcommands.c

Yes, it seemed cleaner this way.  But more importantly code outside pg_locale.c
really shouldn't have to deal with ICU specific version code.

I'm attaching a v2, addressing Peter and Tom comments to not duplicate the old
ICU versions attribute function.  I removed the ICU locale check entirely (for
consistency across ICU version) thus removing any need for ucol.h include in
initdb.

For the problem you reported at [1] with the meson branch, I changed createdb
tests with s/en-u-kf-upper/en@colCaseFirst=upper/, as older ICU versions don't
understand the former notation.  check-world now pass for me, using either ICU
< 54 or >= 54.

> imo there should be few
> calls to ICU functions outside of pg_locale.c. There might be an argument for
> moving *more* into such a function though.

I think it would be a good improvement.  I can work on that next week if
needed.

[1] 
https://www.postgresql.org/message-id/20220318000140.vzri3qw3p4aeb...@alap3.anarazel.de
>From f3884b483884a4e39b577dc01b72bab5176964bb Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouh...@free.fr>
Date: Fri, 18 Mar 2022 16:20:01 +0800
Subject: [PATCH v2] Fix global icu collations for ICU < 54

createdb() didn't check for collation attributes validity, which has
to be done explicitly on ICU < 54.  It also forgot to close the ICU collator
opened during the check which leaks some memory.

To fix both, add a new check_icu_locale() that does all the appropriate
verification and close the ICU collator.

initdb also had some partial check for ICU < 54.  To have consistent error
reporting across major ICU versions, and get rid of the need to include ucol.h,
remove the partial check there.  The backend will report an error if needed
during the post-boostrap iniitialization phase.
---
 src/backend/commands/dbcommands.c |  9 +--------
 src/backend/utils/adt/pg_locale.c | 21 +++++++++++++++++++++
 src/bin/initdb/initdb.c           | 22 +++-------------------
 src/bin/initdb/t/001_initdb.pl    |  2 +-
 src/include/utils/pg_locale.h     |  1 +
 src/test/icu/t/010_database.pl    |  4 ++--
 6 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/src/backend/commands/dbcommands.c 
b/src/backend/commands/dbcommands.c
index 962e11dd8f..15b6eb8e0e 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -460,14 +460,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
        if (dblocprovider == COLLPROVIDER_ICU)
        {
 #ifdef USE_ICU
-               UErrorCode  status;
-
-               status = U_ZERO_ERROR;
-               ucol_open(dbiculocale, &status);
-               if (U_FAILURE(status))
-                       ereport(ERROR,
-                                       (errmsg("could not open collator for 
locale \"%s\": %s",
-                                                       dbiculocale, 
u_errorName(status))));
+               check_icu_locale(dbiculocale);
 #else
                ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/backend/utils/adt/pg_locale.c 
b/src/backend/utils/adt/pg_locale.c
index 4019255f8e..af69f2729e 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1884,6 +1884,27 @@ icu_from_uchar(char **result, const UChar *buff_uchar, 
int32_t len_uchar)
        return len_result;
 }
 
+/*
+ * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
+ */
+void
+check_icu_locale(const char *icu_locale)
+{
+       UCollator  *collator;
+       UErrorCode  status;
+
+       status = U_ZERO_ERROR;
+       collator = ucol_open(icu_locale, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("could not open collator for locale 
\"%s\": %s",
+                                               icu_locale, 
u_errorName(status))));
+
+       if (U_ICU_VERSION_MAJOR_NUM < 54)
+               icu_set_collation_attributes(collator, icu_locale);
+       ucol_close(collator);
+}
+
 /*
  * Parse collation attributes and apply them to the open collator.  This takes
  * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index cbcd55288f..5e36943ef3 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -55,10 +55,6 @@
 #include <signal.h>
 #include <time.h>
 
-#ifdef USE_ICU
-#include <unicode/ucol.h>
-#endif
-
 #ifdef HAVE_SHM_OPEN
 #include "sys/mman.h"
 #endif
@@ -2205,22 +2201,10 @@ setlocales(void)
                }
 
                /*
-                * Check ICU locale ID
+                * In supported builds, the ICU locale ID will be checked by the
+                * backend when performing the post-boostrap initialization.
                 */
-#ifdef USE_ICU
-               {
-                       UErrorCode      status;
-
-                       status = U_ZERO_ERROR;
-                       ucol_open(icu_locale, &status);
-                       if (U_FAILURE(status))
-                       {
-                               pg_log_error("could not open collator for 
locale \"%s\": %s",
-                                                        icu_locale, 
u_errorName(status));
-                               exit(1);
-                       }
-               }
-#else
+#ifndef USE_ICU
                pg_log_error("ICU is not supported in this build");
                fprintf(stderr, _("You need to rebuild PostgreSQL using 
%s.\n"), "--with-icu");
                exit(1);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index c636bf3ab2..a3397777cf 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -105,7 +105,7 @@ if ($ENV{with_icu} eq 'yes')
                'option --icu-locale');
 
        command_fails_like(['initdb', '--no-sync', '--locale-provider=icu', 
'--icu-locale=@colNumeric=lower', "$tempdir/dataX"],
-               qr/initdb: error: could not open collator for locale/,
+               qr/FATAL:  could not open collator for locale/,
                'fails for invalid ICU locale');
 }
 else
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 9b158f24a0..297d14d918 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -115,6 +115,7 @@ extern char *get_collation_actual_version(char 
collprovider, const char *collcol
 #ifdef USE_ICU
 extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t 
nbytes);
 extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t 
len_uchar);
+extern void check_icu_locale(const char *icu_locale);
 #endif
 
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index d50941b53d..07a1084b09 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -16,11 +16,11 @@ $node1->init;
 $node1->start;
 
 $node1->safe_psql('postgres',
-       q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' ICU_LOCALE 
'en-u-kf-upper' ENCODING 'UTF8' TEMPLATE template0});
+       q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' ICU_LOCALE 
'en@colCaseFirst=upper' ENCODING 'UTF8' TEMPLATE template0});
 
 $node1->safe_psql('dbicu',
 q{
-CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper');
+CREATE COLLATION upperfirst (provider = icu, locale = 'en@colCaseFirst=upper');
 CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE 
upperfirst);
 INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), 
('B', 'B', 'B');
 });
-- 
2.33.1

Reply via email to