commit:     78802b1270bf48bea2e39039877096c4867afbb3
Author:     Mike Frysinger <vapier <AT> gentoo <DOT> org>
AuthorDate: Mon Sep 27 06:31:11 2021 +0000
Commit:     Mike Frysinger <vapier <AT> gentoo <DOT> org>
CommitDate: Mon Sep 27 06:31:11 2021 +0000
URL:        https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=78802b12

skip duplicate locales when normalized

If you try to generate "en_US.UTF-8 UTF-8" and "en_US.UTF8 UTF-8", we
end up generating the locale twice even though it normalizes down to
the same value (or, in the case of --update, we skip the 1st and then
always generate the 2nd). Keep track of all locales we've processed so
we can dedupe as we go.

Signed-off-by: Mike Frysinger <vapier <AT> gentoo.org>

 locale-gen | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/locale-gen b/locale-gen
index d313c23..d4417d8 100755
--- a/locale-gen
+++ b/locale-gen
@@ -192,9 +192,9 @@ if [[ -z ${locales_to_generate} ]] && [[ -e ${CONFIG} ]] ; then
 	fi
 fi
 
-# Transform the name in locales.gen to the name used when storing
-# the locale data in /usr/lib/locale/ ... this normalize algo is
-# taken out of the glibc localedef source code ...
+# Transform the name in locales.gen to the name used when storing the locale data in
+# /usr/lib/locale/. This normalize algo is taken out of the glibc localedef source:
+# https://sourceware.org/git/?p=glibc.git;a=blob;f=locale/programs/localedef.c;hb=glibc-2.34#l562
 normalize() {
 	if [[ $1 == *.* ]] ; then
 		local ret=$(echo ${1##*.} | tr '[[:upper:]]' '[[:lower:]]')
@@ -316,6 +316,10 @@ JOB_IDX_S=0
 JOB_IDX_E=0
 cnt=0
 lidx=0
+# Keep track of (normalized) locales generated in case the request has different inputs that
+# normalize down to the same value. We trim $existing_locales as we go for later use which
+# prevents its direct use.
+generated_locales=" "
 while [[ -n ${locales_to_generate[${lidx}]} ]] ; do
 	: $(( ++cnt ))
 	locale=${locales_to_generate[$((lidx++))]}
@@ -334,16 +338,21 @@ while [[ -n ${locales_to_generate[${lidx}]} ]] ; do
 
 	disp=${locales_disp[$(( cnt - 1 ))]}
 
-	if [[ -n ${UPDATE} ]] ; then
-		normalized_locale=$(normalize ${locale})
-		if [[ ${existing_locales} == *" ${normalized_locale} "* ]] ; then
-			existing_locales=${existing_locales/ ${normalized_locale} / }
-			if [[ ${QUIET} -eq 0 ]] ; then
-				cnt_fmt=$(printf "%${#total}i" ${cnt})
-				einfo " (${cnt_fmt}/${total}) Skipping ${disp}"
-			fi
-			continue
+	normalized_locale=$(normalize ${locale})
+	if [[ ${generated_locales} == *" ${normalized_locale} "* ]] ; then
+		already_generated="true"
+	else
+		already_generated="false"
+	fi
+	generated_locales+="${normalized_locale} "
+	if ${already_generated} || \
+		[[ -n ${UPDATE} && ${existing_locales} == *" ${normalized_locale} "* ]] ; then
+		existing_locales=${existing_locales/ ${normalized_locale} / }
+		if [[ ${QUIET} -eq 0 ]] ; then
+			cnt_fmt=$(printf "%${#total}i" ${cnt})
+			einfo " (${cnt_fmt}/${total}) Skipping ${disp}"
 		fi
+		continue
 	fi
 
 	# If the locale is like 'en_US.UTF8', then we really want 'en_US'