Hi Mark,

Mark H Weaver <m...@netris.org> skribis:

> l...@gnu.org (Ludovic Courtès) writes:
>> Nala Ginrut <nalagin...@gmail.com> skribis:
>>> +  setlocale (LC_ALL, "");
>>
>> Currently, like in C, it’s the programmer’s responsibility to install
>> the locale with such a call, and none of Guile’s business.
>
> Unfortunately, I don't see a way for the user to call setlocale before a
> Guile script converts the command-line arguments to Scheme strings, at
> least not without providing their own `main' function in C.

Hmm, very good point.

[...]

> I think we should consider decoding the command-line arguments using the
> locale specified by the environment variables, at least in cases like
> this where there's no way for the user to call setlocale before the
> conversion happens.

Below is a patch that does roughly that (we should get ‘locale_encoding’
reviewed and perhaps added to Gnulib.)  It solves the problem:

--8<---------------cut here---------------start------------->8---
# With the patch.
$ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ
(/home/ludo/src/guile/libguile/.libs/guile -- λ)

# Previously.
$ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ
(guile -- ??)
--8<---------------cut here---------------end--------------->8---

(Note that the ‘setlocale’ call here is just so that the output port is
correctly set up.)

WDYT?

Thanks,
Ludo’.
diff --git a/libguile/script.c b/libguile/script.c
index 5e0685a..20d7b9e 100644
--- a/libguile/script.c
+++ b/libguile/script.c
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <ctype.h>
+#include <uniconv.h>
 
 #include "libguile/_scm.h"
 #include "libguile/eval.h"
@@ -368,6 +369,74 @@ scm_shell_usage (int fatal, char *message)
                    : SCM_BOOL_F));
 }
 
+/* Return the encoding named by the first non-empty of LC_ALL, LC_CTYPE,
+   LANG, even if it's not the current locale, or NULL if none is set.
+   May return a static buffer.  Based on Gnulib's `localcharset.c'.  */
+static const char *
+locale_encoding (void)
+{
+  const char *locale, *codeset = NULL;
+
+  /* Allow user to override the codeset, as set in the operating system,
+     with standard language environment variables.  */
+  locale = getenv ("LC_ALL");
+  if (locale == NULL || locale[0] == '\0')
+    {
+      locale = getenv ("LC_CTYPE");
+      if (locale == NULL || locale[0] == '\0')
+        locale = getenv ("LANG");
+    }
+  if (locale != NULL && locale[0] != '\0')
+    {
+      /* If the locale name contains an encoding after the dot, return it.  */
+      const char *dot = strchr (locale, '.');
+
+      if (dot != NULL)
+        {
+          static char buf[2 + 10 + 1];  /* Codeset copy without @modifier.  */
+          const char *modifier;
+
+          dot++;
+          /* Look for the possible @... trailer and remove it, if any.  */
+          modifier = strchr (dot, '@');
+          if (modifier == NULL)
+            return dot;
+          if (modifier - dot < sizeof (buf))  /* Fits, including the NUL.  */
+            {
+              memcpy (buf, dot, modifier - dot);
+              buf [modifier - dot] = '\0';
+              return buf;
+            }
+        }
+
+      /* NOTE(review): no charset.alias lookup here; returns LOCALE as is.  */
+      codeset = locale;
+    }
+
+  return codeset;
+}
+
+/* Return a list of strings from ARGV, which contains ARGC strings assumed
+   to be encoded in the locale named by the environment.  Use
+   `locale_encoding' instead of relying on `scm_from_locale_string' because
+   the user hasn't had a chance to call (setlocale LC_ALL "") yet.  */
+static SCM
+locale_arguments_to_string_list (int argc, char **const argv)
+{
+  int i;
+  SCM lst;
+  const char *encoding;
+
+  encoding = locale_encoding ();
+  for (i = argc - 1, lst = SCM_EOL;
+       i >= 0;
+       i--)  /* Last to first, so the consed list ends up in ARGV order.  */
+    lst = scm_cons (scm_from_stringn (argv[i], (size_t) -1, encoding,
+                                      SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE),
+                    lst);
+
+  return lst;
+}
 
 /* Given an array of command-line switches, return a Scheme expression
    to carry out the actions specified by the switches.
@@ -378,7 +447,7 @@ scm_compile_shell_switches (int argc, char **argv)
 {
   return scm_call_2 (scm_c_public_ref ("ice-9 command-line",
                                        "compile-shell-switches"),
-                     scm_makfromstrs (argc, argv),
+                     locale_arguments_to_string_list (argc, argv),
                      (scm_usage_name
                       ? scm_from_locale_string (scm_usage_name)
                       : scm_from_latin1_string ("guile")));