Hi Mark! Mark H Weaver <m...@netris.org> skribis:
> l...@gnu.org (Ludovic Courtès) writes: >>> I think we should consider decoding the command-line arguments using the >>> locale specified by the environment variables, at least in cases like >>> this where there's no way for the user to call setlocale before the >>> conversion happens. >> >> Below is a patch that does roughly that (we should get ‘locale_encoding’ >> reviewed and perhaps added to Gnulib.) >> >> It solves the problem: >> >> # With the patch. >> $ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' >> -- λ >> (/home/ludo/src/guile/libguile/.libs/guile -- λ) >> >> # Previously. >> $ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ >> (guile -- ??) > > Looks great, thanks! :) > > I have one question though. You fixed scm_compile_shell_switches, but I > see another place where command-line arguments are converted to Scheme > strings before the user is able to call setlocale: guile.c and init.c. > > main (guile.c) calls scm_boot_guile (init.c), which uses > invoke_main_func (init.c), which calls scm_set_program_arguments > (feature.c). Does this code need to be fixed also? Yes, good catch! An updated patch is attached. It seems to fulfill its mission: --8<---------------cut here---------------start------------->8--- # Now: $ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line) (program-arguments)))' -- λ ((/home/ludo/src/guile/libguile/.libs/guile -- λ) (/home/ludo/src/guile/libguile/.libs/guile -- λ)) # Before: $ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line) (program-arguments)))' -- λ ((guile -- ??) (guile -- ??)) --8<---------------cut here---------------end--------------->8--- Note that the code uses SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE, but I wonder if we couldn’t do better. For instance, upon conversion failure, we could pass the argument as a bytevector instead of a string and let the application cope with it. OTOH, that would be an API change. 
Thoughts? Thanks, Ludo’.
diff --git a/libguile/feature.c b/libguile/feature.c index 7007403..f3bddc7 100644 --- a/libguile/feature.c +++ b/libguile/feature.c @@ -1,5 +1,6 @@ -/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc. - * +/* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + * 2006, 2007, 2009, 2011 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 3 of @@ -36,7 +37,8 @@ -static SCM progargs_fluid; +SCM scm_program_arguments_fluid; + static SCM features_var; void @@ -58,7 +60,7 @@ SCM_DEFINE (scm_program_arguments, "program-arguments", 0, 0, 0, "options like @code{-e} and @code{-l}.") #define FUNC_NAME s_scm_program_arguments { - return scm_fluid_ref (progargs_fluid); + return scm_fluid_ref (scm_program_arguments_fluid); } #undef FUNC_NAME @@ -74,7 +76,7 @@ scm_set_program_arguments (int argc, char **argv, char *first) SCM args = scm_makfromstrs (argc, argv); if (first) args = scm_cons (scm_from_locale_string (first), args); - scm_fluid_set_x (progargs_fluid, args); + scm_fluid_set_x (scm_program_arguments_fluid, args); } SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0, @@ -89,7 +91,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0, "strings within it are copied, so should not be modified later.") #define FUNC_NAME s_scm_set_program_arguments_scm { - return scm_fluid_set_x (progargs_fluid, lst); + return scm_fluid_set_x (scm_program_arguments_fluid, lst); } #undef FUNC_NAME @@ -99,7 +101,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0, void scm_init_feature() { - progargs_fluid = scm_make_fluid (); + scm_program_arguments_fluid = scm_make_fluid (); features_var = scm_c_define ("*features*", SCM_EOL); #ifndef _Windows diff --git 
a/libguile/feature.h b/libguile/feature.h index d373bc7..467f9ed 100644 --- a/libguile/feature.h +++ b/libguile/feature.h @@ -3,7 +3,8 @@ #ifndef SCM_FEATURE_H #define SCM_FEATURE_H -/* Copyright (C) 1995,1996,1999,2000,2001, 2006, 2007, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1999, 2000, 2001, 2006, 2007, 2008, + * 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -29,6 +30,8 @@ SCM_API void scm_add_feature (const char* str); SCM_API SCM scm_program_arguments (void); SCM_API void scm_set_program_arguments (int argc, char **argv, char *first); SCM_API SCM scm_set_program_arguments_scm (SCM lst); + +SCM_INTERNAL SCM scm_program_arguments_fluid; SCM_INTERNAL void scm_init_feature (void); #endif /* SCM_FEATURE_H */ diff --git a/libguile/init.c b/libguile/init.c index 8e3888d..633f8c6 100644 --- a/libguile/init.c +++ b/libguile/init.c @@ -332,7 +332,7 @@ invoke_main_func (void *body_data) { struct main_func_closure *closure = (struct main_func_closure *) body_data; - scm_set_program_arguments (closure->argc, closure->argv, 0); + scm_i_set_boot_program_arguments (closure->argc, closure->argv); (*closure->main_func) (closure->closure, closure->argc, closure->argv); scm_restore_signals (); diff --git a/libguile/script.c b/libguile/script.c index 5e0685a..b1d3327 100644 --- a/libguile/script.c +++ b/libguile/script.c @@ -26,6 +26,7 @@ #include <stdio.h> #include <errno.h> #include <ctype.h> +#include <uniconv.h> #include "libguile/_scm.h" #include "libguile/eval.h" @@ -368,6 +369,87 @@ scm_shell_usage (int fatal, char *message) : SCM_BOOL_F)); } +/* Return the name of the locale encoding suggested by environment + variables, even if it's not current, or NULL if no encoding is + defined. Based on Gnulib's `localcharset.c'. 
*/ +static const char * +locale_encoding (void) +{ + static char buf[2 + 10 + 1]; + const char *locale, *codeset = NULL; + + /* Allow user to override the codeset, as set in the operating system, + with standard language environment variables. */ + locale = getenv ("LC_ALL"); + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_CTYPE"); + if (locale == NULL || locale[0] == '\0') + locale = getenv ("LANG"); + } + if (locale != NULL && locale[0] != '\0') + { + /* If the locale name contains an encoding after the dot, return it. */ + const char *dot = strchr (locale, '.'); + + if (dot != NULL) + { + const char *modifier; + + dot++; + /* Look for the possible @... trailer and remove it, if any. */ + modifier = strchr (dot, '@'); + if (modifier == NULL) + return dot; + if (modifier - dot < sizeof (buf)) + { + memcpy (buf, dot, modifier - dot); + buf [modifier - dot] = '\0'; + return buf; + } + } + else if (strcmp (locale, "C") == 0) + { + strcpy (buf, "ASCII"); + return buf; + } + + /* Resolve through the charset.alias file. */ + codeset = locale; + } + + return codeset; +} + +/* Return a list of strings from ARGV, which contains ARGC strings + assumed to be encoded in the current locale. Use `locale_encoding' + instead of relying on `scm_from_locale_string' because the user + hasn't had a chance to call (setlocale LC_ALL "") yet. */ +static SCM +locale_arguments_to_string_list (int argc, char **const argv) +{ + int i; + SCM lst; + const char *encoding; + + encoding = locale_encoding (); + for (i = argc - 1, lst = SCM_EOL; + i >= 0; + i--) + lst = scm_cons (scm_from_stringn (argv[i], (size_t) -1, encoding, + SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE), + lst); + + return lst; +} + +/* Set the value returned by `program-arguments', given ARGC and ARGV. 
*/ +void +scm_i_set_boot_program_arguments (int argc, char *argv[]) +{ + scm_fluid_set_x (scm_program_arguments_fluid, + locale_arguments_to_string_list (argc, argv)); +} /* Given an array of command-line switches, return a Scheme expression to carry out the actions specified by the switches. @@ -378,7 +460,7 @@ scm_compile_shell_switches (int argc, char **argv) { return scm_call_2 (scm_c_public_ref ("ice-9 command-line", "compile-shell-switches"), - scm_makfromstrs (argc, argv), + locale_arguments_to_string_list (argc, argv), (scm_usage_name ? scm_from_locale_string (scm_usage_name) : scm_from_latin1_string ("guile"))); diff --git a/libguile/script.h b/libguile/script.h index 7e3828a..cf0162a 100644 --- a/libguile/script.h +++ b/libguile/script.h @@ -3,7 +3,7 @@ #ifndef SCM_SCRIPT_H #define SCM_SCRIPT_H -/* Copyright (C) 1997,1998,2000, 2006, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1997,1998,2000, 2006, 2008, 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -37,6 +37,7 @@ SCM_API void scm_shell_usage (int fatal, char *message); SCM_API SCM scm_compile_shell_switches (int argc, char **argv); SCM_API void scm_shell (int argc, char **argv); SCM_API char *scm_usage_name; +SCM_INTERNAL void scm_i_set_boot_program_arguments (int argc, char *argv[]); SCM_INTERNAL void scm_init_script (void); #endif /* SCM_SCRIPT_H */