The more I look at this patch, the more concerned I become. Here's an
easy question, for starters: isn't this the same problem that users
of all auth schemes encounter?
Even supposing the user creates their own user identifier and it permits
opaque bytes, that user may begin on a browser configured to send all
requests as utf-8, and later open a browser configured with their local
code page. I don't see how this is an LDAP issue.
Would you consider moving this new code to a mod_request_charset module?
Then all auth users may have normalized usernames. We simply document
that turning on mod_request_charset affects all auth providers, and therefore
all auth databases should be created with utf-8 usernames.
Even Basic auth passwords suffer from the same problem. So do digest hashes, but there
is nothing we can ever do about that. Once this code is in its own module,
we can begin to expand on it.
The last problem I have with the patch is a nit. Can we please move the
charset.conv file over to the docs/conf directory where it belongs? That will
help get fixes committed to that file by our very multilingual docs team.
Bill
At 06:13 PM 12/13/2002, you wrote:
>bnicholes 2002/12/13 16:13:15
>
> Modified: modules/experimental NWGNUauthldap mod_auth_ldap.c
> Added: modules/experimental charset.conv
> Log:
> Added character set support to mod_auth_LDAP to allow it to convert
> extended characters used in the user ID to UTF-8 before authenticating
> against the LDAP directory. The new directive AuthLDAPCharsetConfig is
> used to specify the config file that contains the character set conversion table.
>
> Revision Changes Path
> 1.7 +1 -0 httpd-2.0/modules/experimental/NWGNUauthldap
>
> Index: NWGNUauthldap
> ===================================================================
> RCS file: /home/cvs/httpd-2.0/modules/experimental/NWGNUauthldap,v
> retrieving revision 1.6
> retrieving revision 1.7
> diff -u -r1.6 -r1.7
> --- NWGNUauthldap 16 Oct 2002 23:52:26 -0000 1.6
> +++ NWGNUauthldap 14 Dec 2002 00:13:15 -0000 1.7
> @@ -246,6 +246,7 @@
> #
> install :: nlms FORCE
> copy $(OBJDIR)\*.nlm $(INSTALL)\Apache2\modules\*.*
> + copy charset.conv $(INSTALL)\Apache2\conf\*.*
>
> #
> # Any specialized rules here
>
>
>
> 1.10 +160 -2 httpd-2.0/modules/experimental/mod_auth_ldap.c
>
> Index: mod_auth_ldap.c
> ===================================================================
> RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_auth_ldap.c,v
> retrieving revision 1.9
> retrieving revision 1.10
> diff -u -r1.9 -r1.10
> --- mod_auth_ldap.c 11 Dec 2002 06:11:11 -0000 1.9
> +++ mod_auth_ldap.c 14 Dec 2002 00:13:15 -0000 1.10
> @@ -62,6 +62,7 @@
>
> #include <apr_ldap.h>
> #include <apr_strings.h>
> +#include <apr_xlate.h>
>
> #include "ap_config.h"
> #if APR_HAVE_UNISTD_H
> @@ -116,7 +117,7 @@
> it's the exact string passed by the HTTP
>client */
>
> int netscapessl; /* True if Netscape SSL is enabled */
> - int starttls; /* True if StartTLS is enabled */
> + int starttls; /* True if StartTLS is enabled */
> } mod_auth_ldap_config_t;
>
> typedef struct mod_auth_ldap_request_t {
> @@ -143,6 +144,59 @@
>
> /* ---------------------------------------- */
>
> +static apr_hash_t *charset_conversions = NULL;
> +static char *to_charset = NULL; /* UTF-8 identifier derived from the
>charset.conv file */
> +
> +/* Derive a code page ID give a language name or ID */
> +static char* derive_codepage_from_lang (apr_pool_t *p, char *language)
> +{
> + int lang_len;
> + int check_short = 0;
> + char *charset;
> +
> + if (!language) // our default codepage
> + return apr_pstrdup(p, "ISO-8859-1");
> + else
> + lang_len = strlen(language);
> +
> + charset = (char*) apr_hash_get(charset_conversions, language,
>APR_HASH_KEY_STRING);
> +
> + if (!charset) {
> + language[2] = '\0';
> + charset = (char*) apr_hash_get(charset_conversions, language,
>APR_HASH_KEY_STRING);
> + }
> +
> + if (charset) {
> + charset = apr_pstrdup(p, charset);
> + }
> +
> + return charset;
> +}
> +
> +static apr_xlate_t* get_conv_set (request_rec *r)
> +{
> + char *lang_line = (char*)apr_table_get(r->headers_in, "accept-language");
> + char *lang;
> + apr_xlate_t *convset;
> +
> + if (lang_line) {
> + lang_line = apr_pstrdup(r->pool, lang_line);
> + for (lang = lang_line;*lang;lang++) {
> + if ((*lang == ',') || (*lang == ';')) {
> + *lang = '\0';
> + break;
> + }
> + }
> + lang = derive_codepage_from_lang(r->pool, lang_line);
> +
> + if (lang && (apr_xlate_open(&convset, to_charset, lang, r->pool) ==
>APR_SUCCESS)) {
> + return convset;
> + }
> + }
> +
> + return NULL;
> +}
> +
>
> /*
> * Build the search filter, or at least as much of the search filter that
> @@ -168,6 +222,33 @@
> mod_auth_ldap_config_t *sec)
> {
> char *p, *q, *filtbuf_end;
> + char *user;
> + apr_xlate_t *convset = NULL;
> + apr_size_t inbytes;
> + apr_size_t outbytes;
> + char *outbuf;
> +
> + if (r->user != NULL) {
> + user = apr_pstrdup (r->pool, r->user);
> + }
> + else
> + return;
> +
> + if (charset_conversions) {
> + convset = get_conv_set(r);
> + }
> +
> + if (convset) {
> + inbytes = strlen(user);
> + outbytes = (inbytes+1)*3;
> + outbuf = apr_pcalloc(r->pool, outbytes);
> +
> + /* Convert the user name to UTF-8. This is only valid for LDAP v3 */
> + if (apr_xlate_conv_buffer(convset, user, &inbytes, outbuf, &outbytes) ==
>APR_SUCCESS) {
> + user = apr_pstrdup(r->pool, outbuf);
> + }
> + }
> +
> /*
> * Create the first part of the filter, which consists of the
> * config-supplied portions.
> @@ -179,7 +260,7 @@
> * LDAP filter metachars are escaped.
> */
> filtbuf_end = filtbuf + FILTER_LENGTH - 1;
> - for (p = r->user, q=filtbuf + strlen(filtbuf);
> + for (p = user, q=filtbuf + strlen(filtbuf);
> *p && q < filtbuf_end; *q++ = *p++) {
> if (strchr("*()\\", *p) != NULL) {
> *q++ = '\\';
> @@ -270,6 +351,13 @@
> return result;
> }
>
> + if (r->user == NULL) {
> + ap_log_rerror(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, 0, r,
> + "[%d] auth_ldap authenticate: no user specified", getpid());
> + util_ldap_connection_close(ldc);
> + return sec->auth_authoritative? HTTP_UNAUTHORIZED : DECLINED;
> + }
> +
> /* build the username filter */
> mod_auth_ldap_build_filter(filtbuf, r, sec);
>
> @@ -796,6 +884,13 @@
> return NULL;
> }
>
> +static const char *set_charset_config(cmd_parms *cmd, void *config, const char
>*arg)
> +{
> + ap_set_module_config(cmd->server->module_config, &auth_ldap_module,
> + (void *)arg);
> + return NULL;
> +}
> +
> command_rec mod_auth_ldap_cmds[] = {
> AP_INIT_TAKE1("AuthLDAPURL", mod_auth_ldap_parse_url, NULL, OR_AUTHCFG,
> "URL to define LDAP connection. This should be an RFC 2255
>complaint\n"
> @@ -870,6 +965,10 @@
> (void *)APR_OFFSETOF(mod_auth_ldap_config_t, frontpage_hack),
>OR_AUTHCFG,
> "Set to 'on' to support Microsoft FrontPage"),
>
> + AP_INIT_TAKE1("AuthLDAPCharsetConfig", set_charset_config, NULL, RSRC_CONF,
> + "Character set conversion configuration file. If omitted,
>character set"
> + "conversion is disabled."),
> +
> #ifdef APU_HAS_LDAP_STARTTLS
> AP_INIT_FLAG("AuthLDAPStartTLS", ap_set_flag_slot,
> (void *)APR_OFFSETOF(mod_auth_ldap_config_t, starttls),
>OR_AUTHCFG,
> @@ -879,8 +978,67 @@
> {NULL}
> };
>
> +static int auth_ldap_post_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t
>*ptemp, server_rec *s)
> +{
> + ap_configfile_t *f;
> + char l[MAX_STRING_LEN];
> + const char *charset_confname = ap_get_module_config(s->module_config,
> + &auth_ldap_module);
> + apr_status_t status;
> +
> + if (!charset_confname) {
> + return OK;
> + }
> +
> + charset_confname = ap_server_root_relative(p, charset_confname);
> + if (!charset_confname) {
> + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s,
> + "Invalid charset conversion config path %s",
> + (const char *)ap_get_module_config(s->module_config,
> + &auth_ldap_module));
> + return HTTP_INTERNAL_SERVER_ERROR;
> + }
> + if ((status = ap_pcfg_openfile(&f, ptemp, charset_confname))
> + != APR_SUCCESS) {
> + ap_log_error(APLOG_MARK, APLOG_ERR, status, s,
> + "could not open charset conversion config file %s.",
> + charset_confname);
> + return HTTP_INTERNAL_SERVER_ERROR;
> + }
> +
> + charset_conversions = apr_hash_make(p);
> +
> + while (!(ap_cfg_getline(l, MAX_STRING_LEN, f))) {
> + const char *ll = l;
> + char *lang;
> +
> + if (l[0] == '#') {
> + continue;
> + }
> + lang = ap_getword_conf(p, &ll);
> + ap_str_tolower(lang);
> +
> + if (ll[0]) {
> + char *charset = ap_getword_conf(p, &ll);
> + apr_hash_set(charset_conversions, lang, APR_HASH_KEY_STRING, charset);
> + }
> + }
> + ap_cfg_closefile(f);
> +
> + to_charset = derive_codepage_from_lang (p, "utf-8");
> + if (to_charset == NULL) {
> + ap_log_error(APLOG_MARK, APLOG_ERR, status, s,
> + "could not find the UTF-8 charset in the file %s.",
> + charset_confname);
> + return HTTP_INTERNAL_SERVER_ERROR;
> + }
> +
> + return OK;
> +}
> +
> static void mod_auth_ldap_register_hooks(apr_pool_t *p)
> {
> + ap_hook_post_config(auth_ldap_post_config,NULL,NULL,APR_HOOK_MIDDLE);
> ap_hook_check_user_id(mod_auth_ldap_check_user_id, NULL, NULL,
>APR_HOOK_MIDDLE);
> ap_hook_auth_checker(mod_auth_ldap_auth_checker, NULL, NULL, APR_HOOK_MIDDLE);
> }
>
>
>
> 1.1 httpd-2.0/modules/experimental/charset.conv
>
> Index: charset.conv
> ===================================================================
>
> # Lang-abbv Charset Language
> #---------------------------------
> en ISO-8859-1 English
> UTF-8 utf8 UTF-8
> Unicode ucs Unicode
> th Cp874 Thai
> ja SJIS Japanese
> ko Cp949 Korean
> zh Cp950 Chinese-Traditional
> zh-cn GB2312 Chinese-Simplified
> zh-tw Cp950 Chinese
> cs ISO-8859-2 Czech
> hu ISO-8859-2 Hungarian
> hr ISO-8859-2 Croation
> pl ISO-8859-2 Polish
> ro ISO-8859-2 Romanian
> sr ISO-8859-2 Serbian
> sk ISO-8859-2 Slovak
> sl ISO-8859-2 Slovenian
> sq ISO-8859-2 Albanian
> bg ISO-8859-5 Bulgarian
> be ISO-8859-5 Byelorussian
> mk ISO-8859-5 Macedonian
> ru ISO-8859-5 Russian
> uk ISO-8859-5 Ukrainian
> ca ISO-8859-1 Catalan
> de ISO-8859-1 German
> da ISO-8859-1 Danish
> fi ISO-8859-1 Finnish
> fr ISO-8859-1 French
> es ISO-8859-1 Spanish
> is ISO-8859-1 Icelandic
> it ISO-8859-1 Italian
> nl ISO-8859-1 Dutch
> no ISO-8859-1 Norwegian
> pt ISO-8859-1 Portuguese
> sv ISO-8859-1 Swedish
> af ISO-8859-1 Afrikaans
> eu ISO-8859-1 Basque
> fo ISO-8859-1 Faroese
> gl ISO-8859-1 Galician
> ga ISO-8859-1 Irish
> gd ISO-8859-1 Scottish
> mt ISO-8859-3 Maltese
> eo ISO-8859-3 Esperanto
> el ISO-8859-7 Greek
> tr ISO-8859-9 Turkish
> he ISO-8859-8 Hebrew
> iw ISO-8859-8 Hebrew
> ar ISO-8859-6 Arabic
> et ISO-8859-1 Estonian
> lv ISO-8859-2 Latvian
> lt ISO-8859-2 Lithuanian
>
>
>