--- D:\tempapache\httpd-2.1\modules\experimental\mod_auth_ldap.c	Fri Sep 27 02:49:48 2002
+++ mod_auth_ldap.c	Mon Dec 09 16:32:42 2002
@@ -62,6 +62,7 @@
 
 #include <apr_ldap.h>
 #include <apr_strings.h>
+#include <apr_xlate.h>
 
 #include "ap_config.h"
 #if APR_HAVE_UNISTD_H
@@ -115,8 +116,11 @@
     int group_attrib_is_dn;		/* If true, the group attribute is the DN, otherwise, 
 					   it's the exact string passed by the HTTP client */
 
+    int conv_type;              /* LANG_CONV_* value selecting how the user name is converted */
+    apr_xlate_t *convset;       /* Charset conversion handle, used when conv_type is LANG_CONV_ON */
+    char *to_charset;           /* Charset to convert to; the directive handler defaults it to "utf8" */
     int netscapessl;			/* True if Netscape SSL is enabled */
-    int starttls;                       /* True if StartTLS is enabled */
+    int starttls;               /* True if StartTLS is enabled */
 } mod_auth_ldap_config_t;
 
 typedef struct mod_auth_ldap_request_t {
@@ -143,7 +147,119 @@
 
 /* ---------------------------------------- */
 
+#define LANG_CONV_OFF           0   /* no charset conversion of the user name */
+#define LANG_CONV_ON            1   /* convert with the handle opened at configuration time */
+#define LANG_CONV_USE_HEADER    2   /* derive the source charset per request from Accept-Language */
 
+typedef struct                  /* one row of the sCodePages lookup table */
+{
+    char *country_abbrev;       /* short code such as "en"; compared case-insensitively */
+    char *iso_designation;      /* charset name; this is what a successful lookup returns */
+    char *language_name;        /* descriptive name such as "English"; also accepted as a key */
+} cpstr_t;
+ 
+/* Lookup table: short code, charset name, language name.  Ends with a NULL row. */
+static cpstr_t sCodePages[] = {
+    {"en",      "ISO-8859-1", "English"},   /* row 0 is the default for a NULL lookup */
+
+    /* special entries for requesting UTF-8 and Unicode translation */
+    {"UTF-8",   "utf8",       ""},
+    {"Unicode", "ucs",        ""},
+
+    /* regular language entries; the first matching row wins */
+    {"th",      "Cp874",      "Thai"},
+    {"ja",      "SJIS",       "Japanese"},
+    {"ko",      "Cp949",      "Korean"},
+    {"zh",      "Cp950",      "Chinese Traditional"},
+    {"zh",      "GB2312",     "Chinese Simplified"},
+    {"zh",      "GB2312",     "Chinese"},
+    {"cs",      "ISO-8859-2", "Czech"},
+    {"hu",      "ISO-8859-2", "Hungarian"},
+    {"hr",      "ISO-8859-2", "Croatian"},
+    {"pl",      "ISO-8859-2", "Polish"},
+    {"ro",      "ISO-8859-2", "Romanian"},
+    {"sr",      "ISO-8859-2", "Serbian"},
+    {"sk",      "ISO-8859-2", "Slovak"},
+    {"sl",      "ISO-8859-2", "Slovenian"},
+    {"sq",      "ISO-8859-2", "Albanian"},
+    {"bg",      "ISO-8859-5", "Bulgarian"},
+    {"be",      "ISO-8859-5", "Byelorussian"},
+    {"mk",      "ISO-8859-5", "Macedonian"},
+    {"ru",      "ISO-8859-5", "Russian"},
+    {"uk",      "ISO-8859-5", "Ukrainian"},
+    {"ca",      "ISO-8859-1", "Catalan"},
+    {"de",      "ISO-8859-1", "German"},
+    {"da",      "ISO-8859-1", "Danish"},
+    {"fi",      "ISO-8859-1", "Finnish"},
+    {"fr",      "ISO-8859-1", "French"},
+    {"es",      "ISO-8859-1", "Spanish"},
+    {"is",      "ISO-8859-1", "Icelandic"},
+    {"it",      "ISO-8859-1", "Italian"},
+    {"nl",      "ISO-8859-1", "Dutch"},
+    {"no",      "ISO-8859-1", "Norwegian"},
+    {"pt",      "ISO-8859-1", "Portuguese"},
+    {"sv",      "ISO-8859-1", "Swedish"},
+    {"af",      "ISO-8859-1", "Afrikaans"},
+    {"eu",      "ISO-8859-1", "Basque"},
+    {"fo",      "ISO-8859-1", "Faroese"},
+    {"gl",      "ISO-8859-1", "Galician"},
+    {"ga",      "ISO-8859-1", "Irish"},
+    {"gd",      "ISO-8859-1", "Scottish"},
+    {"mt",      "ISO-8859-3", "Maltese"},
+    {"eo",      "ISO-8859-3", "Esperanto"},
+    {"el",      "ISO-8859-7", "Greek"},
+    {"tr",      "ISO-8859-9", "Turkish"},
+    {"he",      "ISO-8859-8", "Hebrew"},
+    {"iw",      "ISO-8859-8", "Hebrew"},
+    {"ar",      "ISO-8859-6", "Arabic"},
+    {"et",      "ISO-8859-1", "Estonian"},
+    {"lv",      "ISO-8859-2", "Latvian"},
+    {"lt",      "ISO-8859-2", "Lithuanian"},
+    {NULL,      NULL,         NULL}
+};
+ 
+/*
+ * Map a language code, charset name or language name to a charset name.
+ * Each column of sCodePages is compared case-insensitively; the first
+ * matching row wins.  NULL selects the default (row 0); no match gives NULL.
+ */
+static char* derive_codepage_from_string (const char *language)
+{
+    cpstr_t *cp;
+
+    if (!language) {
+        return sCodePages[0].iso_designation;
+    }
+    for (cp = sCodePages; cp->country_abbrev; cp++) {
+        /* strcasecmp, as used elsewhere in this file; stricmp is not portable */
+        if (strcasecmp(language, cp->country_abbrev) == 0
+            || strcasecmp(language, cp->iso_designation) == 0
+            || strcasecmp(language, cp->language_name) == 0) {
+            return cp->iso_designation;
+        }
+    }
+    return NULL;
+}
+
+/* Open a handle converting to to_charset from the charset implied by the first
+ * two characters of Accept-Language; NULL if absent, unknown or the open fails. */
+static apr_xlate_t* get_conv_set (request_rec *r, const char *to_charset)
+{
+    const char *lang_line = apr_table_get(r->headers_in, "accept-language");
+    char lang_code[3], *charset = NULL;
+    apr_xlate_t *convset = NULL;
+
+    if (lang_line) {
+        /* Copy the prefix: truncating in place would corrupt the header table */
+        apr_cpystrn(lang_code, lang_line, sizeof(lang_code));
+        charset = derive_codepage_from_string(lang_code);
+    }
+    if (charset && apr_xlate_open(&convset, to_charset, charset, r->pool) != APR_SUCCESS) {
+        convset = NULL;
+    }
+    return convset;
+}
+
 /*
  * Build the search filter, or at least as much of the search filter that
  * will fit in the buffer. We don't worry about the buffer not being able
@@ -168,6 +284,41 @@
                                 mod_auth_ldap_config_t *sec)
 {
     char *p, *q, *filtbuf_end;
+    char *user;
+    apr_xlate_t *convset;
+    apr_size_t inbytes;
+    apr_size_t outbytes;
+    char *outbuf;
+
+    if (r->user != NULL) {
+        user = apr_pstrdup (r->pool, r->user);
+    }
+    else
+        return;
+
+    switch (sec->conv_type) {
+        case LANG_CONV_ON:
+            convset = sec->convset;
+            break;
+        case LANG_CONV_USE_HEADER:
+            convset = get_conv_set(r, sec->to_charset);
+            break;
+        default:
+            convset = NULL;
+            break;
+    }
+
+    if (convset) {
+        inbytes = strlen(user);
+        outbytes = (inbytes+1)*3;
+        outbuf = apr_pcalloc(r->pool, outbytes);
+
+        /* Convert the user name to UTF-8.  This is only valid for LDAP v3 */
+        if (apr_xlate_conv_buffer(convset, user, &inbytes, outbuf, &outbytes) == APR_SUCCESS) {
+            user = apr_pstrdup(r->pool, outbuf);
+        }
+    }
+
     /* 
      * Create the first part of the filter, which consists of the 
      * config-supplied portions.
@@ -179,7 +330,7 @@
      * LDAP filter metachars are escaped.
      */
     filtbuf_end = filtbuf + FILTER_LENGTH - 1;
-    for (p = r->user, q=filtbuf + strlen(filtbuf);
+    for (p = user, q=filtbuf + strlen(filtbuf);
          *p && q < filtbuf_end; *q++ = *p++) {
         if (strchr("*()\\", *p) != NULL) {
             *q++ = '\\';
@@ -270,6 +421,13 @@
         return result;
     }
 
+    if (r->user == NULL) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, 0, r,
+		      "[%d] auth_ldap authenticate: no user specified", getpid());
+        util_ldap_connection_close(ldc);
+        return sec->auth_authoritative? HTTP_UNAUTHORIZED : DECLINED;
+    }
+
     /* build the username filter */
     mod_auth_ldap_build_filter(filtbuf, r, sec);
 
@@ -630,6 +788,10 @@
     sec->user_is_dn = 0;
     sec->compare_dn_on_server = 0;
 
+    sec->conv_type = LANG_CONV_OFF;  /* initialize language conversion to off */
+    sec->convset = NULL;
+    sec->to_charset = NULL;
+
     return sec;
 }
 
@@ -796,6 +958,47 @@
     return NULL;
 }
 
+/*
+ * Handler for "AuthLDAPCharsetConversion <from> [<to>]".  "use-header" defers
+ * the source charset to each request; any other <from> is opened here.  <to>
+ * falls back to "utf8".  Returns NULL on success, else an error message.
+ */
+static const char *mod_auth_ldap_set_conversion(cmd_parms *cmd, void *config,
+                                              const char *from, const char *to)
+{
+    mod_auth_ldap_config_t *sec = config;
+    apr_xlate_t *convset;
+    const char *from_charset = NULL, *to_charset = NULL;
+
+    if (to) {
+        to_charset = derive_codepage_from_string (to);
+    }
+    if (to_charset) {
+        sec->to_charset = apr_pstrdup(cmd->pool, to_charset);
+    }
+    else if (!sec->to_charset) {
+        sec->to_charset = apr_pstrdup(cmd->pool, "utf8");
+    }
+
+    /* The request header supplies the source charset, so nothing is opened yet */
+    if (strcasecmp(from, "use-header") == 0) {
+        sec->conv_type = LANG_CONV_USE_HEADER;
+        return NULL;
+    }
+
+    from_charset = derive_codepage_from_string (from);
+    if (!from_charset) {
+        return "Unrecognized value for AuthLDAPCharsetConversion directive";
+    }
+    /* Report a failed open instead of silently leaving the conversion off */
+    if (apr_xlate_open(&convset, sec->to_charset, from_charset, cmd->pool) != APR_SUCCESS) {
+        return "Unable to open the charset conversion for AuthLDAPCharsetConversion directive";
+    }
+    sec->convset = convset;
+    sec->conv_type = LANG_CONV_ON; /* turn on language conversion */
+    return NULL;
+}
+
 command_rec mod_auth_ldap_cmds[] = {
     AP_INIT_TAKE1("AuthLDAPURL", mod_auth_ldap_parse_url, NULL, OR_AUTHCFG, 
                   "URL to define LDAP connection. This should be an RFC 2255 complaint\n"
@@ -869,6 +1072,12 @@
     AP_INIT_FLAG("AuthLDAPFrontPageHack", ap_set_flag_slot,
                  (void *)APR_OFFSETOF(mod_auth_ldap_config_t, frontpage_hack), OR_AUTHCFG,
                  "Set to 'on' to support Microsoft FrontPage"),
+
+    /* <from> [<to>]; adjacent string literals concatenate, hence the trailing spaces */
+    AP_INIT_TAKE12("AuthLDAPCharsetConversion", mod_auth_ldap_set_conversion, NULL, OR_AUTHCFG,
+                  "Specifies the <from> and <to> language or charset identifiers that will be used when converting "
+                  "the user ID and password to be used with LDAP.  If \"use-header\" is specified, the accept-language "
+                  "header entry of the request will be used to make a best guess."),
 
 #ifdef APU_HAS_LDAP_STARTTLS
     AP_INIT_FLAG("AuthLDAPStartTLS", ap_set_flag_slot,
