pcs 96/10/29 07:23:05
Modified: src mod_negotiation.c Log: Reviewed By: Brian Behlendorf, Jim Jagielski Update mod_negotiation.c to support variants with multiple language types. Languages can either be obtained from file extensions (eg foo.fr.en.html) or on the Content-Language line in a type-map file (eg Content-Language: fr, en). This patch: * Allows multiple comma separated languages on the Content-Language: header in a type-map file * Updates the function which sets the language quality factor for each variant to select the best (highest q) match from the tags of the variant, using the algorithm in HTTP/1.1 14.4 * If the new (HTTP/1.1) language negotiation results in a tie between variants, falls back onto the Apache 1.1 language negotiation algorithm, using _just_ the first language of the variant * Updates the 406 text and Alternates header to list multiple languages if necessary Revision Changes Path 1.23 +174 -89 apache/src/mod_negotiation.c Index: mod_negotiation.c =================================================================== RCS file: /export/home/cvs/apache/src/mod_negotiation.c,v retrieving revision 1.22 retrieving revision 1.23 diff -C3 -r1.22 -r1.23 *** mod_negotiation.c 1996/10/28 16:28:17 1.22 --- mod_negotiation.c 1996/10/29 15:23:03 1.23 *************** *** 82,87 **** --- 82,98 ---- module negotiation_module; + char *merge_string_array (pool *p, array_header *arr, char *sep) + { + int i; + char *t = ""; + + for (i = 0; i < arr->nelts; i++) { + t = pstrcat(p, t, i ? sep : "", ((char**)arr->elts)[i], NULL); + } + return t; + } + void *create_neg_dir_config (pool *p, char *dummy) { neg_dir_config *new = *************** *** 162,168 **** char *type_name; char *file_name; char *content_encoding; ! char *content_language; char *content_charset; char *description; --- 173,179 ---- char *type_name; char *file_name; char *content_encoding; ! 
array_header *content_languages; /* list of languages for this variant */ char *content_charset; char *description; *************** *** 230,236 **** mime_info->type_name = ""; mime_info->file_name = ""; mime_info->content_encoding = ""; ! mime_info->content_language = ""; mime_info->content_charset = ""; mime_info->description = ""; --- 241,247 ---- mime_info->type_name = ""; mime_info->file_name = ""; mime_info->content_encoding = ""; ! mime_info->content_languages = NULL; mime_info->content_charset = ""; mime_info->description = ""; *************** *** 392,397 **** --- 403,429 ---- return accept_recs; } + /* Given the text of the Content-Languages: line from the var map file, + * return an array containing the languages of this variant + */ + + array_header *do_languages_line (pool *p, char **lang_line) + { + array_header *lang_recs = make_array (p, 2, sizeof (char *)); + + if (!lang_line) return lang_recs; + + while (**lang_line) { + char **new = (char **)push_array (lang_recs); + *new = get_token (p, lang_line, 0); + str_tolower (*new); + if (**lang_line == ',') + ++(*lang_line); + } + + return lang_recs; + } + /***************************************************************** * * Handling header lines from clients... *************** *** 648,655 **** mime_info.bytes = atoi(body); } else if (!strncmp (buffer, "content-language:", 17)) { ! mime_info.content_language = get_token (neg->pool, &body, 0); ! str_tolower (mime_info.content_language); } else if (!strncmp (buffer, "content-encoding:", 17)) { mime_info.content_encoding = get_token (neg->pool, &body, 0); --- 680,687 ---- mime_info.bytes = atoi(body); } else if (!strncmp (buffer, "content-language:", 17)) { ! mime_info.content_languages = ! 
do_languages_line(neg->pool, &body); } else if (!strncmp (buffer, "content-encoding:", 17)) { mime_info.content_encoding = get_token (neg->pool, &body, 0); *************** *** 756,764 **** mime_info.content_encoding = sub_req->content_encoding; str_tolower(mime_info.content_encoding); } ! if (sub_req->content_language) { ! mime_info.content_language = sub_req->content_language; ! str_tolower(mime_info.content_language); } get_entry (neg->pool, &accept_info, sub_req->content_type); --- 788,800 ---- mime_info.content_encoding = sub_req->content_encoding; str_tolower(mime_info.content_encoding); } ! if (sub_req->content_languages) { ! int i; ! mime_info.content_languages = sub_req->content_languages; ! if (mime_info.content_languages) ! for (i = 0; i < mime_info.content_languages->nelts; ++i) ! str_tolower(((char**) ! (mime_info.content_languages->elts))[i]); } get_entry (neg->pool, &accept_info, sub_req->content_type); *************** *** 958,964 **** if (!neg->use_transparent_neg) for (j = 0; j < neg->avail_vars->nelts; ++j) { var_rec *variant = &avail_recs[j]; ! if (variant->content_language && *variant->content_language) { neg->default_lang_quality = 0.001; return; } --- 994,1001 ---- if (!neg->use_transparent_neg) for (j = 0; j < neg->avail_vars->nelts; ++j) { var_rec *variant = &avail_recs[j]; ! if (variant->content_languages && ! variant->content_languages->nelts) { neg->default_lang_quality = 0.001; return; } *************** *** 978,983 **** --- 1015,1026 ---- * match, use the longest string from the Accept-Language: header * (see HTTP/1.1 [14.4]) * + * When a variant has multiple languages, we find the 'best' + * match for each variant language tag as above, then select the + * one with the highest q value. Because both the accept-header + * and variant can have multiple languages, we now have a hairy + * loop-within-a-loop here. + * * If the variant has no language and we have no Accept-Language * items, leave the quality at 1.0 and return. 
* *************** *** 994,1089 **** void set_language_quality(negotiation_state *neg, var_rec *variant) { - accept_rec *accs, *best = NULL, *star = NULL; int i; - char *lang = variant->content_language; - int prefixlen = 0; - char *p; int naccept = neg->accept_langs->nelts; int index; neg_dir_config *conf = NULL; ! int longest_lang_range_len = 0; ! int len; if (naccept == 0) conf = (neg_dir_config *) get_module_config (neg->r->per_dir_config, &negotiation_module); ! if (naccept == 0 && (!lang || !*lang)) ! return; /* variant has no assigned language */ ! ! p = strchr(lang, '-'); /* find prefix part (if any) */ ! if (p) ! prefixlen = p - lang; ! if (!lang || !*lang) { /* This variant has no content-language, so use the default * quality factor for variants with no content-language * (previously set by set_default_lang_quality()). */ variant->lang_quality = neg->default_lang_quality; } else if (naccept) { float fiddle_q = 0.0; ! ! accs = (accept_rec *)neg->accept_langs->elts; ! ! for (i = 0; i < neg->accept_langs->nelts; ++i) { ! if (!strcmp(accs[i].type_name, "*")) { ! star = &accs[i]; ! continue; ! } ! ! /* Find language. We match if either the variant language ! * tag exactly matches, or the prefix of the tag up to the ! * '-' character matches the whole of the language in the ! * Accept-Language header */ ! if ((!strcmp (lang, accs[i].type_name) || ! (prefixlen && ! !strncmp(lang, accs[i].type_name, prefixlen) && ! (accs[i].type_name[prefixlen] == '\0'))) && ! ((len = strlen(accs[i].type_name)) > ! longest_lang_range_len)) { ! longest_lang_range_len = len; ! best = &accs[i]; ! } ! ! if (! best) { ! /* The next bit is a fiddle. Some browsers might be ! * configured to send more specific language ranges ! * than desirable. For example, an Accept-Language of ! * en-US should never match variants with languages en ! * or en-GB. But US English speakers might pick en-US ! * as their language choice. So this fiddle checks if ! 
* the language range has a prefix, and if so, it ! * matches variants which match that prefix with a ! * priority of 0.001. So a request for en-US would ! * match variants of types en and en-GB, but at much ! * lower priority than matches of en-US directly, or ! * of any other language listed on the Accept-Language ! * header ! */ ! if ((p = strchr(accs[i].type_name, '-'))) { ! int plen = p - accs[i].type_name; ! if (!strncmp(lang, accs[i].type_name, plen)) ! fiddle_q = 0.001; ! } ! } ! ! } ! variant->lang_quality = best ? best->quality : (star ? star->quality : fiddle_q); - variant->definite = variant->definite && best; } ! /* Now set the old lang_index field */ index = 0; if (naccept == 0) /* Client doesn't care */ ! index = find_default_index (conf, ! variant->content_language); else /* Client has Accept-Language */ ! index = find_lang_index (neg->accept_langs, ! variant->content_language); variant->lang_index = index; return; --- 1037,1172 ---- void set_language_quality(negotiation_state *neg, var_rec *variant) { int i; int naccept = neg->accept_langs->nelts; int index; neg_dir_config *conf = NULL; ! char *firstlang; if (naccept == 0) conf = (neg_dir_config *) get_module_config (neg->r->per_dir_config, &negotiation_module); ! if (naccept == 0 && (!variant->content_languages || ! !variant->content_languages->nelts)) ! return; /* no accept-language and no variant lang */ ! if (!variant->content_languages || !variant->content_languages->nelts) { /* This variant has no content-language, so use the default * quality factor for variants with no content-language * (previously set by set_default_lang_quality()). */ variant->lang_quality = neg->default_lang_quality; + + if (naccept == 0) + return; /* no accept-language items */ + } else if (naccept) { + /* Variant has one (or more) langauges, and we have one (or more) + * language ranges on the Accept-Language header. Look for + * the best match. 
We do this by going through each language + * on the variant description looking for a match on the + * Accept-Language header. The best match is the longest matching + * language on the header. The final result is the best q value + * from all the languages on the variant description. + */ + int j; float fiddle_q = 0.0; ! accept_rec *accs = (accept_rec *)neg->accept_langs->elts; ! accept_rec *best = NULL, *star = NULL; ! char *p; ! for (j = 0; j < variant->content_languages->nelts; ++j) { ! char *lang; /* language from variant description */ ! accept_rec *bestthistag = NULL; ! int prefixlen = 0; ! int longest_lang_range_len = 0; ! int len; ! /* lang is the variant's language-tag, which is the one ! * we are allowed to use the prefix of in HTTP/1.1 ! */ ! lang = ((char **)(variant->content_languages->elts))[j]; ! p = strchr(lang, '-'); /* find prefix part (if any) */ ! if (p) ! prefixlen = p - lang; ! ! /* now find the best (i.e. longest) matching Accept-Language ! * header language. We put the best match for this tag in ! * bestthistag. We cannot update the overall best (based on ! * q value) because the best match for this tag is the longest ! * language item on the accept header, not necessarily the ! * highest q. ! */ ! for (i = 0; i < neg->accept_langs->nelts; ++i) { ! if (!strcmp(accs[i].type_name, "*")) { ! if (!star) ! star = &accs[i]; ! continue; ! } ! ! /* Find language. We match if either the variant language ! * tag exactly matches, or the prefix of the tag up to the ! * '-' character matches the whole of the language in the ! * Accept-Language header. We only use this accept-language ! * item as the best match for the current tag if it ! * is longer than the previous best match */ ! if ((!strcmp (lang, accs[i].type_name) || ! (prefixlen && ! !strncmp(lang, accs[i].type_name, prefixlen) && ! (accs[i].type_name[prefixlen] == '\0'))) && ! ((len = strlen(accs[i].type_name)) > ! longest_lang_range_len)) { ! longest_lang_range_len = len; ! 
bestthistag = &accs[i]; ! } ! ! if (! bestthistag) { ! /* The next bit is a fiddle. Some browsers might be ! * configured to send more specific language ranges ! * than desirable. For example, an Accept-Language of ! * en-US should never match variants with languages en ! * or en-GB. But US English speakers might pick en-US ! * as their language choice. So this fiddle checks if ! * the language range has a prefix, and if so, it ! * matches variants which match that prefix with a ! * priority of 0.001. So a request for en-US would ! * match variants of types en and en-GB, but at much ! * lower priority than matches of en-US directly, or ! * of any other language listed on the Accept-Language ! * header ! */ ! if ((p = strchr(accs[i].type_name, '-'))) { ! int plen = p - accs[i].type_name; ! if (!strncmp(lang, accs[i].type_name, plen)) ! fiddle_q = 0.001; ! } ! } ! } ! /* Finished looking at Accept-Language headers, the best ! * (longest) match is in bestthistag, or NULL if no match ! */ ! if (!best || ! (bestthistag && bestthistag->quality > best->quality)) ! best = bestthistag; ! } ! ! variant->lang_quality = best ? best->quality : (star ? star->quality : fiddle_q); } ! /* Now set the old lang_index field. Since this is old ! * stuff anyway, don't both with handling multiple languages ! * per variant, just use the first one assigned to it ! */ index = 0; + if (variant->content_languages && variant->content_languages->nelts) + firstlang = ((char**)variant->content_languages->elts)[0]; + else + firstlang = ""; if (naccept == 0) /* Client doesn't care */ ! index = find_default_index (conf, firstlang); else /* Client has Accept-Language */ ! index = find_lang_index (neg->accept_langs, firstlang); variant->lang_index = index; return; *************** *** 1326,1332 **** fprintf(stderr, "Variant: file=%s type=%s lang=%s acceptq=%1.3f langq=%1.3f typeq=%1.3f q=%1.3f definite=%d\n", variant->file_name ? variant->file_name : "", variant->type_name ? variant->type_name : "", ! 
variant->content_language ? variant->content_language : "", variant->accept_type_quality, variant->lang_quality, variant->type_quality, --- 1409,1415 ---- fprintf(stderr, "Variant: file=%s type=%s lang=%s acceptq=%1.3f langq=%1.3f typeq=%1.3f q=%1.3f definite=%d\n", variant->file_name ? variant->file_name : "", variant->type_name ? variant->type_name : "", ! variant->content_languages ? merge_string_array(neg->pool, variant->content_languages, ",") : "", variant->accept_type_quality, variant->lang_quality, variant->type_quality, *************** *** 1594,1605 **** else if (strcmp(sample_type, variant->type_name)) vary_by_type = 1; } ! if (variant->content_language) { ! if (*variant->content_language) ! rec = pstrcat(r->pool, rec, " {language ", ! variant->content_language, "}", NULL); ! if (!sample_language) sample_language = variant->content_language; ! else if (strcmp(sample_language, variant->content_language)) vary_by_language = 1; } if (variant->content_encoding) { --- 1677,1688 ---- else if (strcmp(sample_type, variant->type_name)) vary_by_type = 1; } ! if (variant->content_languages && variant->content_languages->nelts) { ! char *langs = ! merge_string_array (r->pool, variant->content_languages, ","); ! rec = pstrcat(r->pool, rec, " {language ", langs, "}", NULL); ! if (!sample_language) sample_language = langs; ! else if (strcmp(sample_language, langs)) vary_by_language = 1; } if (variant->content_encoding) { *************** *** 1654,1672 **** var_rec *variant = &((var_rec *)neg->avail_vars->elts)[i]; char *filename = variant->file_name ? variant->file_name : ""; char *content_type = variant->type_name ? variant->type_name : ""; ! char *content_language = ! variant->content_language ? variant->content_language : ""; char *description = variant->description ? variant->description : ""; /* The format isn't very neat, and it would be nice to make * the tags human readable (eg replace 'language en' with * 'English'). 
*/ t = pstrcat(r->pool, t, "<li><a href=\"", filename, "\">", ! filename, "</a> ", description, ! " type ", content_type, ! *content_language ? " language " : "", content_language, ! "\n", ! NULL); } t = pstrcat(r->pool, t, "</ul>\n", NULL); --- 1737,1757 ---- var_rec *variant = &((var_rec *)neg->avail_vars->elts)[i]; char *filename = variant->file_name ? variant->file_name : ""; char *content_type = variant->type_name ? variant->type_name : ""; ! array_header *languages = variant->content_languages; char *description = variant->description ? variant->description : ""; /* The format isn't very neat, and it would be nice to make * the tags human readable (eg replace 'language en' with * 'English'). */ t = pstrcat(r->pool, t, "<li><a href=\"", filename, "\">", ! filename, "</a> ", description, NULL); ! if (content_type) ! t = pstrcat(r->pool, t, " type ", content_type, NULL); ! if (languages && languages->nelts) ! t = pstrcat(r->pool, t, " language ", ! merge_string_array(r->pool, languages, ", "), ! NULL); ! t = pstrcat(r->pool, t, "\n", NULL); } t = pstrcat(r->pool, t, "</ul>\n", NULL); *************** *** 1853,1859 **** r->handler = sub_req->handler; r->content_type = sub_req->content_type; r->content_encoding = sub_req->content_encoding; ! r->content_language = sub_req->content_language; r->finfo = sub_req->finfo; return OK; --- 1938,1944 ---- r->handler = sub_req->handler; r->content_type = sub_req->content_type; r->content_encoding = sub_req->content_encoding; ! r->content_languages = sub_req->content_languages; r->finfo = sub_req->finfo; return OK;