Re: [PATCH] http: Add Accept-Language header if possible
I agree that a list of char* is enough for language_tags. Thanks for your review and patch. I'll apply your patch and send v9. On Wed, Jan 28, 2015 at 3:15 PM, Junio C Hamano wrote: > On Tue, Jan 27, 2015 at 3:34 PM, Junio C Hamano wrote: >> Yi EungJun writes: >> >>> + >>> + sprintf(q_format, ";q=0.%%0%dd", decimal_places); >>> + >>> + strbuf_addstr(buf, "Accept-Language: "); >>> + >>> + for(i = 0; i < num_langs; i++) { >>> + if (i > 0) >>> + strbuf_addstr(buf, ", "); >>> + >>> + strbuf_addstr(buf, strbuf_detach(&language_tags[i], >>> NULL)); >> >> This is not wrong per-se, but it looks somewhat convoluted to me. >> ... > > Actually, this is wrong, isn't it? > > strbuf_detach() removes the language_tags[i].buf from the strbuf, > and the caller now owns that piece of memory. Then strbuf_addstr() > appends a copy of that string to buf, and the piece of memory > that was originally held by language_tags[i].buf is now lost forever. > > This is leaking. > >>> + /* free language tags */ >>> + for(i = 0; i < num_langs; i++) { >>> + strbuf_release(&language_tags[i]); >>> + } > > ... because this loop does not free memory for earlier parts of > language_tags[]. > >> I am wondering if using strbuf for each of the language_tags[] is >> even necessary. How about doing it this way instead? > > And I think my counter-proposal does not leak (as it does not us strbuf for > language_tags[] anymore). > >> >> http.c | 22 +- >> 1 file changed, 9 insertions(+), 13 deletions(-) >> >> diff --git a/http.c b/http.c >> index 6111c6a..db591b3 100644 >> --- a/http.c >> +++ b/http.c >> @@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf) >> const int MAX_DECIMAL_PLACES = 3; >> const int MAX_LANGUAGE_TAGS = 1000; >> const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000; >> - struct strbuf *language_tags = NULL; >> + char **language_tags = NULL; >> int num_langs = 0; >> const char *s = get_preferred_languages(); >> int i; >> @@ -1053,9 +1053,7 @@ static void write_accept_language(struct strbuf *buf) >> if (tag.len) { >> num_langs++; >> REALLOC_ARRAY(language_tags, num_langs); >> - strbuf_init(&language_tags[num_langs - 1], 0); >> - strbuf_swap(&tag, &language_tags[num_langs - 1]); >> - >> + language_tags[num_langs - 1] = strbuf_detach(&tag, >> NULL); >> if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for >> '*' */ >> break; >> } >> @@ -1070,13 +1068,12 @@ static void write_accept_language(struct strbuf *buf) >> >> /* add '*' */ >> REALLOC_ARRAY(language_tags, num_langs + 1); >> - strbuf_init(&language_tags[num_langs], 0); >> - strbuf_addstr(&language_tags[num_langs++], "*"); >> + language_tags[num_langs++] = "*"; /* it's OK; this won't be >> freed */ >> >> /* compute decimal_places */ >> for (max_q = 1, decimal_places = 0; >> - max_q < num_langs && decimal_places <= >> MAX_DECIMAL_PLACES; >> - decimal_places++, max_q *= 10) >> +max_q < num_langs && decimal_places <= >> MAX_DECIMAL_PLACES; >> +decimal_places++, max_q *= 10) >> ; >> >> sprintf(q_format, ";q=0.%%0%dd", decimal_places); >> @@ -1087,7 +1084,7 @@ static void write_accept_language(struct strbuf *buf) >> if (i > 0) >> strbuf_addstr(buf, ", "); >> >> - strbuf_addstr(buf, strbuf_detach(&language_tags[i], >> NULL)); >> + strbuf_addstr(buf, language_tags[i]); >> >> if (i > 0) >> strbuf_addf(buf, q_format, max_q - i); >> @@ -1101,10 +1098,9 @@ static void write_accept_language(struct strbuf *buf) >> } >> } >> >> - /* free language tags */ >> - for(i = 0; i < num_langs; i++) { >> - strbuf_release(&language_tags[i]); >> - } >> + /* free language tags -- last one is a static '*' */ >> + for(i = 0; i < num_langs - 1; i++) >> + free(language_tags[i]); >> free(language_tags); >> } >> -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
On Tue, Jan 27, 2015 at 3:34 PM, Junio C Hamano wrote: > Yi EungJun writes: > >> + >> + sprintf(q_format, ";q=0.%%0%dd", decimal_places); >> + >> + strbuf_addstr(buf, "Accept-Language: "); >> + >> + for(i = 0; i < num_langs; i++) { >> + if (i > 0) >> + strbuf_addstr(buf, ", "); >> + >> + strbuf_addstr(buf, strbuf_detach(&language_tags[i], >> NULL)); > > This is not wrong per-se, but it looks somewhat convoluted to me. > ... Actually, this is wrong, isn't it? strbuf_detach() removes the language_tags[i].buf from the strbuf, and the caller now owns that piece of memory. Then strbuf_addstr() appends a copy of that string to buf, and the piece of memory that was originally held by language_tags[i].buf is now lost forever. This is leaking. >> + /* free language tags */ >> + for(i = 0; i < num_langs; i++) { >> + strbuf_release(&language_tags[i]); >> + } ... because this loop does not free memory for earlier parts of language_tags[]. > I am wondering if using strbuf for each of the language_tags[] is > even necessary. How about doing it this way instead? And I think my counter-proposal does not leak (as it does not us strbuf for language_tags[] anymore). > > http.c | 22 +- > 1 file changed, 9 insertions(+), 13 deletions(-) > > diff --git a/http.c b/http.c > index 6111c6a..db591b3 100644 > --- a/http.c > +++ b/http.c > @@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf) > const int MAX_DECIMAL_PLACES = 3; > const int MAX_LANGUAGE_TAGS = 1000; > const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000; > - struct strbuf *language_tags = NULL; > + char **language_tags = NULL; > int num_langs = 0; > const char *s = get_preferred_languages(); > int i; > @@ -1053,9 +1053,7 @@ static void write_accept_language(struct strbuf *buf) > if (tag.len) { > num_langs++; > REALLOC_ARRAY(language_tags, num_langs); > - strbuf_init(&language_tags[num_langs - 1], 0); > - strbuf_swap(&tag, &language_tags[num_langs - 1]); > - > + language_tags[num_langs - 1] = strbuf_detach(&tag, > NULL); > if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' > */ > break; > } > @@ -1070,13 +1068,12 @@ static void write_accept_language(struct strbuf *buf) > > /* add '*' */ > REALLOC_ARRAY(language_tags, num_langs + 1); > - strbuf_init(&language_tags[num_langs], 0); > - strbuf_addstr(&language_tags[num_langs++], "*"); > + language_tags[num_langs++] = "*"; /* it's OK; this won't be > freed */ > > /* compute decimal_places */ > for (max_q = 1, decimal_places = 0; > - max_q < num_langs && decimal_places <= > MAX_DECIMAL_PLACES; > - decimal_places++, max_q *= 10) > +max_q < num_langs && decimal_places <= > MAX_DECIMAL_PLACES; > +decimal_places++, max_q *= 10) > ; > > sprintf(q_format, ";q=0.%%0%dd", decimal_places); > @@ -1087,7 +1084,7 @@ static void write_accept_language(struct strbuf *buf) > if (i > 0) > strbuf_addstr(buf, ", "); > > - strbuf_addstr(buf, strbuf_detach(&language_tags[i], > NULL)); > + strbuf_addstr(buf, language_tags[i]); > > if (i > 0) > strbuf_addf(buf, q_format, max_q - i); > @@ -1101,10 +1098,9 @@ static void write_accept_language(struct strbuf *buf) > } > } > > - /* free language tags */ > - for(i = 0; i < num_langs; i++) { > - strbuf_release(&language_tags[i]); > - } > + /* free language tags -- last one is a static '*' */ > + for(i = 0; i < num_langs - 1; i++) > + free(language_tags[i]); > free(language_tags); > } > -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
Yi EungJun writes: > +static void write_accept_language(struct strbuf *buf) > +{ > + /* > + * MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than > + * that, q-value will be smaller than 0.001, the minimum q-value the > + * HTTP specification allows. See > + * http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value. > + */ > + const int MAX_DECIMAL_PLACES = 3; > + const int MAX_LANGUAGE_TAGS = 1000; > + const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000; > + struct strbuf *language_tags = NULL; > + int num_langs = 0; > + const char *s = get_preferred_languages(); > + int i; > + struct strbuf tag = STRBUF_INIT; > + > + /* Don't add Accept-Language header if no language is preferred. */ > + if (!s) > + return; > + > + /* > + * Split the colon-separated string of preferred languages into > + * language_tags array. > + */ > + do { > + /* collect language tag */ > + for (; *s && (isalnum(*s) || *s == '_'); s++) > + strbuf_addch(&tag, *s == '_' ? '-' : *s); > + > + /* skip .codeset, @modifier and any other unnecessary parts */ > + while (*s && *s != ':') > + s++; > + > + if (tag.len) { > + num_langs++; > + REALLOC_ARRAY(language_tags, num_langs); > + strbuf_init(&language_tags[num_langs - 1], 0); > + strbuf_swap(&tag, &language_tags[num_langs - 1]); > + > + if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */ > + break; > + } > + } while (*s++); The structure of this function is much easier to understand than any of the previous rounds. You collect up to the max you are going to support, and then you format up to the max you are going to send. Very straight-forward and simple. > + /* write Accept-Language header into buf */ > + if (num_langs >= 1) { micronit: should be OK to just say "if (num_langs)". > + int last_buf_len = 0; > + int max_q; > + int decimal_places; > + char q_format[32]; > + > + /* add '*' */ > + REALLOC_ARRAY(language_tags, num_langs + 1); > + strbuf_init(&language_tags[num_langs], 0); > + strbuf_addstr(&language_tags[num_langs++], "*"); > + > + /* compute decimal_places */ > + for (max_q = 1, decimal_places = 0; > + max_q < num_langs && decimal_places <= > MAX_DECIMAL_PLACES; > + decimal_places++, max_q *= 10) > + ; micronit: the second and the third line are indented too deeply and made me wonder if this has an overlong first line (i.e. the set-up part to enter the for-loop) split into multiple lines. > + > + sprintf(q_format, ";q=0.%%0%dd", decimal_places); > + > + strbuf_addstr(buf, "Accept-Language: "); > + > + for(i = 0; i < num_langs; i++) { > + if (i > 0) > + strbuf_addstr(buf, ", "); > + > + strbuf_addstr(buf, strbuf_detach(&language_tags[i], > NULL)); This is not wrong per-se, but it looks somewhat convoluted to me. You can just peek language_tags[i].buf here without detaching, as you will free the strbufs after you exit the loop anyway. Your version is not wrong and it does not result in double-freeing, because detach clears the strbuf, but at the same time, makes the responsibility to free language_tags[] strbuf split into here (for elements up to the ones that are used to fill buf) and the cleanup loop (for elements that are not used in this loop). > + if (i > 0) > + strbuf_addf(buf, q_format, max_q - i); > + > + if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) { > + strbuf_remove(buf, last_buf_len, buf->len - > last_buf_len); > + break; > + } > + > + last_buf_len = buf->len; > + } > + } > + > + /* free language tags */ > + for(i = 0; i < num_langs; i++) { > + strbuf_release(&language_tags[i]); > + } > + free(language_tags); > +} I am wondering if using strbuf for each of the language_tags[] is even necessary. How about doing it this way instead? http.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/http.c b/http.c index 6111c6a..db591b3 100644 --- a/http.c +++ b/http.c @@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf) const int MAX_DECIMAL_PLACES = 3; const int MAX_LANGUAGE_TAGS = 1000; const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000; - struct strbuf *language_tags = NULL; + char **lang
[PATCH] http: Add Accept-Language header if possible
From: Yi EungJun Add an Accept-Language header which indicates the user's preferred languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG. Examples: LANGUAGE= -> "" LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1" LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1" LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1" This gives git servers a chance to display remote error messages in the user's preferred language. Limit the number of languages to 1,000 because q-value must not be smaller than 0.001, and limit the length of Accept-Language header to 4,000 bytes for some HTTP servers which cannot accept such long header. Signed-off-by: Yi EungJun --- http.c | 151 + remote-curl.c | 2 + t/t5550-http-fetch-dumb.sh | 42 + 3 files changed, 195 insertions(+) diff --git a/http.c b/http.c index 040f362..6111c6a 100644 --- a/http.c +++ b/http.c @@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header; static struct active_request_slot *active_queue_head; +static char *cached_accept_language; + size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_) { size_t size = eltsize * nmemb; @@ -515,6 +517,9 @@ void http_cleanup(void) cert_auth.password = NULL; } ssl_cert_password_required = 0; + + free(cached_accept_language); + cached_accept_language = NULL; } struct active_request_slot *get_active_slot(void) @@ -986,6 +991,146 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type, strbuf_addstr(charset, "ISO-8859-1"); } +/* + * Guess the user's preferred languages from the value in LANGUAGE environment + * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. + * + * The result can be a colon-separated list like "ko:ja:en". + */ +static const char *get_preferred_languages(void) +{ + const char *retval; + + retval = getenv("LANGUAGE"); + if (retval && *retval) + return retval; + +#ifndef NO_GETTEXT + retval = setlocale(LC_MESSAGES, NULL); + if (retval && *retval && + strcmp(retval, "C") && + strcmp(retval, "POSIX")) + return retval; +#endif + + return NULL; +} + +static void write_accept_language(struct strbuf *buf) +{ + /* +* MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than +* that, q-value will be smaller than 0.001, the minimum q-value the +* HTTP specification allows. See +* http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value. +*/ + const int MAX_DECIMAL_PLACES = 3; + const int MAX_LANGUAGE_TAGS = 1000; + const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000; + struct strbuf *language_tags = NULL; + int num_langs = 0; + const char *s = get_preferred_languages(); + int i; + struct strbuf tag = STRBUF_INIT; + + /* Don't add Accept-Language header if no language is preferred. */ + if (!s) + return; + + /* +* Split the colon-separated string of preferred languages into +* language_tags array. +*/ + do { + /* collect language tag */ + for (; *s && (isalnum(*s) || *s == '_'); s++) + strbuf_addch(&tag, *s == '_' ? '-' : *s); + + /* skip .codeset, @modifier and any other unnecessary parts */ + while (*s && *s != ':') + s++; + + if (tag.len) { + num_langs++; + REALLOC_ARRAY(language_tags, num_langs); + strbuf_init(&language_tags[num_langs - 1], 0); + strbuf_swap(&tag, &language_tags[num_langs - 1]); + + if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */ + break; + } + } while (*s++); + + /* write Accept-Language header into buf */ + if (num_langs >= 1) { + int last_buf_len = 0; + int max_q; + int decimal_places; + char q_format[32]; + + /* add '*' */ + REALLOC_ARRAY(language_tags, num_langs + 1); + strbuf_init(&language_tags[num_langs], 0); + strbuf_addstr(&language_tags[num_langs++], "*"); + + /* compute decimal_places */ + for (max_q = 1, decimal_places = 0; + max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES; + decimal_places++, max_q *= 10) + ; + + sprintf(q_format, ";q=0.%%0%dd", decimal_places); + + strbuf_addstr(buf, "Accept-Language: "); + + for(i = 0; i < num_langs; i++) { + if (i > 0) +
Re: [PATCH] http: Add Accept-Language header if possible
Jeff King: If that is the case, though, I wonder if we should actually be adding it as a git-protocol header so that all transports can benefit (i.e., we could be localizing human-readable error messages in upload-pack, receive-pack, etc). That would be very nice, thre is a lot of language mixing going on at the moment. -- \\// Peter - http://www.softwolves.pp.se/ -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
2014-07-12 1:24 GMT+09:00 Eric Sunshine : > On Fri, Jul 11, 2014 at 5:22 AM, Yi, EungJun wrote: >> 2014-07-09 6:52 GMT+09:00 Eric Sunshine : + grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual >>> >>> Do you want to \-escape the periods? (Or maybe use 'grep -F'?) >> >> I just want to match '*' character. I tried 'grep -F' but it does not help. > > I meant that the periods in your grep pattern are matching any > character. If you want to be very strict, so that they match only > period, then you should \-escape them. Oops, I misunderstood you. You are right. I'll \- escape them. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
2014-07-11 5:10 GMT+09:00 Jeff King : > On Wed, Jul 09, 2014 at 11:46:14AM +0100, Peter Krefting wrote: > >> Jeff King: >> >> >I did some digging, and I think the public API is setlocale with a NULL >> >parameter, like: >> > >> > printf("%s\n", setlocale(LC_MESSAGES, NULL)); >> > >> >That still will end up like "en_US.UTF-8", though; >> >> And it only yields the highest-priority language, I think. > > I wasn't clear on whether POSIX locale variables actually supported > multiple languages with priorities. I have never seen that, though the > original commit message indicated that LANGUAGE=x:y was a thing (I > wasn't sure if that was a made-up thing, or something that libc actually > supported). > >> Debian's website has a nice writeup on the subject: >> http://www.debian.org/intro/cn#howtoset > > That seems to be about language settings in browsers, which are a much > richer set of preferences than POSIX locales (I think). > > It would not be wrong to have that level of configuration for git's http > requests, but I do not know if it is worth the effort. Mapping the > user's gettext locale into an accept-language header seems like a > straightforward way to communicate to the other side what the client is > using to show errors (so that errors coming from the server can match). Thanks for you advice. I'll write a path to use both of setlocale(LC_MESSAGES, NULL) and getenv("LANGUAGE") to get the user's preferred language. setlocale(LC_MESSAGES, NULL) is quite nice way because it takes LC_ALL, LC_MESSAGES and LANG into account, but not LANGUAGE. I think we should take also LANGUAGE into account as gettext does. [1] [1]: http://www.gnu.org/software/gettext/manual/gettext.html#Locale-Environment-Variables -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
2014-07-09 19:40 GMT+09:00 Peter Krefting : > Yi EungJun: > > >> Example: >> LANGUAGE= -> "" >> LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" >> LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" > > > Avoid adding "q=1.000". It is redundant (the default for any unqualified > language names is 1.0, and additionally there has historically been some > buggy servers that failed if it was included. Ok, I'll fix it. > > >> + p1 = getenv("LANGUAGE"); > > > You need a fallback mechanism here to parse all the possible language > variables. I would use the first one I find of these: > > 1. LANGUAGE > 2. LC_ALL > 3. LC_MESSAGES > 4. LANG > > Only "LANGUAGE" holds a colon-separated list, but the same code can parse > all of them, just yielding a single entry for the others. I'll use setlocale(LC_MESSAGES, NULL) as well as getenv("LANGUAGE"). > > >> + strbuf_add(buf, p1, p2 - p1); > > > The tokens are on the form language_COUNTRY.encoding@identifier, whereas > Accept-Language wants language-COUNTRY, so you need to a) replace "_" with > "-", and b) chop off anything following a "." or "@". > > >> + strbuf_addf(buf, "; q=%.3f", q); >> + q -= 0.001; > > > Three decimals seems a bit overkill, but some experimentation might be > necessary. I'll use three decimals only if there are 100 or more preferred languages. > > >> + strbuf_addstr(buf, "*; q=0.001\r\n"); > > > You should probably also add an explicit "en" here, if none was already > included. I've seen some servers break horribly if "en" isn't included. I'll send Accept-Language only if there is at least one preferred language. Is it enough? Thanks for your review. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
On Wed, Jul 09, 2014 at 11:46:14AM +0100, Peter Krefting wrote: > Jeff King: > > >I did some digging, and I think the public API is setlocale with a NULL > >parameter, like: > > > > printf("%s\n", setlocale(LC_MESSAGES, NULL)); > > > >That still will end up like "en_US.UTF-8", though; > > And it only yields the highest-priority language, I think. I wasn't clear on whether POSIX locale variables actually supported multiple languages with priorities. I have never seen that, though the original commit message indicated that LANGUAGE=x:y was a thing (I wasn't sure if that was a made-up thing, or something that libc actually supported). > Debian's website has a nice writeup on the subject: > http://www.debian.org/intro/cn#howtoset That seems to be about language settings in browsers, which are a much richer set of preferences than POSIX locales (I think). It would not be wrong to have that level of configuration for git's http requests, but I do not know if it is worth the effort. Mapping the user's gettext locale into an accept-language header seems like a straightforward way to communicate to the other side what the client is using to show errors (so that errors coming from the server can match). If that is the case, though, I wonder if we should actually be adding it as a git-protocol header so that all transports can benefit (i.e., we could be localizing human-readable error messages in upload-pack, receive-pack, etc). -Peff -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
Jeff King: I did some digging, and I think the public API is setlocale with a NULL parameter, like: printf("%s\n", setlocale(LC_MESSAGES, NULL)); That still will end up like "en_US.UTF-8", though; And it only yields the highest-priority language, I think. I couldn't find any standard functions for parsing that. It seems like it would be pretty straightforward to do so, though. RFC 5646 is the current specification on language tags, btw. From my brief reading of rfc2616, that should probably become "en-us", and any time we add "x-y", we may want to add "x" as a fallback (that is certainly true for English; I don't know about other languages with dialects). Yes, adding the generic fallback is necessary, as "en-US" on the server matches a client's "en", but not vice versa. So if you request "en-US" and "de" and the server only has "en-GB" and "de", you'd get the "de" version. Debian's website has a nice writeup on the subject: http://www.debian.org/intro/cn#howtoset -- \\// Peter - http://www.softwolves.pp.se/ -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
Yi EungJun: Example: LANGUAGE= -> "" LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" Avoid adding "q=1.000". It is redundant (the default for any unqualified language names is 1.0, and additionally there has historically been some buggy servers that failed if it was included. + p1 = getenv("LANGUAGE"); You need a fallback mechanism here to parse all the possible language variables. I would use the first one I find of these: 1. LANGUAGE 2. LC_ALL 3. LC_MESSAGES 4. LANG Only "LANGUAGE" holds a colon-separated list, but the same code can parse all of them, just yielding a single entry for the others. + strbuf_add(buf, p1, p2 - p1); The tokens are on the form language_COUNTRY.encoding@identifier, whereas Accept-Language wants language-COUNTRY, so you need to a) replace "_" with "-", and b) chop off anything following a "." or "@". + strbuf_addf(buf, "; q=%.3f", q); + q -= 0.001; Three decimals seems a bit overkill, but some experimentation might be necessary. + strbuf_addstr(buf, "*; q=0.001\r\n"); You should probably also add an explicit "en" here, if none was already included. I've seen some servers break horribly if "en" isn't included. For reference, I have my LANGUAGE variable set to "sv_SE.utf8:sv:nb_NO.utf8:nb:da_DK.utf8:da:nn_NO.utf8:nn:en_GB.utf8:en_US.utf8:en" -- \\// Peter - http://www.softwolves.pp.se/ -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
On Wed, Jul 09, 2014 at 02:46:35PM +0900, Yi, EungJun wrote: > I agree with you. In fact, I tried to get user's preferred language in > the same way as gettext. It has guess_category_value() to do that and > the function is good enough because it considers $LANGUAGE, $LC_ALL, > $LANG, and also system-dependent preferences. But the function does > not seem a public API and I don't know how can I use the function in > Git. So I chose to use $LANGUAGE only. I did some digging, and I think the public API is setlocale with a NULL parameter, like: printf("%s\n", setlocale(LC_MESSAGES, NULL)); That still will end up like "en_US.UTF-8", though; I couldn't find any standard functions for parsing that. It seems like it would be pretty straightforward to do so, though. >From my brief reading of rfc2616, that should probably become "en-us", and any time we add "x-y", we may want to add "x" as a fallback (that is certainly true for English; I don't know about other languages with dialects). -Peff -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
2014-07-09 14:10 GMT+09:00 Jeff King : > On Wed, Jul 09, 2014 at 12:54:06AM +0900, Yi EungJun wrote: > >> From: Yi EungJun >> >> Add an Accept-Language header which indicates the user's preferred >> languages defined by 'LANGUAGE' environment variable if the variable is >> not empty. >> >> Example: >> LANGUAGE= -> "" >> LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" >> LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" >> >> This gives git servers a chance to display remote error messages in >> the user's preferred language. > > Should this also take into account other language-related variables? I'd > think $LC_ALL, $LC_MESSAGES, and $LANG would affect it, too. Are > colon-separated values a standard in $LANGUAGE? I have never seen them, > but I admit I am not very knowledgeable about localization issues. > > Also, we do we need to do more parsing? My $LANG is set to en_US.UTF-8. > The encoding part is presumably uninteresting to the remote server. I > also wonder if there are support functions in libc or as part of gettext > that can help us get these values. > > -Peff I agree with you. In fact, I tried to get user's preferred language in the same way as gettext. It has guess_category_value() to do that and the function is good enough because it considers $LANGUAGE, $LC_ALL, $LANG, and also system-dependent preferences. But the function does not seem a public API and I don't know how can I use the function in Git. So I chose to use $LANGUAGE only. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
On Wed, Jul 09, 2014 at 12:54:06AM +0900, Yi EungJun wrote: > From: Yi EungJun > > Add an Accept-Language header which indicates the user's preferred > languages defined by 'LANGUAGE' environment variable if the variable is > not empty. > > Example: > LANGUAGE= -> "" > LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" > LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" > > This gives git servers a chance to display remote error messages in > the user's preferred language. Should this also take into account other language-related variables? I'd think $LC_ALL, $LC_MESSAGES, and $LANG would affect it, too. Are colon-separated values a standard in $LANGUAGE? I have never seen them, but I admit I am not very knowledgeable about localization issues. Also, we do we need to do more parsing? My $LANG is set to en_US.UTF-8. The encoding part is presumably uninteresting to the remote server. I also wonder if there are support functions in libc or as part of gettext that can help us get these values. -Peff -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] http: Add Accept-Language header if possible
On Tue, Jul 8, 2014 at 11:54 AM, Yi EungJun wrote: > From: Yi EungJun > > Add an Accept-Language header which indicates the user's preferred > languages defined by 'LANGUAGE' environment variable if the variable is > not empty. > > Example: > LANGUAGE= -> "" > LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" > LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" > > This gives git servers a chance to display remote error messages in > the user's preferred language. > --- > http.c | 43 +++ > t/t5550-http-fetch-dumb.sh | 10 ++ > 2 files changed, 53 insertions(+) > > diff --git a/http.c b/http.c > index 3a28b21..c345616 100644 > --- a/http.c > +++ b/http.c > @@ -983,6 +983,47 @@ static void extract_content_type(struct strbuf *raw, > struct strbuf *type, > strbuf_addstr(charset, "ISO-8859-1"); > } > > +/* > + * Add an Accept-Language header which indicates user's preferred languages > + * defined by 'LANGUAGE' environment variable if the variable is not empty. > + * > + * Example: > + * LANGUAGE= -> "" > + * LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" > + * LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; > q=0.001" > + */ > +static void add_accept_language(struct strbuf *buf) > +{ > + const char *p1, *p2; > + float q = 1.000; > + > + p1 = getenv("LANGUAGE"); > + > + if (p1 != NULL && p1[0] != '\0') { > + strbuf_reset(buf); It seems wrong to clear 'buf' in a function named add_accept_language(). > + strbuf_addstr(buf, "Accept-Language: "); > + for (p2 = p1; q > 0.001; p2++) { > + if ((*p2 == ':' || *p2 == '\0') && p1 != p2) { > + if (q < 1.0) { > + strbuf_addstr(buf, ", "); > + } > + strbuf_add(buf, p1, p2 - p1); > + strbuf_addf(buf, "; q=%.3f", q); > + q -= 0.001; > + p1 = p2 + 1; > + > + if (*p2 == '\0') { > + break; > + } > + } > + } > + if (q < 1.0) { > + strbuf_addstr(buf, ", "); > + } > + strbuf_addstr(buf, "*; q=0.001\r\n"); Manually adding "\r\n" is contraindicated. Headers passed to curl_easy_setopt(c, CURLOPT_HTTPHEADER, headers) must not have "\r\n", since curl will add terminators itself [1]. [1]: http://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html > + } > +} > + > /* http_request() targets */ > #define HTTP_REQUEST_STRBUF0 > #define HTTP_REQUEST_FILE 1 > @@ -1020,6 +1061,8 @@ static int http_request(const char *url, > fwrite_buffer); > } > > + add_accept_language(&buf); This is inconsistent with how other headers are handled by this function. The existing idiom is: strbuf_add(&buf, ...); /* construct header */ headers = curl_slist_apend(headers, buf.buf); strbuf_reset(&buf); > + > strbuf_addstr(&buf, "Pragma:"); > if (options && options->no_cache) > strbuf_addstr(&buf, " no-cache"); > diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh > index ac71418..ea15158 100755 > --- a/t/t5550-http-fetch-dumb.sh > +++ b/t/t5550-http-fetch-dumb.sh > @@ -196,5 +196,15 @@ test_expect_success 'reencoding is robust to whitespace > oddities' ' > grep "this is the error message" stderr > ' > > +test_expect_success 'git client sends Accept-Language' ' > + GIT_CURL_VERBOSE=1 LANGUAGE=ko:en git clone > "$HTTPD_URL/accept/language" 2>actual Broken &&-chain. > + grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual Do you want to \-escape the periods? (Or maybe use 'grep -F'?) > +' > + > +test_expect_success 'git client does not send Accept-Language' ' > + GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" > 2>actual Broken &&-chain. > + test_must_fail grep "^Accept-Language:" actual > +' > + > stop_httpd > test_done > -- > 2.0.1.473.gafdefd9.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] http: Add Accept-Language header if possible
From: Yi EungJun Add an Accept-Language header which indicates the user's preferred languages defined by 'LANGUAGE' environment variable if the variable is not empty. Example: LANGUAGE= -> "" LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" This gives git servers a chance to display remote error messages in the user's preferred language. --- http.c | 43 +++ t/t5550-http-fetch-dumb.sh | 10 ++ 2 files changed, 53 insertions(+) diff --git a/http.c b/http.c index 3a28b21..c345616 100644 --- a/http.c +++ b/http.c @@ -983,6 +983,47 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type, strbuf_addstr(charset, "ISO-8859-1"); } +/* + * Add an Accept-Language header which indicates user's preferred languages + * defined by 'LANGUAGE' environment variable if the variable is not empty. + * + * Example: + * LANGUAGE= -> "" + * LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001" + * LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001" + */ +static void add_accept_language(struct strbuf *buf) +{ + const char *p1, *p2; + float q = 1.000; + + p1 = getenv("LANGUAGE"); + + if (p1 != NULL && p1[0] != '\0') { + strbuf_reset(buf); + strbuf_addstr(buf, "Accept-Language: "); + for (p2 = p1; q > 0.001; p2++) { + if ((*p2 == ':' || *p2 == '\0') && p1 != p2) { + if (q < 1.0) { + strbuf_addstr(buf, ", "); + } + strbuf_add(buf, p1, p2 - p1); + strbuf_addf(buf, "; q=%.3f", q); + q -= 0.001; + p1 = p2 + 1; + + if (*p2 == '\0') { + break; + } + } + } + if (q < 1.0) { + strbuf_addstr(buf, ", "); + } + strbuf_addstr(buf, "*; q=0.001\r\n"); + } +} + /* http_request() targets */ #define HTTP_REQUEST_STRBUF0 #define HTTP_REQUEST_FILE 1 @@ -1020,6 +1061,8 @@ static int http_request(const char *url, fwrite_buffer); } + add_accept_language(&buf); + strbuf_addstr(&buf, "Pragma:"); if (options && options->no_cache) strbuf_addstr(&buf, " no-cache"); diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index ac71418..ea15158 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -196,5 +196,15 @@ test_expect_success 'reencoding is robust to whitespace oddities' ' grep "this is the error message" stderr ' +test_expect_success 'git client sends Accept-Language' ' + GIT_CURL_VERBOSE=1 LANGUAGE=ko:en git clone "$HTTPD_URL/accept/language" 2>actual + grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual +' + +test_expect_success 'git client does not send Accept-Language' ' + GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>actual + test_must_fail grep "^Accept-Language:" actual +' + stop_httpd test_done -- 2.0.1.473.gafdefd9.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html