Does this patch make sense? It seems to fix a rather old bug (one that has bitten me many times over a long period; I had always looked elsewhere for fixes).
commit 4a46e5f2cd6b8d3922d085c9e9e0a67918cde6ff Author: Stephen R. van den Berg <[email protected]> Date: Sat Feb 28 20:57:00 2009 +0100 Perform proper characterset decoding for multipart/form-data. diff --git a/server/protocols/http.pike b/server/protocols/http.pike index 7f99086..afb5254 100644 --- a/server/protocols/http.pike +++ b/server/protocols/http.pike @@ -999,21 +999,58 @@ private int parse_got( string new_data ) /* FIXME: Should this be reported to the client? */ } else { mapping(string:array) post_vars = misc->post_variables = ([]); + array(string) todecode = ({}); foreach (messg->body_parts, object part) { string n = part->disp_params->name; string d = part->getdata(); post_vars[n] += ({d}); - real_variables[n] += ({d}); if (string fn = part->disp_params->filename) { post_vars[n + ".filename"] += ({fn}); - real_variables[n + ".filename"] += ({fn}); misc->files += ({n}); } if (string ct = part->headers["content-type"]) { post_vars[n + ".mimetype"] += ({ct}); - real_variables[n + ".mimetype"] += ({ct}); } + else + todecode += ({n}); } + do + { function(string:string) decoder; + if(input_charset) + { if(mixed err = catch + { decoder = Roxen.get_decoder_for_client_charset(input_charset); + foreach(todecode, string n) + post_vars[n] = map(post_vars[n], decoder); + break; + }) + report_debug ("Client %O requested path %O " + "which failed to decode " + "with the input charset %O: %s", + client_var->fullname, raw_url, input_charset, + describe_error (err)); + } + catch + { array(string) magic = + real_variables->magic_roxen_automatic_charset_variable + || post_vars->magic_roxen_automatic_charset_variable; + input_charset = Roxen->get_client_charset(magic[0]); + decoder = Roxen.get_decoder_for_client_charset(input_charset); + foreach(todecode, string n) + post_vars[n] = map(post_vars[n], decoder); + break; + }; + catch + { decoder = utf8_to_string; + input_charset = "utf-8"; // Why not UTF8 ? 
+ foreach(post_vars; string n; array v) + post_vars[n] = map(v, decoder); + break; + }; + input_charset = 0; + } + while(0); + foreach(post_vars; string n; array v) + real_variables[n] += v; } break; } -- Sincerely, Stephen R. van den Berg. Human beings were created by water to transport it uphill.
