Fix mochiweb_html missing charref error #167
Project: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/commit/0f2fe442 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/tree/0f2fe442 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/diff/0f2fe442 Branch: refs/heads/master Commit: 0f2fe4428aa0f00d0891177bd062533bb0c0ded8 Parents: d024b4a Author: Bob Ippolito <b...@redivi.com> Authored: Sun Mar 13 13:57:03 2016 -0700 Committer: Bob Ippolito <b...@redivi.com> Committed: Sun Mar 13 13:57:03 2016 -0700 ---------------------------------------------------------------------- .editorconfig | 17 +++++++++++++++++ CHANGES.md | 9 +++++++-- src/mochiweb.app.src | 2 +- src/mochiweb_html.erl | 6 ++++-- test/mochiweb_html_tests.erl | 10 +++++++++- 5 files changed, 38 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/0f2fe442/.editorconfig ---------------------------------------------------------------------- diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d03550e --- /dev/null +++ b/.editorconfig @@ -0,0 +1,17 @@ +# EditorConfig file: http://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +# 4 space indentation +[*.{erl,src,hrl}] +indent_style = space +indent_size = 4 http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/0f2fe442/CHANGES.md ---------------------------------------------------------------------- diff --git a/CHANGES.md b/CHANGES.md index af80a19..1b88f92 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +Version 2.13.1 released 2016-03-13 + +* Fix mochiweb_html regression parsing invalid charref sequences + https://github.com/mochi/mochiweb/issues/167 + Version 2.13.0 released 2016-02-08 * Support parsing of UTF-16 surrogate pairs encoded as character @@ -114,7 +119,7 @@ Version 2.7.0 released 2013-08-01 call instead of an asynchronous cast * `mochiweb_html:parse_tokens/1` (and `parse/1`) will now create a html element to wrap documents that have a HTML5 doctype - (`<!doctype html>`) but no html element + (`<!doctype html>`) but no html element https://github.com/mochi/mochiweb/issues/110 Version 2.6.0 released 2013-04-15 @@ -133,7 +138,7 @@ Version 2.5.0 released 2013-03-04 (URL and Filename safe alphabet, see RFC 4648). * Fix rebar.config in mochiwebapp_skel to use {branch, "master"} https://github.com/mochi/mochiweb/issues/105 - + Version 2.4.2 released 2013-02-05 * Fixed issue in mochiweb_response introduced in v2.4.0 http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/0f2fe442/src/mochiweb.app.src ---------------------------------------------------------------------- diff --git a/src/mochiweb.app.src b/src/mochiweb.app.src index dd049af..4f7f7dc 100644 --- a/src/mochiweb.app.src +++ b/src/mochiweb.app.src @@ -1,7 +1,7 @@ %% This is generated from src/mochiweb.app.src {application, mochiweb, [{description, "MochiMedia Web Server"}, - {vsn, "2.13.0"}, + {vsn, "2.13.1"}, {modules, []}, {registered, []}, {env, []}, http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/0f2fe442/src/mochiweb_html.erl ---------------------------------------------------------------------- diff --git a/src/mochiweb_html.erl b/src/mochiweb_html.erl index 3c5c4f9..70723af 100644 --- a/src/mochiweb_html.erl +++ b/src/mochiweb_html.erl @@ -640,7 +640,7 @@ find_gt(Bin, S=#decoder{offset=O}, HasSlash) -> tokenize_charref(Bin, S=#decoder{offset=O}) -> try case tokenize_charref_raw(Bin, S, O) of - {C1, S1=#decoder{offset=O1}} when C1 >= 16#D800 andalso C1 =< 16#DFFF -> + {C1, S1} when C1 >= 16#D800 andalso C1 =< 16#DFFF -> %% Surrogate pair tokeninize_charref_surrogate_pair(Bin, S1, C1); {Unichar, S1} when is_integer(Unichar) -> @@ -648,7 +648,9 @@ tokenize_charref(Bin, S=#decoder{offset=O}) -> S1}; {Unichars, S1} when is_list(Unichars) -> {{data, unicode:characters_to_binary(Unichars), false}, - S1} + S1}; + {undefined, _} -> + throw(invalid_charref) end catch throw:invalid_charref -> http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/0f2fe442/test/mochiweb_html_tests.erl ---------------------------------------------------------------------- diff --git a/test/mochiweb_html_tests.erl b/test/mochiweb_html_tests.erl index f67759a..0310b28 100644 --- a/test/mochiweb_html_tests.erl +++ b/test/mochiweb_html_tests.erl @@ -562,7 +562,15 @@ parse_amp_test_() -> ?_assertEqual( {<<"html">>,[], [{<<"body">>,[],[<<"&">>]}]}, - mochiweb_html:parse("<html><body>&</body></html>"))]. + mochiweb_html:parse("<html><body>&</body></html>")), + ?_assertEqual( + {<<"html">>,[], + [{<<"body">>,[],[<<"&;">>]}]}, + mochiweb_html:parse("<html><body>&;</body></html>")), + ?_assertEqual( + {<<"html">>,[], + [{<<"body">>,[],[<<"&MISSING;">>]}]}, + mochiweb_html:parse("<html><body>&MISSING;</body></html>"))]. parse_unescaped_lt_test() -> D1 = <<"<div> < < <a href=\"/\">Back</a></div>">>,