create <html> if we see an HTML5 doctype
Project: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/commit/fa0b40ea Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/tree/fa0b40ea Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/diff/fa0b40ea Branch: refs/heads/1843-feature-bigcouch Commit: fa0b40ea2705dfcbe8af156851c69d737791ad70 Parents: 13f9316 Author: Bob Ippolito <[email protected]> Authored: Mon May 6 15:14:57 2013 -0700 Committer: Bob Ippolito <[email protected]> Committed: Mon May 6 15:21:37 2013 -0700 ---------------------------------------------------------------------- src/mochiweb_html.erl | 32 +++++++++++++++++++++----------- test/mochiweb_html_tests.erl | 9 +++++++++ 2 files changed, 30 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/fa0b40ea/src/mochiweb_html.erl ---------------------------------------------------------------------- diff --git a/src/mochiweb_html.erl b/src/mochiweb_html.erl index c38f138..3732924 100644 --- a/src/mochiweb_html.erl +++ b/src/mochiweb_html.erl @@ -5,13 +5,14 @@ -module(mochiweb_html). -export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1, escape_attr/1, to_html/1]). +-compile([export_all]). -ifdef(TEST). -export([destack/1, destack/2, is_singleton/1]). -endif. %% This is a macro to placate syntax highlighters.. --define(QUOTE, $\"). --define(SQUOTE, $\'). +-define(QUOTE, $\"). %% $\" +-define(SQUOTE, $\'). %% $\' -define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column, offset=N+S#decoder.offset}). @@ -66,18 +67,25 @@ parse(Input) -> %% @doc Transform the output of tokens(Doc) into a HTML tree. 
parse_tokens(Tokens) when is_list(Tokens) -> %% Skip over doctype, processing instructions - F = fun (X) -> - case X of - {start_tag, _, _, false} -> - false; - _ -> - true - end - end, - [{start_tag, Tag, Attrs, false} | Rest] = lists:dropwhile(F, Tokens), + [{start_tag, Tag, Attrs, false} | Rest] = find_document(Tokens, normal), {Tree, _} = tree(Rest, [norm({Tag, Attrs})]), Tree. +find_document(Tokens=[{start_tag, _Tag, _Attrs, false} | _Rest], Mode) -> + maybe_add_html_tag(Tokens, Mode); +find_document([{doctype, [<<"html">>]} | Rest], _Mode) -> + find_document(Rest, html5); +find_document([_T | Rest], Mode) -> + find_document(Rest, Mode); +find_document([], _Mode) -> + []. + +maybe_add_html_tag(Tokens=[{start_tag, Tag, _Attrs, false} | _], html5) + when Tag =/= <<"html">> -> + [{start_tag, <<"html">>, [], false} | Tokens]; +maybe_add_html_tag(Tokens, _Mode) -> + Tokens. + %% @spec tokens(StringOrBinary) -> [html_token()] %% @doc Transform the input UTF-8 HTML into a token stream. tokens(Input) -> @@ -302,6 +310,8 @@ tokenize(B, S=#decoder{offset=O}) -> case B of <<_:O/binary, "<!--", _/binary>> -> tokenize_comment(B, ?ADV_COL(S, 4)); + <<_:O/binary, "<!doctype", _/binary>> -> + tokenize_doctype(B, ?ADV_COL(S, 10)); <<_:O/binary, "<!DOCTYPE", _/binary>> -> tokenize_doctype(B, ?ADV_COL(S, 10)); <<_:O/binary, "<![CDATA[", _/binary>> -> http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/fa0b40ea/test/mochiweb_html_tests.erl ---------------------------------------------------------------------- diff --git a/test/mochiweb_html_tests.erl b/test/mochiweb_html_tests.erl index c9fec6f..3d35400 100644 --- a/test/mochiweb_html_tests.erl +++ b/test/mochiweb_html_tests.erl @@ -571,6 +571,15 @@ parse_unescaped_lt_test() -> [<<"Back">>]}]}, mochiweb_html:parse(D2)). 
+html5_doctype_test() -> + ?assertEqual( + [{doctype,[<<"html">>]}, + {start_tag,<<"head">>,[],false}, + {end_tag,<<"head">>}, + {start_tag,<<"body">>,[],false}, + {end_tag,<<"body">>}], + mochiweb_html:tokens("<!doctype html><head></head><body></body>")). + implicit_html_test() -> %% https://github.com/mochi/mochiweb/issues/110 ?assertEqual(
