--- include/hubbub/types.h | 3 +- src/tokeniser/tokeniser.c | 11 ++--- test/data/tokeniser2/INDEX | 2 +- test/data/tree-construction/tests5.dat | 84 +++++++++++++++++++++------------- test/tokeniser2.c | 9 ++-- test/tokeniser3.c | 9 ++-- 6 files changed, 72 insertions(+), 46 deletions(-)
diff --git a/include/hubbub/types.h b/include/hubbub/types.h index e5c208b..6e2b1a9 100644 --- a/include/hubbub/types.h +++ b/include/hubbub/types.h @@ -33,7 +33,8 @@ typedef enum hubbub_content_model { HUBBUB_CONTENT_MODEL_PCDATA, HUBBUB_CONTENT_MODEL_RCDATA, HUBBUB_CONTENT_MODEL_CDATA, - HUBBUB_CONTENT_MODEL_PLAINTEXT + HUBBUB_CONTENT_MODEL_PLAINTEXT, + HUBBUB_CONTENT_MODEL_RAWTEXT } hubbub_content_model; /** diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 3087ac8..4f87287 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -689,8 +689,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser) } else if (c == '-' && tokeniser->escape_flag == false && (tokeniser->content_model == - HUBBUB_CONTENT_MODEL_RCDATA || - tokeniser->content_model == HUBBUB_CONTENT_MODEL_CDATA) && tokeniser->context.pending >= 3) { size_t ignore; @@ -712,6 +710,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser) HUBBUB_CONTENT_MODEL_PCDATA || ((tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA || + tokeniser->content_model == + HUBBUB_CONTENT_MODEL_RAWTEXT || tokeniser->content_model == HUBBUB_CONTENT_MODEL_CDATA) && tokeniser->escape_flag == false))) { @@ -899,6 +899,7 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser) tokeniser->state = STATE_CLOSE_TAG_OPEN; } else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA || + tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT || tokeniser->content_model == HUBBUB_CONTENT_MODEL_CDATA) { /* Return to data state with '<' still in "chars" */ @@ -971,6 +972,7 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser) /**\todo fragment case */ if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA || + tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT || tokeniser->content_model == HUBBUB_CONTENT_MODEL_CDATA) { uint8_t *start_tag_name = @@ -3004,7 +3006,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity( ctx->match_entity.length += len; } else { ctx->match_entity.base = 10; - printf("base 10\n"); } } @@ -3041,7 +3042,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity( if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) { ctx->match_entity.overflow = true; - printf("overflow\n"); } } @@ -3061,11 +3061,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity( if (0x80 <= cp && cp <= 0x9F) { cp = cp1252Table[cp - 0x80]; - printf("converting1\n"); } else if (ctx->match_entity.overflow || (0xD800 <= cp && cp <= 0xDFFF) || (cp == 0x00)) { - printf("converting\n"); cp = 0xFFFD; } else if((0x0001<=cp && cp <= 0x0008) || (0x000D <= cp && cp <= 0x001F) || @@ -3074,7 +3072,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity( (cp ==0x000B) || ((cp & 0xFFFE) == 0xFFFE) || ((cp & 0xFFFF) == 0xFFFF) ){ - printf("converting\n"); /* the check for cp > 0x10FFFF per spec is performed * in the loop above to avoid overflow */ } diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX index 9b165c0..9ff8596 100644 --- a/test/data/tokeniser2/INDEX +++ b/test/data/tokeniser2/INDEX @@ -7,7 +7,7 @@ test2.test html5lib tests (part 2) test3.test html5lib tests (part 3) test4.test html5lib tests (part 4) entities.test html5lib entity tests -#escapeFlag.test html5lib escape flag tests +escapeFlag.test html5lib escape flag tests numericEntities.test html5lib numeric entities tests unicodeChars.test html5lib unicode character tests #unicodeCharsProblematic.test html5lib problematic unicode character tests diff --git a/test/data/tree-construction/tests5.dat b/test/data/tree-construction/tests5.dat index 2c95031..4d5fcd7 100644 --- a/test/data/tree-construction/tests5.dat +++ b/test/data/tree-construction/tests5.dat @@ -1,31 +1,33 @@ #data <style> <!-- </style>x #errors -Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. -Line: 1 Col: 22 Unexpected end of file. Expected end tag (style). +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> | <style> -| " <!-- </style>x" +| " <!-- " | <body> +| "x" #data <style> <!-- </style> --> </style>x #errors -Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag +(1,34): unexpected-end-tag #document | <html> | <head> | <style> -| " <!-- </style> --> " +| " <!-- " +| " " | <body> -| "x" +| "--> x" #data <style> <!--> </style>x #errors -Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -37,7 +39,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. #data <style> <!---> </style>x #errors -Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -49,7 +51,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. #data <iframe> <!---> </iframe>x #errors -Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. +(1,8): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -61,55 +63,63 @@ Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. #data <iframe> <!--- </iframe>->x</iframe> --> </iframe>x #errors -Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. +(1,8): expected-doctype-but-got-start-tag +(1,36): unexpected-end-tag +(1,50): unexpected-end-tag #document | <html> | <head> | <body> | <iframe> -| " <!--- </iframe>->x</iframe> --> " -| "x" +| " <!--- " +| "->x --> x" #data <script> <!-- </script> --> </script>x #errors -Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +(1,8): expected-doctype-but-got-start-tag +(1,37): unexpected-end-tag #document | <html> | <head> | <script> -| " <!-- </script> --> " +| " <!-- " +| " " | <body> -| "x" +| "--> x" #data <title> <!-- </title> --> </title>x #errors -Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag +(1,34): unexpected-end-tag #document | <html> | <head> | <title> -| " <!-- </title> --> " +| " <!-- " +| " " | <body> -| "x" +| "--> x" #data <textarea> <!--- </textarea>->x</textarea> --> </textarea>x #errors -Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. +(1,10): expected-doctype-but-got-start-tag +(1,42): unexpected-end-tag +(1,58): unexpected-end-tag #document | <html> | <head> | <body> | <textarea> -| " <!--- </textarea>->x</textarea> --> " -| "x" +| " <!--- " +| "->x --> x" #data <style> <!</-- </style>x #errors -Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -119,9 +129,20 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. | "x" #data +<p><xmp></xmp> +#errors +(1,3): expected-doctype-but-got-start-tag +#document +| <html> +| <head> +| <body> +| <p> +| <xmp> + +#data <xmp> <!-- > --> </xmp> #errors -Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE. +(1,5): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -132,7 +153,7 @@ Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE. #data <title>&</title> #errors -Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> @@ -143,33 +164,34 @@ Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. #data <title><!--&--></title> #errors -Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> | <title> -| "<!--&-->" +| "<!--&-->" | <body> #data <title><!--</title> #errors -Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. -Line: 1 Col: 19 Unexpected end of file. Expected end tag (title). +(1,7): expected-doctype-but-got-start-tag #document | <html> | <head> | <title> -| "<!--</title>" +| "<!--" | <body> #data <noscript><!--</noscript>--></noscript> #errors -Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE. +(1,10): expected-doctype-but-got-start-tag +(1,39): unexpected-end-tag #document | <html> | <head> | <noscript> -| "<!--</noscript>-->" +| "<!--" | <body> +| "-->" diff --git a/test/tokeniser2.c b/test/tokeniser2.c index 3024e81..7c56aeb 100644 --- a/test/tokeniser2.c +++ b/test/tokeniser2.c @@ -177,15 +177,18 @@ void run_test(context *ctx) (struct json_object *) array_list_get_idx(ctx->content_model, i)); - if (strcmp(cm, "PCDATA") == 0) { + if (strcmp(cm, "PCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { + } else if (strcmp(cm, "RCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { + } else if (strcmp(cm, "CDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.content_model.model = + HUBBUB_CONTENT_MODEL_RAWTEXT; } else { params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; diff --git a/test/tokeniser3.c b/test/tokeniser3.c index c4c5231..a68e0ba 100644 --- a/test/tokeniser3.c +++ b/test/tokeniser3.c @@ -175,15 +175,18 @@ void run_test(context *ctx) (struct json_object *) array_list_get_idx(ctx->content_model, i)); - if (strcmp(cm, "PCDATA") == 0) { + if (strcmp(cm, "PCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { + } else if (strcmp(cm, "RCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { + } else if (strcmp(cm, "CDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.content_model.model = + HUBBUB_CONTENT_MODEL_RAWTEXT; } else { params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; -- 1.8.3.2