[PATCH 08/10] adding rawtext without fixing treebuilder

rsk1994 Mon, 31 Mar 2014 09:10:30 -0700

---
 include/hubbub/types.h                 |  3 +-
 src/tokeniser/tokeniser.c              | 11 ++---
 test/data/tokeniser2/INDEX             |  2 +-
 test/data/tree-construction/tests5.dat | 84 +++++++++++++++++++++-------------
 test/tokeniser2.c                      |  9 ++--
 test/tokeniser3.c                      |  9 ++--
 6 files changed, 72 insertions(+), 46 deletions(-)


diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index e5c208b..6e2b1a9 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -33,7 +33,8 @@ typedef enum hubbub_content_model {
        HUBBUB_CONTENT_MODEL_PCDATA,
        HUBBUB_CONTENT_MODEL_RCDATA,
        HUBBUB_CONTENT_MODEL_CDATA,
-       HUBBUB_CONTENT_MODEL_PLAINTEXT
+       HUBBUB_CONTENT_MODEL_PLAINTEXT,
+       HUBBUB_CONTENT_MODEL_RAWTEXT
 } hubbub_content_model;
 
 /**
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3087ac8..4f87287 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -689,8 +689,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
                } else if (c == '-' &&
                                tokeniser->escape_flag == false &&
                                (tokeniser->content_model ==
-                                               HUBBUB_CONTENT_MODEL_RCDATA ||
-                               tokeniser->content_model ==
                                                HUBBUB_CONTENT_MODEL_CDATA) &&
                                tokeniser->context.pending >= 3) {
                        size_t ignore;
@@ -712,6 +710,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
                                                HUBBUB_CONTENT_MODEL_PCDATA ||
                                        ((tokeniser->content_model ==
                                                HUBBUB_CONTENT_MODEL_RCDATA ||
+                                       tokeniser->content_model == 
+                                               HUBBUB_CONTENT_MODEL_RAWTEXT ||
                                        tokeniser->content_model ==
                                                HUBBUB_CONTENT_MODEL_CDATA) &&
                                tokeniser->escape_flag == false))) {
@@ -899,6 +899,7 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
 
                tokeniser->state = STATE_CLOSE_TAG_OPEN;
        } else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+                       tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
                        tokeniser->content_model ==
                                        HUBBUB_CONTENT_MODEL_CDATA) {
                /* Return to data state with '<' still in "chars" */
@@ -971,6 +972,7 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
        /**\todo fragment case */
 
        if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+                       tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
                        tokeniser->content_model ==
                                        HUBBUB_CONTENT_MODEL_CDATA) {
                uint8_t *start_tag_name =
@@ -3004,7 +3006,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
                        ctx->match_entity.length += len;
                } else {
                        ctx->match_entity.base = 10;
-                       printf("base 10\n");
                }
        }
 
@@ -3041,7 +3042,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 
                if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) {
                        ctx->match_entity.overflow = true;
-                       printf("overflow\n");
                }
        }
 
@@ -3061,11 +3061,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 
                if (0x80 <= cp && cp <= 0x9F) {
                        cp = cp1252Table[cp - 0x80];
-                       printf("converting1\n");
                } else if (ctx->match_entity.overflow || 
                                (0xD800 <= cp && cp <= 0xDFFF) ||
                                (cp == 0x00)) {
-                       printf("converting\n");
                        cp = 0xFFFD;
                } else if((0x0001<=cp && cp <= 0x0008) ||
                                (0x000D <= cp && cp <= 0x001F) ||
@@ -3074,7 +3072,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
                                (cp ==0x000B) ||
                                ((cp & 0xFFFE) == 0xFFFE) ||
                                ((cp & 0xFFFF) == 0xFFFF) ){
-                       printf("converting\n");
                        /* the check for cp > 0x10FFFF per spec is performed
                         * in the loop above to avoid overflow */
                }
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 9b165c0..9ff8596 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -7,7 +7,7 @@ test2.test              html5lib tests (part 2)
 test3.test             html5lib tests (part 3)
 test4.test             html5lib tests (part 4)
 entities.test          html5lib entity tests
-#escapeFlag.test               html5lib escape flag tests
+escapeFlag.test                html5lib escape flag tests
 numericEntities.test   html5lib numeric entities tests
 unicodeChars.test      html5lib unicode character tests
 #unicodeCharsProblematic.test  html5lib problematic unicode character tests
diff --git a/test/data/tree-construction/tests5.dat b/test/data/tree-construction/tests5.dat
index 2c95031..4d5fcd7 100644
--- a/test/data/tree-construction/tests5.dat
+++ b/test/data/tree-construction/tests5.dat
@@ -1,31 +1,33 @@
 #data
 <style> <!-- </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <style>
-|       " <!-- </style>x"
+|       " <!-- "
 |   <body>
+|     "x"
 
 #data
 <style> <!-- </style> --> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <style>
-|       " <!-- </style> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <style> <!--> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -37,7 +39,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 #data
 <style> <!---> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -49,7 +51,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 #data
 <iframe> <!---> </iframe>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -61,55 +63,63 @@ Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
 #data
 <iframe> <!--- </iframe>->x</iframe> --> </iframe>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+(1,50): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |   <body>
 |     <iframe>
-|       " <!--- </iframe>->x</iframe> --> "
-|     "x"
+|       " <!--- "
+|     "->x --> x"
 
 #data
 <script> <!-- </script> --> </script>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <script>
-|       " <!-- </script> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <title> <!-- </title> --> </title>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       " <!-- </title> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <textarea> <!--- </textarea>->x</textarea> --> </textarea>x
 #errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,42): unexpected-end-tag
+(1,58): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |   <body>
 |     <textarea>
-|       " <!--- </textarea>->x</textarea> --> "
-|     "x"
+|       " <!--- "
+|     "->x --> x"
 
 #data
 <style> <!</-- </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -119,9 +129,20 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 |     "x"
 
 #data
+<p><xmp></xmp>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <xmp>
+
+#data
 <xmp> <!-- > --> </xmp>
 #errors
-Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+(1,5): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -132,7 +153,7 @@ Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
 #data
 <title>&amp;</title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -143,33 +164,34 @@ Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
 #data
 <title><!--&amp;--></title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       "<!--&amp;-->"
+|       "<!--&-->"
 |   <body>
 
 #data
 <title><!--</title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       "<!--</title>"
+|       "<!--"
 |   <body>
 
 #data
 <noscript><!--</noscript>--></noscript>
 #errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <noscript>
-|       "<!--</noscript>-->"
+|       "<!--"
 |   <body>
+|     "-->"
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 3024e81..7c56aeb 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -177,15 +177,18 @@ void run_test(context *ctx)
                                (struct json_object *)
                                array_list_get_idx(ctx->content_model, i));
 
-                       if (strcmp(cm, "PCDATA") == 0) {
+                       if (strcmp(cm, "PCDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_PCDATA;
-                       } else if (strcmp(cm, "RCDATA") == 0) {
+                       } else if (strcmp(cm, "RCDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_RCDATA;
-                       } else if (strcmp(cm, "CDATA") == 0) {
+                       } else if (strcmp(cm, "CDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_CDATA;
+                       } else if (strcmp(cm, "RAWTEXT state") == 0) {
+                               params.content_model.model =
+                                               HUBBUB_CONTENT_MODEL_RAWTEXT;
                        } else {
                                params.content_model.model =
                                        HUBBUB_CONTENT_MODEL_PLAINTEXT;
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index c4c5231..a68e0ba 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -175,15 +175,18 @@ void run_test(context *ctx)
                                (struct json_object *)
                                array_list_get_idx(ctx->content_model, i));
 
-                       if (strcmp(cm, "PCDATA") == 0) {
+                       if (strcmp(cm, "PCDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_PCDATA;
-                       } else if (strcmp(cm, "RCDATA") == 0) {
+                       } else if (strcmp(cm, "RCDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_RCDATA;
-                       } else if (strcmp(cm, "CDATA") == 0) {
+                       } else if (strcmp(cm, "CDATA state") == 0) {
                                params.content_model.model =
                                                HUBBUB_CONTENT_MODEL_CDATA;
+                       } else if (strcmp(cm, "RAWTEXT state") == 0) {
+                               params.content_model.model =
+                                               HUBBUB_CONTENT_MODEL_RAWTEXT;
                        } else {
                                params.content_model.model =
                                        HUBBUB_CONTENT_MODEL_PLAINTEXT;
-- 
1.8.3.2

[PATCH 08/10] adding rawtext without fixing treebuilder

Reply via email to