--- test/testutils.h | 19 +++++++++++++++++++ test/tokeniser2.c | 51 +++++++++++++++++++++++++++++---------------------- test/tokeniser3.c | 48 +++++++++++++++++++++++++++--------------------- 3 files changed, 75 insertions(+), 43 deletions(-)
diff --git a/test/testutils.h b/test/testutils.h index 45870f9..fa159d6 100644 --- a/test/testutils.h +++ b/test/testutils.h @@ -63,6 +63,7 @@ typedef bool (*line_func)(const char *data, size_t datalen, void *pw); static size_t parse_strlen(const char *str, size_t limit); bool parse_testfile(const char *filename, line_func callback, void *pw); size_t parse_filesize(const char *filename); +size_t n_str(const char *str); /** * Testcase datafile parser driver @@ -147,6 +148,24 @@ size_t parse_filesize(const char *filename) return len; } +/** + * Utility string length measurer; assumes strings are '\0' terminated + * + * \param str String to measure length of + * \return String length + */ +size_t n_str(const char *str) +{ + size_t len = 0; + + if (str == NULL) + return 0; + + for (; *str++;len++); + + return len; +} + #ifndef strndup char *my_strndup(const char *s, size_t n); diff --git a/test/tokeniser2.c b/test/tokeniser2.c index c8ab9c0..db7c8f8 100644 --- a/test/tokeniser2.c +++ b/test/tokeniser2.c @@ -14,6 +14,8 @@ #include "testutils.h" +#define strlen n_str + typedef struct context { const uint8_t *pbuffer; @@ -25,7 +27,7 @@ typedef struct context { size_t char_off; const char *last_start_tag; - struct array_list *content_model; + struct array_list *initial_state; bool process_cdata; } context; @@ -63,7 +65,7 @@ int main(int argc, char **argv) (struct json_object *) array_list_get_idx(tests, i); ctx.last_start_tag = NULL; - ctx.content_model = NULL; + ctx.initial_state = NULL; ctx.process_cdata = false; /* Extract settings */ @@ -86,8 +88,8 @@ int main(int argc, char **argv) } else if (strcmp(key, "lastStartTag") == 0) { ctx.last_start_tag = (const char *) json_object_get_string(val); - } else if (strcmp(key, "contentModelFlags") == 0) { - ctx.content_model = + } else if (strcmp(key, "initialStates") == 0) { + ctx.initial_state = json_object_get_array(val); } else if (strcmp(key, "processCDATA") == 0) { ctx.process_cdata = @@ -114,10 +116,10 @@ void 
run_test(context *ctx) int i, max_i; struct array_list *outputsave = ctx->output; - if (ctx->content_model == NULL) { + if (ctx->initial_state == NULL) { max_i = 1; } else { - max_i = array_list_length(ctx->content_model); + max_i = array_list_length(ctx->initial_state); } /* We test for each of the content models specified */ @@ -161,30 +163,34 @@ void run_test(context *ctx) HUBBUB_TOKENISER_TOKEN_HANDLER, &params) == HUBBUB_OK); - if (ctx->content_model == NULL) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PCDATA; + if (ctx->initial_state == NULL) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_DATA; } else { const char *cm = json_object_get_string( (struct json_object *) - array_list_get_idx(ctx->content_model, i)); + array_list_get_idx(ctx->initial_state, i)); if (strcmp(cm, "PCDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_CDATA; + params.initial_state.state = + HUBBUB_INITIAL_STATE_DATA; + } else if (strcmp(cm, "RCDATA state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_RCDATA; + } else if (strcmp(cm, "CDATA state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_RAWTEXT; } else { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PLAINTEXT; + params.initial_state.state = + HUBBUB_INITIAL_STATE_PLAINTEXT; } } + assert(hubbub_tokeniser_setopt(tok, - HUBBUB_TOKENISER_CONTENT_MODEL, + HUBBUB_TOKENISER_INITIAL_STATE, &params) == HUBBUB_OK); assert(parserutils_inputstream_append(stream, @@ -301,7 +307,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) gotsys, (int) token->data.doctype.system_id.len); } - + printf(":%d: :%d:\n", (int)token->data.doctype.name.len, (int) 
strlen(expname)); + printf(":%s: :%s:\n", gotname, expname); assert(token->data.doctype.name.len == strlen(expname)); assert(strncmp(gotname, expname, strlen(expname)) == 0); diff --git a/test/tokeniser3.c b/test/tokeniser3.c index 949ddd0..7ce2602 100644 --- a/test/tokeniser3.c +++ b/test/tokeniser3.c @@ -14,6 +14,8 @@ #include "testutils.h" +#define strlen n_str + typedef struct context { const uint8_t *input; size_t input_len; @@ -23,7 +25,7 @@ typedef struct context { size_t char_off; const char *last_start_tag; - struct array_list *content_model; + struct array_list *initial_state; bool process_cdata; } context; @@ -61,7 +63,7 @@ int main(int argc, char **argv) (struct json_object *) array_list_get_idx(tests, i); ctx.last_start_tag = NULL; - ctx.content_model = NULL; + ctx.initial_state = NULL; ctx.process_cdata = false; /* Extract settings */ @@ -85,8 +87,8 @@ int main(int argc, char **argv) } else if (strcmp(key, "lastStartTag") == 0) { ctx.last_start_tag = (const char *) json_object_get_string(val); - } else if (strcmp(key, "contentModelFlags") == 0) { - ctx.content_model = + } else if (strcmp(key, "initialStates") == 0) { + ctx.initial_state = json_object_get_array(val); } else if (strcmp(key, "processCDATA") == 0) { ctx.process_cdata = @@ -112,10 +114,10 @@ void run_test(context *ctx) size_t j; struct array_list *outputsave = ctx->output; - if (ctx->content_model == NULL) { + if (ctx->initial_state == NULL) { max_i = 1; } else { - max_i = array_list_length(ctx->content_model); + max_i = array_list_length(ctx->initial_state); } /* We test for each of the content models specified */ @@ -159,30 +161,34 @@ void run_test(context *ctx) HUBBUB_TOKENISER_TOKEN_HANDLER, &params) == HUBBUB_OK); - if (ctx->content_model == NULL) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PCDATA; + if (ctx->initial_state == NULL) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_DATA; } else { const char *cm = json_object_get_string( (struct json_object *) - 
array_list_get_idx(ctx->content_model, i)); + array_list_get_idx(ctx->initial_state, i)); if (strcmp(cm, "PCDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { - params.content_model.model = - HUBBUB_CONTENT_MODEL_CDATA; + params.initial_state.state = + HUBBUB_INITIAL_STATE_DATA; + } else if (strcmp(cm, "RCDATA state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_RCDATA; + } else if (strcmp(cm, "CDATA state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.initial_state.state = + HUBBUB_INITIAL_STATE_RAWTEXT; } else { - params.content_model.model = - HUBBUB_CONTENT_MODEL_PLAINTEXT; + params.initial_state.state = + HUBBUB_INITIAL_STATE_PLAINTEXT; } } + assert(hubbub_tokeniser_setopt(tok, - HUBBUB_TOKENISER_CONTENT_MODEL, + HUBBUB_TOKENISER_INITIAL_STATE, &params) == HUBBUB_OK); printf("Input: '%.*s' (%d)\n", (int) ctx->input_len, -- 1.8.3.2