On Thu, Mar 5, 2009 at 12:12 AM, John-Mark Bell <[email protected]> wrote:
> On Wed, 2009-03-04 at 22:44 +0800, Bo Yang wrote:
>
> This is mostly fine, thanks. A few more issues remain, however.
> Comments below.
>
This patch fixes the second batch of issues and adds a new API,
dom_hubbub_parser_get_encoding(). I have also copied the implementation from
netsurf/render/hubbub_bindings.c here to implement the change_encoding
callback. Thanks!
Regards!
Bo
diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c
index 7b5e6ab..f09f5a0 100644
--- a/bindings/hubbub/parser.c
+++ b/bindings/hubbub/parser.c
@@ -16,13 +16,19 @@
#include "utils.h"
/**
- * libdom Hubbub parser object
+ * libdom Hubbub parser context
*/
struct dom_hubbub_parser {
hubbub_parser *parser; /**< Hubbub parser instance */
+ hubbub_tree_handler tree_handler;
+ /**< Hubbub parser tree handler */
struct dom_document *doc; /**< DOM Document we're building */
+ dom_hubbub_encoding_source encoding_source;
+ /**< The document's encoding source */
+ const char *encoding; /**< The document's encoding */
+
bool complete; /**< Indicate stream completion */
struct dom_implementation *impl;/**< DOM implementation */
@@ -34,39 +40,83 @@ struct dom_hubbub_parser {
void *mctx; /**< Pointer to client data */
};
-static hubbub_error __dom_hubbub_token_handler(const hubbub_token *token,
- void *pw);
-static bool __initialised;
+/* Declarations of the hubbub tree handler callbacks */
+static int create_comment(void *parser, const hubbub_string *data,
+ void **result);
+static int create_doctype(void *parser, const hubbub_doctype *doctype,
+ void **result);
+static int create_element(void *parser, const hubbub_tag *tag, void **result);
+static int create_text(void *parser, const hubbub_string *data,
+ void **result);
+static int ref_node(void *parser, void *node);
+static int unref_node(void *parser, void *node);
+static int append_child(void *parser, void *parent, void *child,
+ void **result);
+static int insert_before(void *parser, void *parent, void *child,
+ void *ref_child, void **result);
+static int remove_child(void *parser, void *parent, void *child,
+ void **result);
+static int clone_node(void *parser, void *node, bool deep, void **result);
+static int reparent_children(void *parser, void *node, void *new_parent);
+static int get_parent(void *parser, void *node, bool element_only,
+ void **result);
+static int has_children(void *parser, void *node, bool *result);
+static int form_associate(void *parser, void *form, void *node);
+static int add_attributes(void *parser, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
+static int set_quirks_mode(void *parser, hubbub_quirks_mode mode);
+static int change_encoding(void *parser, const char *charset);
+
+static hubbub_tree_handler tree_handler = {
+ create_comment,
+ create_doctype,
+ create_element,
+ create_text,
+ ref_node,
+ unref_node,
+ append_child,
+ insert_before,
+ remove_child,
+ clone_node,
+ reparent_children,
+ get_parent,
+ has_children,
+ form_associate,
+ add_attributes,
+ set_quirks_mode,
+ change_encoding,
+ NULL
+};
+
+static bool __initialised = false;
/**
* Create a Hubbub parser instance
*
* \param aliases Path to encoding alias mapping file
* \param enc Source charset, or NULL
- * \param int_enc Desired charset of document buffer (UTF-8 or UTF-16)
+ * \param fix_enc Whether to fix the encoding (passed on to hubbub_parser_create())
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
* \param msg Informational message function
* \param mctx Pointer to client-specific private data
* \return Pointer to instance, or NULL on memory exhaustion
*/
-dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
- const char *enc, const char *int_enc,
+dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
+ const char *enc, bool fix_enc,
dom_alloc alloc, void *pw, dom_msg msg, void *mctx)
{
dom_hubbub_parser *parser;
hubbub_parser_optparams params;
- struct dom_string *features;
+ hubbub_error error;
dom_exception err;
- hubbub_error e;
-
- UNUSED(int_enc);
+ struct dom_string *features;
if (__initialised == false) {
- e = hubbub_initialise(aliases, (hubbub_alloc) alloc, pw);
- if (e != HUBBUB_OK) {
- msg(DOM_MSG_ERROR, mctx,
+ error = hubbub_initialise(aliases, (hubbub_alloc) alloc, pw);
+ if (error != HUBBUB_OK) {
+ msg(DOM_MSG_ERROR, mctx,
"Failed initialising hubbub");
return NULL;
}
@@ -74,36 +124,33 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
__initialised = true;
}
+
parser = alloc(NULL, sizeof(dom_hubbub_parser), pw);
if (parser == NULL) {
- msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
+ msg(DOM_MSG_CRITICAL, mctx, "No memory for parsing context");
return NULL;
}
- e = hubbub_parser_create(enc, true, (hubbub_alloc) alloc, pw,
- &parser->parser);
- if (e != HUBBUB_OK) {
- alloc(parser, 0, pw);
- msg(DOM_MSG_CRITICAL, mctx, "Failed to create hubbub parser");
- return NULL;
- }
+ parser->parser = NULL;
+ parser->doc = NULL;
+ parser->encoding = enc;
+ parser->encoding_source = enc != NULL ? ENCODING_SOURCE_HEADER
+ : ENCODING_SOURCE_DETECTED;
+ parser->complete = false;
+ parser->impl = NULL;
- params.token_handler.handler = __dom_hubbub_token_handler;
- params.token_handler.pw = parser;
- e = hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_TOKEN_HANDLER,
- &params);
- if (e != HUBBUB_OK) {
- hubbub_parser_destroy(parser->parser);
- alloc(parser, 0, pw);
- msg(DOM_MSG_CRITICAL, mctx,
- "Failed registering hubbub token handler");
+ parser->alloc = alloc;
+ parser->pw = pw;
+ parser->msg = msg;
+ parser->mctx = mctx;
+
+ error = hubbub_parser_create(enc, fix_enc, alloc, pw, &parser->parser);
+ if (error != HUBBUB_OK) {
+ parser->alloc(parser, 0, parser->pw);
+ msg(DOM_MSG_CRITICAL, mctx, "Can't create parser");
return NULL;
}
- parser->doc = NULL;
-
- parser->complete = false;
-
/* Get DOM implementation */
/* Create string representation of the features we want */
err = dom_string_create(alloc, pw,
@@ -115,7 +162,7 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
return NULL;
}
- /* Now, try to get an appropriate implementation from the registry */
+ /* Now, try to get an appropriate implementation from the registry */
err = dom_implregistry_get_dom_implementation(features,
&parser->impl, alloc, pw);
if (err != DOM_NO_ERR) {
@@ -125,15 +172,28 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
msg(DOM_MSG_ERROR, mctx, "No suitable DOMImplementation");
return NULL;
}
-
/* no longer need the features string */
dom_string_unref(features);
- parser->alloc = alloc;
- parser->pw = pw;
+ err = dom_implementation_create_document(parser->impl, NULL, NULL, NULL,
+ &parser->doc, alloc, pw);
+ if (err != DOM_NO_ERR) {
+ hubbub_parser_destroy(parser->parser);
+ alloc(parser, 0, pw);
+ msg(DOM_MSG_ERROR, mctx, "Can't create DOM document");
+ return NULL;
+ }
- parser->msg = msg;
- parser->mctx = mctx;
+ parser->tree_handler = tree_handler;
+ parser->tree_handler.ctx = (void *) parser;
+
+ params.tree_handler = &parser->tree_handler;
+ hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_TREE_HANDLER,
+ &params);
+
+ params.document_node = parser->doc;
+ hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_DOCUMENT_NODE,
+ &params);
return parser;
}
@@ -142,114 +202,543 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
void dom_hubbub_parser_destroy(dom_hubbub_parser *parser)
{
dom_implementation_unref(parser->impl);
-
hubbub_parser_destroy(parser->parser);
+ parser->parser = NULL;
- /** \todo do we want to clean up the document here too? */
-
+ if (parser->doc != NULL)
+ dom_node_unref((struct dom_node *) parser->doc);
parser->alloc(parser, 0, parser->pw);
}
-/* Parse a chunk of data */
dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser,
uint8_t *data, size_t len)
{
hubbub_error err;
err = hubbub_parser_parse_chunk(parser->parser, data, len);
- if (err != HUBBUB_OK) {
- parser->msg(DOM_MSG_ERROR, parser->mctx,
- "hubbub_parser_parse_chunk failed: %d", err);
+ if (err != HUBBUB_OK)
return DOM_HUBBUB_HUBBUB_ERR | err;
- }
return DOM_HUBBUB_OK;
}
-/* Notify parser that datastream is empty */
dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser)
{
hubbub_error err;
+ parser->complete = true;
err = hubbub_parser_completed(parser->parser);
if (err != DOM_HUBBUB_OK) {
parser->msg(DOM_MSG_ERROR, parser->mctx,
"hubbub_parser_completed failed: %d", err);
return DOM_HUBBUB_HUBBUB_ERR | err;
}
-
- parser->complete = true;
-
+
return DOM_HUBBUB_OK;
}
-/* Retrieve the created DOM Document */
struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser)
{
- return (parser->complete ? parser->doc : NULL);
+ struct dom_document *doc = NULL;
+ if (parser->complete) {
+ doc = parser->doc;
+ parser->doc = NULL;
+ }
+
+ return doc;
+}
+
+const char *dom_hubbub_parser_get_encoding(dom_hubbub_parser *parser,
+ dom_hubbub_encoding_source *source)
+{
+ *source = parser->encoding_source;
+
+ return parser->encoding != NULL ? parser->encoding
+ : "Windows-1252";
}
-hubbub_error __dom_hubbub_token_handler(const hubbub_token *token, void *pw)
+/* Definitions of the hubbub tree handler callbacks */
+static int create_comment(void *parser, const hubbub_string *data,
+ void **result)
{
- dom_hubbub_parser *parser = (dom_hubbub_parser *) pw;
- static const char *token_names[] = {
- "DOCTYPE", "START TAG", "END TAG",
- "COMMENT", "CHARACTERS", "EOF"
- };
- size_t i;
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *str;
+ struct dom_comment *comment;
+
+ *result = NULL;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw, data->ptr,
+ data->len, &str);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create comment node text");
+ return 1;
+ }
+
+ err = dom_document_create_comment(dom_parser->doc, str, &comment);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(str);
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create comment node with text '%.*s'",
+ data->len, data->ptr);
+ return 1;
+ }
+
+ *result = comment;
+ dom_string_unref(str);
+
+ return 0;
+}
+
+static int create_doctype(void *parser, const hubbub_doctype *doctype,
+ void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *qname, *public_id = NULL, *system_id = NULL;
+ struct dom_document_type *dtype;
+
+ *result = NULL;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->name.ptr, doctype->name.len, &qname);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype name");
+ goto fail;
+ }
+
+ if (doctype->public_missing == false) {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->public_id.ptr,
+ doctype->public_id.len, &public_id);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype public id");
+ goto clean1;
+ }
+ }
+
+ if (doctype->system_missing == false) {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->system_id.ptr,
+ doctype->system_id.len, &system_id);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype system id");
+ goto clean2;
+ }
+ }
+
+ err = dom_implementation_create_document_type(dom_parser->impl, qname,
+ public_id, system_id, &dtype, dom_parser->alloc,
+ dom_parser->pw);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the document type");
+ goto clean3;
+ }
+
+ *result = dtype;
+
+clean3:
+ if (doctype->system_missing == false)
+ dom_string_unref(system_id);
+
+clean2:
+ if (doctype->public_missing == false)
+ dom_string_unref(public_id);
+
+clean1:
+ dom_string_unref(qname);
+
+fail:
+ if (*result == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+static int create_element(void *parser, const hubbub_tag *tag, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *name;
+ struct dom_element *element = NULL;
+
+ *result = NULL;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ tag->name.ptr, tag->name.len, &name);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create element name");
+ goto fail;
+ }
+
+ if (tag->ns == HUBBUB_NS_NULL) {
+ err = dom_document_create_element(dom_parser->doc, name,
+ &element);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the DOM element");
+ goto clean1;
+ }
+ } else {
+ err = dom_document_create_element_ns(dom_parser->doc,
+ dom_namespaces[tag->ns], name, &element);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the DOM element");
+ goto clean1;
+ }
+ }
+
+ *result = element;
+clean1:
+ dom_string_unref(name);
+
+fail:
+ if (*result == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+static int create_text(void *parser, const hubbub_string *data, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *str;
+ struct dom_text *text = NULL;
+
+ *result = NULL;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw, data->ptr,
+ data->len, &str);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create text '%.*s'", data->len,
+ data->ptr);
+ goto fail;
+ }
+
+ err = dom_document_create_text_node(dom_parser->doc, str, &text);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the DOM text node");
+ goto clean1;
+ }
+
+ *result = text;
+clean1:
+ dom_string_unref(str);
+
+fail:
+ if (*result == NULL)
+ return 1;
+ else
+ return 0;
+
+}
+
+static int ref_node(void *parser, void *node)
+{
+ UNUSED(parser);
+ struct dom_node *dnode = (struct dom_node *) node;
+ dom_node_ref(dnode);
+
+ return 0;
+}
+
+static int unref_node(void *parser, void *node)
+{
+ UNUSED(parser);
+ struct dom_node *dnode = (struct dom_node *) node;
+ dom_node_unref(dnode);
+
+ return 0;
+}
+
+static int append_child(void *parser, void *parent, void *child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_append_child((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't append child '%p' for parent '%p'",
+ child, parent);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert_before(void *parser, void *parent, void *child,
+ void *ref_child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_insert_before((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node *) ref_child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't insert node '%p' before node '%p'",
+ child, ref_child);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int remove_child(void *parser, void *parent, void *child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_remove_child((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't remove child '%p'", child);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int clone_node(void *parser, void *node, bool deep, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_clone_node((struct dom_node *) node, deep,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't clone node '%p'", node);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int reparent_children(void *parser, void *node, void *new_parent)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_node *child, *result;
+
+ while(true) {
+ err = dom_node_get_first_child((struct dom_node *) node,
+ &child);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_note_get_first_child");
+ return 1;
+ }
+ if (child == NULL)
+ break;
+
+ err = dom_node_remove_child(node, (struct dom_node *) child,
+ &result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_remove_child");
+ goto fail;
+ }
+ dom_node_unref(child);
+
+ err = dom_node_append_child((struct dom_node *) new_parent,
+ (struct dom_node *) child, &result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_append_child");
+ goto fail;
+ }
+ dom_node_unref(child);
+ dom_node_unref(result);
+ }
+ return 0;
+
+fail:
+ dom_node_unref(child);
+ return 1;
+}
+
+static int get_parent(void *parser, void *node, bool element_only,
+ void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_node *parent;
+ dom_node_type type = DOM_NODE_TYPE_COUNT;
+
+ err = dom_node_get_parent_node((struct dom_node *) node,
+ &parent);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_get_parent");
+ return 1;
+ }
+ if (element_only == false) {
+ *result = parent;
+ return 0;
+ }
+
+ err = dom_node_get_node_type(parent, &type);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_get_type");
+ goto fail;
+ }
+ if (type == DOM_ELEMENT_NODE) {
+ *result = parent;
+ return 0;
+ } else {
+ *result = NULL;
+ dom_node_unref(parent);
+ return 0;
+ }
+
+fail:
+ dom_node_unref(parent);
+ return 1;
+}
+
+static int has_children(void *parser, void *node, bool *result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
UNUSED(parser);
+ err = dom_node_has_child_nodes((struct dom_node *) node, result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_has_child_nodes");
+ return 1;
+ }
+ return 0;
+}
+
+static int form_associate(void *parser, void *form, void *node)
+{
+ UNUSED(parser);
+ UNUSED(form);
+ UNUSED(node);
+
+ return 0;
+}
- printf("%s: ", token_names[token->type]);
-
- switch (token->type) {
- case HUBBUB_TOKEN_DOCTYPE:
- printf("'%.*s' (%svalid)\n",
- (int) token->data.doctype.name.len,
- token->data.doctype.name.ptr,
- token->data.doctype.force_quirks ? "in" : "");
- break;
- case HUBBUB_TOKEN_START_TAG:
- printf("'%.*s' %s\n",
- (int) token->data.tag.name.len,
- token->data.tag.name.ptr,
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- token->data.tag.attributes[i].name.ptr,
- (int) token->data.tag.attributes[i].value.len,
- token->data.tag.attributes[i].value.ptr);
+static int add_attributes(void *parser, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ uint32_t i;
+
+ for (i = 0; i < n_attributes; i++) {
+ struct dom_string *name, *value;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ attributes[i].name.ptr,
+ attributes[i].name.len, &name);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create attribute name");
+ goto fail;
}
- break;
- case HUBBUB_TOKEN_END_TAG:
- printf("'%.*s' %s\n",
- (int) token->data.tag.name.len,
- token->data.tag.name.ptr,
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- token->data.tag.attributes[i].name.ptr,
- (int) token->data.tag.attributes[i].value.len,
- token->data.tag.attributes[i].value.ptr);
+
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ attributes[i].value.ptr,
+ attributes[i].value.len, &value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create attribute value");
+ dom_string_unref(name);
+ goto fail;
}
- break;
- case HUBBUB_TOKEN_COMMENT:
- printf("'%.*s'\n", (int) token->data.comment.len,
- token->data.comment.ptr);
- break;
- case HUBBUB_TOKEN_CHARACTER:
- printf("'%.*s'\n", (int) token->data.character.len,
- token->data.character.ptr);
- break;
- case HUBBUB_TOKEN_EOF:
- printf("\n");
- break;
- }
-
- return HUBBUB_OK;
+
+ if (attributes[i].ns == HUBBUB_NS_NULL) {
+ err = dom_element_set_attribute(
+ (struct dom_element *) node, name,
+ value);
+ dom_string_unref(name);
+ dom_string_unref(value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL,
+ dom_parser->mctx,
+ "Can't add attribute");
+ goto fail;
+ }
+ } else {
+ err = dom_element_set_attribute_ns(
+ (struct dom_element *) node,
+ dom_namespaces[attributes[i].ns], name,
+ value);
+ dom_string_unref(name);
+ dom_string_unref(value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL,
+ dom_parser->mctx,
+ "Can't add attribute ns");
+ goto fail;
+ }
+ }
+ }
+
+ return 0;
+
+fail:
+ return 1;
+}
+
+static int set_quirks_mode(void *parser, hubbub_quirks_mode mode)
+{
+ UNUSED(parser);
+ UNUSED(mode);
+
+ return 0;
+}
+
+static int change_encoding(void *parser, const char *charset)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ uint32_t source;
+ const char *name;
+
+ /* If we have an encoding here, it means we are *certain* */
+ if (dom_parser->encoding != NULL) {
+ return 0;
+ }
+
+ /* Find the confidence otherwise (can only be from a BOM) */
+ name = hubbub_parser_read_charset(dom_parser->parser, &source);
+
+ if (source == HUBBUB_CHARSET_CONFIDENT) {
+ dom_parser->encoding_source = ENCODING_SOURCE_DETECTED;
+ dom_parser->encoding = (char *) charset;
+ return 0;
+ }
+
+ /* So here we have something of confidence tentative... */
+ /* http://www.whatwg.org/specs/web-apps/current-work/#change */
+
+ /* 2. "If the new encoding is identical or equivalent to the encoding
+ * that is already being used to interpret the input stream, then set
+ * the confidence to confident and abort these steps." */
+
+ /* Whatever happens, the encoding should be set here; either for
+ * reprocessing with a different charset, or for confirming that the
+ * charset is in fact correct */
+ dom_parser->encoding = charset;
+ dom_parser->encoding_source = ENCODING_SOURCE_META;
+
+ /* Equal encodings will have the same string pointers */
+ return (charset == name) ? 0 : 1;
}
diff --git a/bindings/hubbub/parser.h b/bindings/hubbub/parser.h
index f4c2ac4..b278acb 100644
--- a/bindings/hubbub/parser.h
+++ b/bindings/hubbub/parser.h
@@ -19,9 +19,36 @@ struct dom_document;
typedef struct dom_hubbub_parser dom_hubbub_parser;
+/* The encoding source of the document */
+typedef enum dom_hubub_encoding_source {
+ ENCODING_SOURCE_HEADER,
+ ENCODING_SOURCE_DETECTED,
+ ENCODING_SOURCE_META
+} dom_hubbub_encoding_source;
+
+/* The recommended way to use the parser is:
+ *
+ * dom_hubbub_parser_create(...);
+ * dom_hubbub_parser_parse_chunk(...);
+ * call dom_hubbub_parser_parse_chunk() as many times as needed...
+ * After all of the input data has been parsed:
+ *
+ * dom_hubbub_parser_completed(...);
+ * dom_hubbub_parser_get_document(...);
+ * dom_hubbub_parser_destroy(...);
+ *
+ * Clients should take care with the last three function calls:
+ * dom_hubbub_parser_get_document() passes ownership of the document to
+ * the client, after which the parser should be destroyed. The client
+ * must not call any method of this parser after destruction.
+ *
+ * The client must call dom_hubbub_parser_completed() before calling
+ * dom_hubbub_parser_get_document().
+ */
+
/* Create a Hubbub parser instance */
dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
- const char *enc, const char *int_enc,
+ const char *enc, bool fix_enc,
dom_alloc alloc, void *pw, dom_msg msg, void *mctx);
/* Destroy a Hubbub parser instance */
@@ -37,4 +64,7 @@ dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser);
/* Retrieve the created DOM Document */
struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser);
+/* Retrieve the document's encoding */
+const char *dom_hubbub_parser_get_encoding(dom_hubbub_parser *parser,
+ dom_hubbub_encoding_source *source);
#endif
diff --git a/include/dom/dom.h b/include/dom/dom.h
index 4d17a3f..fd8b9d8 100644
--- a/include/dom/dom.h
+++ b/include/dom/dom.h
@@ -35,4 +35,18 @@
#include <dom/core/string.h>
#include <dom/core/text.h>
+typedef enum dom_namespace {
+ DOM_NAMESPACE_NULL = 0,
+ DOM_NAMESPACE_HTML = 1,
+ DOM_NAMESPACE_MATHML = 2,
+ DOM_NAMESPACE_SVG = 3,
+ DOM_NAMESPACE_XLINK = 4,
+ DOM_NAMESPACE_XML = 5,
+ DOM_NAMESPACE_XMLNS = 6,
+
+ DOM_NAMESPACE_COUNT = 7
+} dom_namespace;
+
+extern struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT];
+
#endif
diff --git a/src/utils/namespace.c b/src/utils/namespace.c
index 8002b8e..444f6a0 100644
--- a/src/utils/namespace.c
+++ b/src/utils/namespace.c
@@ -7,11 +7,12 @@
#include <string.h>
-#include <dom/core/string.h>
+#include <dom/dom.h>
#include "utils/namespace.h"
#include "utils/utils.h"
+
/** XML prefix */
static struct dom_string *xml;
/** XML namespace URI */
@@ -21,6 +22,22 @@ static struct dom_string *xmlns;
/** XMLNS namespace URI */
static struct dom_string *xmlns_ns;
+/** The namespace URI strings, in the same order as enum dom_namespace */
+static const char *namespaces[] = {
+ NULL,
+ "http://www.w3.org/1999/xhtml",
+ "http://www.w3.org/1998/Math/MathML",
+ "http://www.w3.org/2000/svg",
+ "http://www.w3.org/1999/xlink",
+ "http://www.w3.org/XML/1998/namespace",
+ "http://www.w3.org/2000/xmlns/"
+};
+
+/** NOTE: DOM_NAMESPACE_COUNT must be kept equal to the number of entries in namespaces[] (admittedly ugly) */
+struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT] = {
+ NULL,
+};
+
/**
* Initialise the namespace component
*
@@ -66,6 +83,16 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
return err;
}
+ int i;
+ int size = sizeof(namespaces) / sizeof(namespaces[0]);
+ for (i = 0; i < size; i++) {
+ err = dom_string_create(
+ alloc, pw, (const uint8_t *)namespaces[i],
+ strlen(namespaces[i]), &dom_namespaces[i]);
+ if (err != DOM_NO_ERR)
+ return err;
+ }
+
return DOM_NO_ERR;
}
@@ -81,6 +108,13 @@ dom_exception _dom_namespace_finalise(void)
dom_string_unref(xml_ns);
dom_string_unref(xml);
+ int i;
+ int size = sizeof(namespaces) / sizeof(namespaces[0]);
+ for (i = 0; i < size; i++) {
+ if (dom_namespaces[i] != NULL)
+ dom_string_unref(dom_namespaces[i]);
+ }
+
return DOM_NO_ERR;
}