On Wed, 2009-03-04 at 22:44 +0800, Bo Yang wrote:
This is mostly fine, thanks. A few more issues remain, however.
Comments below.
> diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c
> index 7b5e6ab..bd8062f 100644
> --- a/bindings/hubbub/parser.c
> +++ b/bindings/hubbub/parser.c
>
> /**
> * Create a Hubbub parser instance
> *
> * \param aliases Path to encoding alias mapping file
> * \param enc Source charset, or NULL
> - * \param int_enc Desired charset of document buffer (UTF-8 or
> UTF-16)
> + * \param fix_enc Whether fix the encoding
> * \param alloc Memory (de)allocation function
> * \param pw Pointer to client-specific private data
> * \param msg Informational message function
> * \param mctx Pointer to client-specific private data
> * \return Pointer to instance, or NULL on memory exhaustion
> + * Create the hubbub parsing context
You probably want to lose the last line here.
> -/* Parse a chunk of data */
> dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser
> *parser,
> uint8_t *data, size_t len)
> {
> hubbub_error err;
>
> err = hubbub_parser_parse_chunk(parser->parser, data, len);
> - if (err != HUBBUB_OK) {
> - parser->msg(DOM_MSG_ERROR, parser->mctx,
> - "hubbub_parser_parse_chunk failed: %
> d", err);
> + if (err == HUBBUB_ENCODINGCHANGE)
> return DOM_HUBBUB_HUBBUB_ERR | err;
> - }
I think we want to return the other errors, too. So make the condition
if (err != HUBBUB_OK).
> +static int reparent_children(void *parser, void *node, void
> *new_parent)
> +{
> + dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
> + dom_exception err;
> + struct dom_node *child, *result;
> +
> + while(true) {
> + err = dom_node_get_first_child((struct dom_node *)
> node,
> + &child);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> dom_parser->mctx,
> + "Error in
> dom_note_get_first_child");
> + goto fail;
Just return 1 here.
> + }
> + if (child == NULL)
> + break;
Child's reference count is 1.
> + err = dom_node_remove_child(node, (struct dom_node *)
> child,
> + &result);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> dom_parser->mctx,
> + "Error in
> dom_node_remove_child");
> + goto fail;
> + }
Child's reference count is 2 (as result == child)
Unref child here => child's reference count is 1.
> + err = dom_node_append_child((struct dom_node *)
> new_parent,
> + (struct dom_node *) child, &result);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> dom_parser->mctx,
> + "Error in
> dom_node_append_child");
> + goto fail;
> + }
> + }
Child's reference count is 2.
Unref child here => child's reference count is 1.
Unref child here => child's reference count is 0.
> + return 0;
> +
> +fail:
Unref child here => child's reference count is 0.
> + return 1;
> }
>
> +static int get_parent(void *parser, void *node, bool element_only,
> + void **result)
> {
> + dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
> + dom_exception err;
> + struct dom_node *parent;
> + dom_node_type type = DOM_NODE_TYPE_COUNT;
> +
> + err = dom_node_get_parent_node((struct dom_node *) node,
> + &parent);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
> + "Error in dom_node_get_parent");
> + goto fail;
> + }
> + if (element_only == false)
> + return 0;
This needs to be:
if (element_only == false) {
*result = parent;
return 0;
}
Parent's reference count is 1.
> + err = dom_node_get_node_type((struct dom_node *) node, &type);
We want the type of the parent here. So s/node/parent/
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
> + "Error in dom_node_get_type");
> + goto fail;
> + }
> + if (type == DOM_ELEMENT_NODE)
> + return 0;
> + else
> + *result = NULL;
> +
This needs to be:
if (type == DOM_ELEMENT_NODE) {
*result = parent;
return 0;
} else {
*result = NULL;
dom_node_unref((struct dom_node *) parent);
return 0;
}
> +fail:
Unref parent here.
> + return 1;
> +}
>
> +static int add_attributes(void *parser, void *node,
> + const hubbub_attribute *attributes, uint32_t
> n_attributes)
> +{
> + dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
> + dom_exception err;
> + uint32_t i;
> +
> + for (i = 0; i < n_attributes; i++) {
> + struct dom_string *name, *value;
> + err = dom_string_create(dom_parser->alloc,
> dom_parser->pw,
> + attributes[i].name.ptr,
> + attributes[i].name.len, &name);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> dom_parser->mctx,
> + "Can't create attribute
> name");
> + goto fail;
> + }
> +
> + err = dom_string_create(dom_parser->alloc,
> dom_parser->pw,
> + attributes[i].value.ptr,
> + attributes[i].value.len, &name);
s/name/value/
> + if (attributes[i].ns == HUBBUB_NS_NULL) {
> + err = dom_element_set_attribute(
> + (struct dom_element *) node,
> name,
> + value);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> + dom_parser->mctx,
> + "Can't add
> attribute");
> + dom_string_unref(name);
> + dom_string_unref(value);
> + goto fail;
> + }
> + } else {
> + err = dom_element_set_attribute_ns(
> + (struct dom_element *) node,
> + dom_namespaces[attributes[i].ns],
> name,
> + value);
> + if (err != DOM_NO_ERR) {
> + dom_parser->msg(DOM_MSG_CRITICAL,
> + dom_parser->mctx,
> + "Can't add attribute
> ns");
> + dom_string_unref(name);
> + dom_string_unref(value);
> + goto fail;
> + }
> }
In both of the above cases, we want to unref name & value regardless of
whether attribute setting was successful. Otherwise, they'll have
incorrect reference counts. So, I'd move the calls to dom_string_unref()
to before the if (err != DOM_NO_ERR) { lines.
> diff --git a/bindings/hubbub/parser.h b/bindings/hubbub/parser.h
> index f4c2ac4..3896bfa 100644
> --- a/bindings/hubbub/parser.h
> +++ b/bindings/hubbub/parser.h
> @@ -19,9 +19,29 @@ struct dom_document;
>
> typedef struct dom_hubbub_parser dom_hubbub_parser;
>
> +/* The recommend way to use the parser is:
recommended
> + *
> + * dom_hubbub_parser_create(...);
> + * dom_hubbub_parser_parse_chunk(...);
> + * call the _parser_chunk any times...
* call _parse_chunk any times...
> + * After you have parse all string,
parsed
> + *
> + * dom_hubbub_parser_completed(...);
> + * dom_bubbub_parser_get_document(...);
> + * dom_hubbub_parser_destroy(...);
> + *
> + * Clien should take care of the last three function calls, the
Clients
> + * dom_hubbub_parser_get_document() will pass the onwership of the
ownership
> + * document to the client. And after that, the parser should be
> destroy.
destroyed
> + * The client can't call any method of this parser.
s/parser/parser after destruction/
> + *
> + * And client must call dom_hubbub_parser_completed() before calling
> + * dom_hubbub_parser_get_document().
s/And/The/
> diff --git a/include/dom/dom.h b/include/dom/dom.h
> index 4d17a3f..49f52a6 100644
> --- a/include/dom/dom.h
> +++ b/include/dom/dom.h
> @@ -35,4 +35,6 @@
> #include <dom/core/string.h>
> #include <dom/core/text.h>
I'd add the following here:
typedef enum dom_namespace {
DOM_NAMESPACE_NULL = 0,
DOM_NAMESPACE_HTML = 1,
DOM_NAMESPACE_MATHML = 2,
DOM_NAMESPACE_SVG = 3,
DOM_NAMESPACE_XLINK = 4,
DOM_NAMESPACE_XML = 5,
DOM_NAMESPACE_XMLNS = 6,
DOM_NAMESPACE_COUNT = 7
} dom_namespace;
> +extern struct dom_string *dom_namespaces[];
s/[]/[DOM_NAMESPACE_COUNT]/
> #endif
> diff --git a/src/utils/namespace.c b/src/utils/namespace.c
> index 8002b8e..2defda6 100644
> --- a/src/utils/namespace.c
> +++ b/src/utils/namespace.c
> @@ -12,6 +12,7 @@
> #include "utils/namespace.h"
> #include "utils/utils.h"
>
> +
> /** XML prefix */
> static struct dom_string *xml;
> /** XML namespace URI */
> @@ -21,6 +22,22 @@ static struct dom_string *xmlns;
> /** XMLNS namespace URI */
> static struct dom_string *xmlns_ns;
>
> +/** The namespaces strings */
> +static const char *namespaces[] = {
> + NULL,
> + "http://www.w3.org/1999/xhtml",
> + "http://www.w3.org/1998/Math/MathML",
> + "http://www.w3.org/2000/svg",
> + "http://www.w3.org/1999/xlink",
> + "http://www.w3.org/XML/1998/namespace",
> + "http://www.w3.org/2000/xmlns/"
> +};
> +
> +/** Maybe use the two same number is ugly */
> +struct dom_string *dom_namespaces[7] = {
> + NULL,
> +};
> +
s/7/DOM_NAMESPACE_COUNT/
J.