On Mon, May 16, 2005 at 02:04:44PM +0200, [EMAIL PROTECTED] wrote:
> Hi,
>
> > Von: [EMAIL PROTECTED]
> > Datum: Mon, 16 May 2005 13:59:34 +0200 (MEST)
>
> [...]
>
> > I attached a first sketch for an adopt function. It tries the "oldNs"
>
> I did not. But now.
Quick comments on it:
> [...]
>
> Kasimier
> static int
> xmlDOMWrapAdoptNode(void *ctxt, xmlDocPtr sourceDoc, xmlDocPtr destDoc,
> xmlNodePtr node, xmlNodePtr parent, int unlink)
> {
sourceDoc is redundant; it can be extracted from node->doc.
parent should be optional; NULL would be similar to the real DOM function.
Error handling should be designed. Simply returning a -1 error code is not really
suitable for the kind of complex operation that is being designed here.
> int ret = 0;
> xmlNodePtr cur, curElem, par;
> xmlNsPtr *nsList = NULL;
> int nbNs, sizeNs, sameDict;
> xmlNsPtr ns;
>
> if (node == NULL)
> return(-1);
> switch (node->type) {
> case XML_DOCUMENT_NODE:
> case XML_HTML_DOCUMENT_NODE:
XML_HTML_DOCUMENT_NODE and XML_DOCUMENT_NODE need not necessarily generate an error...
I could think of a semantic for this; it needs to be checked against DOM.
> #ifdef LIBXML_DOCB_ENABLED
> case XML_DOCB_DOCUMENT_NODE:
> #endif
> case XML_DOCUMENT_TYPE_NODE:
> case XML_NOTATION_NODE:
> case XML_DTD_NODE:
> case XML_ELEMENT_DECL:
> case XML_ATTRIBUTE_DECL:
> case XML_ENTITY_DECL:
> case XML_ENTITY_NODE:
> return (-1);
> default:
> break;
> }
> sameDict = ((sourceDoc->dict == destDoc->dict) &&
> (destDoc->dict != NULL)) ? 1 : 0;
> cur = node;
If parent != NULL, collect the existing in-scope namespaces.
> /*
> * TODO: Unlink.
> */
> while (cur != NULL) {
> switch (cur->type) {
> case XML_ELEMENT_NODE:
> curElem = cur;
> /* No break on purpose. */
> case XML_ATTRIBUTE_NODE:
> /*
> * Adopt the localName.
> */
> if (! sameDict) {
Wrong: you need to check xmlDictOwns(sourceDoc->dict, cur->name)
too, or you are going to leak cur->name if the node was added manually.
> if (destDoc->dict)
> cur->name = xmlDictLookup(destDoc->dict, cur->name, -1);
> else if (sourceDoc->dict)
> cur->name = BAD_CAST xmlStrdup(cur->name);
> /*
> * TODO: Are namespace declarations ever in a dict?
> */
> }
> /*
> * Adopt out-of-scope namespace declarations.
> */
> if (cur->ns != NULL) {
> int i, j;
I would rather use a hash table than compare all the namespace strings.
> /*
> * Did we come across this declaration already?
> */
> if (nsList != NULL) {
> for (i = 0, j = 0; i < nbNs; i++, j += 2) {
> if (nsList[j] == cur->ns) {
> /*
> * If the entry is NULL, then the ns declaration
> * is in scope.
> */
> if (nsList[++j] != NULL)
> cur->ns = nsList[j];
> goto ns_adopt_done;
> }
> }
> }
> if (ctxt == NULL) {
> /*
> * Default behaviour: lookup if not in scope; if so,
> * then pick or add a ns decl. using oldNs of xmlDoc.
> */
> /*
> * Is the namespace declaration in scope?
> */
> if (curElem != NULL) {
> par = curElem;
> do {
> if ((par->type == XML_ELEMENT_NODE) &&
> (par->nsDef != NULL)) {
> ns = par->nsDef;
> do {
> if (ns == cur->ns) {
> /*
> * In scope; add a mapping.
> */
> ns = NULL;
> goto ns_add_mapping;
> }
> ns = ns->next;
> } while (ns != NULL);
> }
> par = par->parent;
> } while (par != node);
> }
> /*
> * No luck, the namespace will be out of scope if the
> * node is unlinked; anchor it temporarily on the
> * xmlDoc.
> */
> ns = destDoc->oldNs;
> while (ns != NULL) {
> if ((((ns->prefix == NULL) &&
> (cur->ns->prefix == NULL)) ||
> ((ns->prefix != NULL) &&
> xmlStrEqual(ns->prefix, cur->ns->prefix))) &&
> xmlStrEqual(ns->href, cur->ns->href)) {
>
> goto ns_add_mapping;
> }
> if (ns->next == NULL)
> break;
> ns = ns->next;
> }
> /*
> * Again, no luck; add a namespace declaration to oldNs.
> */
> if (ns == NULL) {
> /*
> * Libxml2 expects the XML namespace to be
> * in oldNs.
> */
> ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
> if (ns == NULL) {
> xmlTreeErrMemory(
> "allocating temporary namespace");
> goto internal_error;
> }
> memset(ns, 0, sizeof(xmlNs));
> ns->type = XML_LOCAL_NAMESPACE;
> ns->href = xmlStrdup(XML_XML_NAMESPACE);
> ns->prefix = xmlStrdup(
> (const xmlChar *)"xml");
> destDoc->oldNs = ns;
> }
> ns->next = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
> if (ns->next == NULL) {
> xmlTreeErrMemory(
> "allocating temporary namespace");
> goto internal_error;
> }
> ns = ns->next;
>
> memset(ns, 0, sizeof(xmlNs));
> ns->type = XML_LOCAL_NAMESPACE;
> if (cur->ns->prefix != NULL)
> ns->prefix = xmlStrdup(cur->ns->prefix);
> ns->href = xmlStrdup(cur->ns->href);
> } else {
> /*
> * User-defined behaviour.
> */
You can't do that. ctxt needs to be refined to be actually useful; a
void * won't work. And adding 2 args might be just a bit too much; this needs
more thinking.
> #if 0
> ctxt->aquireNsDecl(ctxt, cur->ns, &ns);
> #endif
> }
>
> ns_add_mapping:
> if (nsList == NULL) {
> nsList = (xmlNsPtr *) xmlMalloc(10 *
> sizeof(xmlNsPtr));
> if (nsList == NULL) {
> xmlTreeErrMemory(
> "allocating namespace map");
> goto internal_error;
> }
> nbNs = 0;
> sizeNs = 5;
> } else if (nbNs >= sizeNs) {
> sizeNs *= 2;
> nsList = (xmlNsPtr *) xmlRealloc(nsList,
> sizeNs * 2 * sizeof(xmlNsPtr));
> if (nsList == NULL) {
> xmlTreeErrMemory(
> "re-allocating namespace map");
> goto internal_error;
> }
> }
> nsList[nbNs *2] = cur->ns;
> nsList[nbNs *2 +1] = ns;
> nbNs++;
> if (ns != NULL)
> cur->ns = ns;
> }
I would really rather use a dictionary for nsList; it would be way cleaner.
The only problem is that it would require a trick like a function recursion
when encountering a namespace deactivation like xmlns="" or xmlns:foo=""
or a namespace redefinition to a different value, but that is quite infrequent.
> ns_adopt_done:
> cur->doc = destDoc;
> if (cur->type == XML_ELEMENT_NODE) {
> cur->psvi = NULL;
> cur->line = 0;
> cur->extra = 0;
> /*
> * Attributes.
> */
> if (cur->properties != NULL) {
> cur = (xmlNodePtr) cur->properties;
> continue;
> }
> } else {
> ((xmlAttrPtr) cur)->atype = 0;
> ((xmlAttrPtr) cur)->psvi = 0;
> }
>
> break;
> case XML_TEXT_NODE:
> case XML_CDATA_SECTION_NODE:
> /*
> * TODO: When to adopt the content?
> */
Use xmlDictOwns to check!
> goto internal_error;
> break;
> case XML_XINCLUDE_START:
> case XML_XINCLUDE_END:
> /* TODO */
> goto internal_error;
> break;
These should not generate an error but be ignored instead.
> case XML_ENTITY_REF_NODE:
> /*
> * TODO: Remove entity child nodes.
> */
> goto internal_error;
> break;
This forces a recursion; see other examples of recursive tree walks with
entity references. Potentially a lookup of the entity being referenced
from the target document is needed. XInclude has a semantic for such entity
remapping; this might use the same.
> case XML_ENTITY_NODE:
> case XML_NOTATION_NODE:
> /*
> * TODO: Remove those nodes.
> */
> goto internal_error;
> break;
> case XML_PI_NODE:
> case XML_COMMENT_NODE:
> /*
> * TODO: Adopt something?
> */
> goto internal_error;
> break;
> case XML_DOCUMENT_FRAG_NODE:
> break;
> default:
> break;
Hum, I seem to have missed the handling of XML_ELEMENT_NODE, especially the
part handling nsDef on those.
> }
> /*
> * Walk the brach.
> */
> if (cur->children != NULL) {
> cur = cur->children;
> continue;
> }
>
> next_sibling:
> if (cur == node)
> break;
> if (cur->next != NULL)
> cur = cur->next;
> else {
> cur = cur->parent;
> goto next_sibling;
> }
> }
>
> return (ret);
>
> internal_error:
> if (nsList != NULL)
> xmlFree(nsList);
> return (-1);
> }
Obviously a lot of thinking and testing needs to be carried out. I would really
like to get something we can finally rely on and not half-solutions.
Thanks a lot for starting the effort, though there is obviously some work
left :-)
Daniel
--
Daniel Veillard | Red Hat Desktop team http://redhat.com/
[EMAIL PROTECTED] | libxml GNOME XML XSLT toolkit http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/
_______________________________________________
xml mailing list, project page http://xmlsoft.org/
[email protected]
http://mail.gnome.org/mailman/listinfo/xml