Daniel Veillard wrote:
> On Fri, Jan 25, 2008 at 02:39:22PM +0100, Julien Charbon wrote:
>> Thus:
>> "&myent;" -> (1) -> "&myent;" -> (2) -> "&myent;"
> 
>   argh, right .... I'm afraid the escaping has been added as an afterthought
> it was not supposed to be that way, oh well, one can still build the 
> complex attrubute values 'by hand' with the help of the API, but I think
> somehow we defeated the initial purpose for the xmlStringGetNodeList() call
> 
>> It's give to me with current libxml2 trunk:
>> $ gcc test-xml-tiny.c -o test-xml-tiny $(xml2-config --cflags) \
>>    $(xml2-config --libs)
>> $ ./test-xml-tiny
>> Only one element in return of xmlStringGetNodeList
>> &foo; &bar; &amp; <tag> &myent </tag> &&
>>
>>   No change. Maybe, historically, it was not always the case...
> 
>   Hum, yes. The only other thing that your suggested change would loose
> are the error message resulting from the validations occuring in 
> xmlEncodeEntitiesReentrant() , problems reported there would go unnoticed
> otherwise. Is that still worth the extra complexity or not, I'm not sure.

  Hum, sure, UTF-8 validation shall not be removed. Anyway to evaluate 
this extra complexity, I made the simple program [see below] that do 
1000 iterations of xmlSetProp(node, name, value) and calculate the sum 
of all these calls with various 'value' parameter:

- With current libxml2 trunk , on my laptop:
[Time is given in second:nanosecond format]

$ ./test-setprop "value"
Time:   0:000980992
$ ./test-setprop "&myent;"
Time:   0:001219099
$ ./test-setprop "&コンニチハ"
Time:   0:004148401

- With patch below [not a final patch, just a patch that verify the 
UTF-8 validity of value, and directly add the XML_TEXT_NODE]

$ ./test-setprop "value"
Time:   0:000794779
$ ./test-setprop "&myent;"
Time:   0:000793037
$ ./test-setprop "&コンニチハ"
Time:   0:000819009

  Thus on my machine it is always faster, and depending on complexity 
of 'value' passed to xmlSetProp(), improvement vary between 20% and 
80%. Because measured times are very short[< 1ms], their standard 
deviation are high, but these numbers give an order of magnitude of 
removed extra complexity.

test-setprop.c is compiled like:

$ gcc -Wall test-setprop.c -o test-setprop \
   $(xml2-config --cflags) $(xml2-config --libs) -lrt

== test-setprop.c ==
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <libxml/tree.h>

#define NAME "name"
#define ITER 1000

static void usage(const char *progname) {

   printf("Usage : %s <attribut-value>\n", progname);
}

struct timespec diff(struct timespec start, struct timespec end)
{
   struct timespec time;
   if ((end.tv_nsec-start.tv_nsec)<0) {
     time.tv_sec = end.tv_sec-start.tv_sec-1;
     time.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
   } else {
     time.tv_sec = end.tv_sec-start.tv_sec;
     time.tv_nsec = end.tv_nsec-start.tv_nsec;
   }
   return time;
}

int main(int argc, char *argv[]) {

   if (argc != 2) {

     usage(argv[0]);
     return(1);
   }

   LIBXML_TEST_VERSION;

   xmlDocPtr doc = NULL;
   xmlNodePtr root_node = NULL;

   struct timespec start, end, time;

   doc = xmlNewDoc(BAD_CAST "1.0");
   root_node = xmlNewNode(NULL, BAD_CAST "root");
   xmlDocSetRootElement(doc, root_node);

   unsigned long long result_sec = 0;
   unsigned long long result_nsec = 0;

   unsigned int i;
   for(i = 0; i < ITER; i++) {

     clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
     xmlSetProp(root_node, (unsigned char*)NAME, (unsigned char*)argv[1]);
     clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

     time = diff(start,end);
     result_sec += time.tv_sec;
     result_nsec += time.tv_nsec;
   }

   printf("Time:\t%lld:%09lld\n", result_sec, result_nsec);

   xmlFreeDoc(doc);

   return 0;
}
== test-setprop.c ==

Patch applied to current libxml2 trunk:

== tree.c.patch ==
Index: include/libxml/xmlerror.h
===================================================================
--- include/libxml/xmlerror.h   (revision 3681)
+++ include/libxml/xmlerror.h   (working copy)
@@ -398,6 +398,7 @@
      XML_TREE_INVALID_HEX = 1300,
      XML_TREE_INVALID_DEC, /* 1301 */
      XML_TREE_UNTERMINATED_ENTITY, /* 1302 */
+    XML_TREE_NOT_UTF8, /* 1303 */
      XML_SAVE_NOT_UTF8 = 1400,
      XML_SAVE_CHAR_INVALID, /* 1401 */
      XML_SAVE_NO_DOCTYPE, /* 1402 */
Index: tree.c
===================================================================
--- tree.c      (revision 3681)
+++ tree.c      (working copy)
@@ -92,6 +92,9 @@
        case XML_TREE_UNTERMINATED_ENTITY:
            msg = "unterminated entity reference %15s\n";
            break;
+       case XML_TREE_NOT_UTF8:
+           msg = "string is not in UTF-8\n";
+           break;
        default:
            msg = "unexpected error number\n";
      }
@@ -1814,11 +1817,13 @@
          cur->name = name;

      if (value != NULL) {
-        xmlChar *buffer;
          xmlNodePtr tmp;

-        buffer = xmlEncodeEntitiesReentrant(doc, value);
-        cur->children = xmlStringGetNodeList(doc, buffer);
+        if(!xmlCheckUTF8(value)) {
+            xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) doc,
+                       NULL);
+        }
+        cur->children = xmlNewDocText(doc, value);
          cur->last = NULL;
          tmp = cur->children;
          while (tmp != NULL) {
@@ -1827,7 +1832,6 @@
                  cur->last = tmp;
              tmp = tmp->next;
          }
-        xmlFree(buffer);
      }

      /*
@@ -6466,11 +6470,13 @@
        prop->last = NULL;
        prop->ns = ns;
        if (value != NULL) {
-           xmlChar *buffer;
            xmlNodePtr tmp;
        
-           buffer = xmlEncodeEntitiesReentrant(node->doc, value);
-           prop->children = xmlStringGetNodeList(node->doc, buffer);
+           if(!xmlCheckUTF8(value)) {
+               xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) node->doc,
+                          NULL);
+           }
+           prop->children = xmlNewDocText(node->doc, value);
            prop->last = NULL;
            tmp = prop->children;
            while (tmp != NULL) {
@@ -6479,7 +6485,6 @@
                    prop->last = tmp;
                tmp = tmp->next;
            }
-           xmlFree(buffer);
        }
        if (prop->atype == XML_ATTRIBUTE_ID)
            xmlAddID(NULL, node->doc, value, prop);
== tree.c.patch ==

--
Julien

_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
[email protected]
http://mail.gnome.org/mailman/listinfo/xml

Reply via email to