Package: libxml2-dev
Version: 2.7.8.dfsg-2

        [While filing a bug against an older version of the package, I
        don't seem to find anything in the Debian changelog of
        2.7.8.dfsg-2+squeeze1 that'd suggest that any change was made to
        the behavior described below.]

        The xmlSaveUri () function appears to format URIs with
        :-delimited paths incorrectly: it adds a superfluous // (which
        is simple to resolve, see below), and it %-encodes the :'s
        themselves (much harder, I guess), effectively preventing
        urn:-scheme URNs from being used.  (As in: catalogs.)

        Consider the output of the example program (MIME'd):

URI             urn:example:animal:ferret:nose
scheme          urn
opaque          (null)
authority       (null)
server          (null)
user            (null)
port            0
path            example:animal:ferret:nose
query           (null)
fragment        (null)
cleanup         0
query_raw       (null)
xmlSaveUri      urn://example%3Aanimal%3Aferret%3Anose

        Cf. the example in RFC 3986, section 3 [1]:

--cut--
         foo://example.com:8042/over/there?name=ferret#nose
         \_/   \______________/\_________/ \_________/ \__/
          |           |            |            |        |
       scheme     authority       path        query   fragment
          |   _____________________|__
         / \ /                        \
         urn:example:animal:ferret:nose
--cut--

        As the example URN has no authority part, it shouldn't have the
        // separator either.

[1] http://tools.ietf.org/html/rfc3986#section-3

        The relevant parts of the code (as of 2ee91eb6) seem to be:

   999  xmlChar *
  1000  xmlSaveUri(xmlURIPtr uri) {
×
  1019      if (uri->scheme != NULL) {
× formatting the scheme×
  1047      }
  1048      if (uri->opaque != NULL) {
×
  1072      } else {
  1073          if (uri->server != NULL) {
× adding //[USER@]SERVER[:PORT]
  1161          } else if (uri->authority != NULL) {
× adding //AUTHORITY
  1203          } else if (uri->scheme != NULL) {
×
  1216              ret[len++] = '/';
  1217              ret[len++] = '/';

        Here, we've added the superfluous // part.  Arguably, it should
        only be done for the file: scheme, and even then, it may be
        worth using an explicit empty string for uri->server instead.

  1218          }
  1219          if (uri->path != NULL) {
×
  1245              while (*p != 0) {
×
  1258                  if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
  1259                      ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
  1260                      ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
  1261                      ((*(p) == ',')))

        Note that the :'s aren't in the list above.

  1262                      ret[len++] = *p++;
  1263                  else {
  1264                      int val = *(unsigned char *)p++;
  1265                      int hi = val / 0x10, lo = val % 0x10;
  1266                      ret[len++] = '%';
  1267                      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1268                      ret[len++] = lo + (lo > 9? 'A'-10 : '0');

        And here, we're %-encoding the :'s.

        This issue is, however, harder to overcome: unless
        uri->cleanup |= 2 is done before parsing,
        xmlURIUnescapeString () will be called on the path part of the
        URI, thus effectively making %-encoded :'s indistinguishable
        from the :'s used as URN path delimiters.

  1269                  }
  1270              }
  1271          }

-- 
FSF associate member #7257
/*** b87fb7f6-2b9c-11e1-aa29-001966aaa0b6.c  -*- C -*- */

/*** Code: */
#include <assert.h>             /* for assert () */
#include <stdio.h>              /* for printf () */

#include <libxml/uri.h>
#include <libxml/xmlmemory.h>   /* for xmlFree () */
#include <libxml/xmlstring.h>   /* for xmlChar */

int
main ()
{
  /* NB: as per the example from RFC 3986, section 3;
     http://tools.ietf.org/html/rfc3986#section-3 */
  const char *uri_s
    = "urn:example:animal:ferret:nose";

  xmlURIPtr uri
    = xmlParseURI (uri_s);

  assert (uri != 0);

  char *uri_saved_s
    = xmlSaveUri (uri);

  assert (uri_saved_s != 0);

  /* NB: assume that printf () is able to %s-format NULL */
  printf (("%-15s %s\n"
           "%-15s %s\n" "%-15s %s\n" "%-15s %s\n" "%-15s %s\n"
           "%-15s %s\n" "%-15s %d\n" "%-15s %s\n" "%-15s %s\n"
           "%-15s %s\n" "%-15s %d\n" "%-15s %s\n"
           "%-15s %s\n"),
          "URI",        uri_s,          /* -1 */
          "scheme",     uri->scheme,    /* 0 */
          "opaque",     uri->opaque,
          "authority",  uri->authority,
          "server",     uri->server,
          "user",       uri->user,      /* 4 */
          "port",       uri->port,      /* NB: %d */
          "path",       uri->path,
          "query",      uri->query,
          "fragment",   uri->fragment,  /* 8 */
          "cleanup",    uri->cleanup, /* NB: %d */
          "query_raw",  uri->query_raw,
          "xmlSaveUri", uri_saved_s);

  /* . */
  return 0;
}

/*** b87fb7f6-2b9c-11e1-aa29-001966aaa0b6.c ends here */

Reply via email to