Hi!

----

Attached is a small prototype patch
("ksh93_libxml2_sax_bindings20071205.diff.txt"; it's the same as
attached to http://bugs.grommit.com/attachment.cgi?id=164) which adds
SAX-style XML parsing support to ksh93 via a new "xmlsaxparse" builtin
command.
The idea is to provide a builtin XML parsing facility to handle the
shortcomings of an XML parser written in ksh93 code, e.g. ...
- ... it's difficult to handle encoding conversions (e.g. document in
ASCII, ISO8859-1 or UTF-8 while the shell runs in the current user's
locale. It could be solved via /usr/bin/iconv but this is _slow_ because
it would be done on a per word-basis)
- ... it's slow (assuming the parser should really handle all details of
XML) 
- ... doesn't handle any extra XML features like external entity
support, namespaces, DTD, Schemas etc. unless the ksh93 script code
becomes very large&&complex

AFAIK it may be better to handle the issue via builtin XML support based
on libxml2 which is loaded via libshell.so.1

Example usage (the usage assumes some knowledge how the SAX API works):
1. Create an associative array which contains the names of shell
functions which should be called per SAX callback
2. Load the "xmlsaxparse" builtin
3. Run the "xmlsaxparse" command to parse a specific document
("myfile.xml" in the example below). The builtin will call the matching
callbacks for each SAX event hit.

Example code fragment:
-- snip --
function run_xml_sax
{
    # Load the builtin from libshell; give up if it is not available.
    builtin -f libshell.so.1 xmlsaxparse || fatal_error "xmlsaxparse builtin not found."

    # Map SAX event names to the shell functions which handle them.
    typeset -A xcallbacks=(
        ["startelement"]="mystartelement_callback"
        ["startdocument"]="mystartdocument_callback"
        ["enddocument"]="myenddocument_callback"
        ["characters"]="mytextdata_callback"
    )

    # -u: string made available to the callbacks as "<compoundvar>.userdata"
    # -c: additionally pass the callback name to each callback as arg1
    xmlsaxparse -u "user-defined_data_string" -c xcallbacks "myfile.xml"
}
-- snip --
The callback functions are called with the callback name itself passed
as arg1 (if "xmlsaxparse" is called with the "-c" argument, otherwise
the callback name is omitted) and a name of a (temporary) compound
variable as arg2 (and the variable "<compoundvar>.userdata" contains the
string passed via the "-u" option to "xmlsaxparse"). The SAX parser will
abort the SAX run if any registered callback function returns a non-zero
return code.

Notes:
- I've modified the "xmldocumenttree1" and "rssread" demos to use the
"xmlsaxparse" builtin if they are called using the "-S" option
- The code automagically converts the document encoding to the current
locale/encoding of the shell, even if LC_*/LANG is changed in one of the
callbacks
- The code is fully re-entrant, e.g. a callback function may itself
start another "xmlsaxparse" run
- The patch is only a technology demonstrator. AFAIK a better way may be
to use libxml2's "xmlreader" API which won't require a complex set of
callbacks. I'll try to post a prototype patch for that either tonight or
later next week

Comments/rants/etc welcome...

----

Bye,
Roland

-- 
  __ .  . __
 (o.\ \/ /.o) roland.mainz at nrubsig.org
  \__\/\/__/  MPEG specialist, C&&JAVA&&Sun&&Unix programmer
  /O /==\ O\  TEL +49 641 7950090
 (;O/ \/ \O;)
-------------- next part --------------
Index: src/lib/libshell/common/fun/xmldocumenttree1
===================================================================
--- src/lib/libshell/common/fun/xmldocumenttree1        (revision 903)
+++ src/lib/libshell/common/fun/xmldocumenttree1        (working copy)
@@ -228,15 +228,76 @@
         fi
     done
 
-    [[ ! -z "${callbacks["document_end"]}" ]] && 
${callbacks["document_start"]} "${1}" "document_end" "exit_success"
+    [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} 
"${1}" "document_end" "exit_success"
     
     print # final newline to make filters like "sed" happy
 }
 
+# This is a translation layer between the "xmlsaxparse" builtin and the original "xml_tok" function
+function xml_tok_builtin_sax_dispatch
+{
+    typeset callback_name="$1"               # SAX event name (passed because "xmlsaxparse" is run with "-c")
+    nameref sax_ctx="$2"                     # temporary SAX context compound variable
+    nameref callbacks="${sax_ctx.userdata}"  # userdata carries the name of the xml_tok callback array
+    # $3 is the element name, comment or text; $4 (startelement only) names the attribute array
+#    print -u2 "## numargs=$# args=$@"
+
+    case "${callback_name}" in
+        startelement)
+            typeset s=""
+            if (( $# == 4 )) ; then
+                nameref attrlist=$4
+                integer i
+                # attrlist alternates names and values; rebuild the attribute string handed to "tag_begin"
+                for ((i=0 ; i < ${#attrlist[*]} ; i+=2 )) ; do
+                    s+="${attrlist[i]}=\"${attrlist[i+1]}\" "
+                done
+            fi
+            [[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${sax_ctx.userdata}" "tag_begin" "${3}" "$s"
+            ;;
+        endelement)
+            [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${sax_ctx.userdata}" "tag_end" "${3}"
+            ;;
+        startdocument)
+            [[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${sax_ctx.userdata}" "document_start"
+            ;;
+        enddocument)
+            [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${sax_ctx.userdata}" "document_end" "exit_success"
+            ;;
+        comment)
+            [[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${sax_ctx.userdata}" "tag_comment" "${3}"
+            ;;
+        characters)
+            [[ ! -z "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${sax_ctx.userdata}" "tag_text" "${3}"
+            ;;
+
+        *)
+            fatal_error "Unknown callback type ${callback_name}."
+            ;;
+    esac
+    # always succeed: "xmlsaxparse" aborts the SAX run when a callback returns non-zero
+    return 0
+}
+
+function xml_tok_builtin_sax
+{
+    builtin -f libshell.so.1 xmlsaxparse || fatal_error "xmlsaxparse builtin not found."
+    # Stand-in for "xml_tok": every SAX event goes through the compatibility dispatcher
+    typeset -A xcallbacks=(
+        ["startelement"]="xml_tok_builtin_sax_dispatch"
+        ["endelement"]="xml_tok_builtin_sax_dispatch"
+        ["startdocument"]="xml_tok_builtin_sax_dispatch"
+        ["enddocument"]="xml_tok_builtin_sax_dispatch"
+        ["comment"]="xml_tok_builtin_sax_dispatch"
+        ["characters"]="xml_tok_builtin_sax_dispatch"
+    )
+    # $1 (name of the xml_tok callback array) travels as userdata; "-" presumably selects stdin (callers pipe the document in)
+    xmlsaxparse -u "$1" -c xcallbacks "-"
+}
+
 function print_sample1_xml
 {
 cat <<EOF
-<br />
 <score-partwise instrument="flute1">
         <identification>
             <kaiman>nocrocodile</kaiman>
@@ -249,6 +310,8 @@
             <!-- another
                  comment -->
             <ttt>myttttext</ttt>
+           <eee>&lt;my_ent&gt;</eee>
+           <fff>&amp;lt;my_ent&amp;gt;</fff>
         </partlist>
 </score-partwise>
 EOF
@@ -270,22 +333,27 @@
 typeset progname="$(basename "${0}")"
 
 USAGE=$'
-[-?\n@(#)\$Id: xmldocumenttree1 (Roland Mainz) 2007-10-26 \$\n]
-[-author?Roland Mainz <roland.mainz at nrubsig.org]
+[-?\n@(#)\$Id: xmldocumenttree1 (Roland Mainz) 2007-11-26 \$\n]
+[-author?Roland Mainz <roland.mainz at nrubsig.org>]
 [+NAME?xmldocumenttree1 - XML tree demo]
 [+DESCRIPTION?\bxmldocumenttree\b is a small ksh93 compound variable demo
         which reads a XML input file, converts it into an internal
         variable tree representation and outputs it in the format
         specified by viewmode (either "list", "namelist" or "tree").]
+[S:builtin_sax_tok?Use experimental SAX builtin tokenizer.]
 
 file viewmode
 
 [+SEE ALSO?\bksh93\b(1)]
 '
 
+# define which type of tokenizer we should use
+typeset XML_TOK_FUNC="xml_tok"
+
 while getopts -a "${progname}" "${USAGE}" OPT ; do 
 #    printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
     case ${OPT} in
+        S)    XML_TOK_FUNC="xml_tok_builtin_sax" ;;
         *)    usage ;;
     esac
 done
@@ -323,16 +391,16 @@
 
 
 if [[ "${xmlfile}" = "#sample1" ]] ; then
-    print_sample1_xml | xml_tok document_cb
+    print_sample1_xml | ${XML_TOK_FUNC} document_cb
 elif [[ "${xmlfile}" = "#sample2" ]] ; then
     /usr/sfw/bin/wget \
             --user-agent='ksh93_xmldocumenttree' \
            --output-document=- \
            'http://www.google.com/custom?q=gummi+bears' |
         /usr/bin/iconv -f "ISO8859-1" |
-        xml_tok document_cb
+        ${XML_TOK_FUNC} document_cb
 else
-    cat "${xmlfile}" | xml_tok document_cb
+    cat "${xmlfile}" | ${XML_TOK_FUNC} document_cb
 fi
 
 print -u2 "#parsing completed."
Index: src/lib/libshell/common/fun/rssread
===================================================================
--- src/lib/libshell/common/fun/rssread (revision 903)
+++ src/lib/libshell/common/fun/rssread (working copy)
@@ -147,7 +147,7 @@
     # send HTTP request    
     request="GET /${path} HTTP/1.1\r\n"
     request+="Host: ${host}\r\n"
-    request+="User-Agent: rssread/ksh93 (2007-10-28; $(uname -s -r -p))\r\n"
+    request+="User-Agent: rssread/ksh93 (2007-11-11; $(uname -s -r -p))\r\n"
     request+="Connection: close\r\n"
     print -n -- "${request}\r\n" >&${netfd}
     
@@ -170,12 +170,17 @@
     typeset value
 
     # Todo: Add more HTML/MathML entities here
-    entity_cache["nbsp"]=' '
-    entity_cache["lt"]='<'
-    entity_cache["gt"]='>'
-    entity_cache["amp"]='&'
-    entity_cache["quot"]='"'
-    entity_cache["apos"]="'"
+    entity_cache=(
+        # entity to ascii (fixme: add UTF-8 transliterations)
+       ["nbsp"]=' '
+       ["lt"]='<'
+       ["le"]='<='
+       ["gt"]='>'
+       ["ge"]='>='
+       ["amp"]='&'
+       ["quot"]='"'
+       ["apos"]="'"
+    )
     
     buf=""
     while IFS='' read -r -N 1 c ; do
@@ -195,7 +200,7 @@
                     continue
                     ;;
                 *)
-                    debugmsg "error &${entity}${c}#"
+#                    debugmsg "error &${entity}${c}#"
 
                     print -n -r -- "${entity}${c}"
                     entity=""
@@ -206,7 +211,7 @@
         
         value=""
         if [[ "${entity_cache["${entity}"]}" != "" ]] ; then
-            debugmsg "match #${entity}# = #${entity_cache["${entity}"]}#"
+#            debugmsg "match #${entity}# = #${entity_cache["${entity}"]}#"
             value="${entity_cache["${entity}"]}"
         else
             if [[ "${entity:0:1}" = "#" ]] ; then
@@ -222,7 +227,7 @@
 
             entity_cache["${entity}"]="${value}"
 
-            debugmsg "lookup #${entity}# = #${entity_cache["${entity}"]}#"
+#            debugmsg "lookup #${entity}# = #${entity_cache["${entity}"]}#"
         fi
 
         printf "%s" "${value}"
@@ -300,7 +305,8 @@
                 item)
                     # note that each RSS item needs to be converted seperately 
from RSS to HTML to plain text
                     # to make sure that the state of one RSS item doesn't 
affect others
-                    (
+                    {
+                       print "<div>"
                         printf $"<br />#### RSS item: title: %s ####" 
"${item["title"]}"
                         printf $"<br />## author: %s" "${item["author"]}"
                         printf $"<br />## link:   %s" "${item["link"]}"
@@ -308,11 +314,18 @@
                         printf $"<br />## begin description:"
                         printf $"<br />%s<br />" "${item["description"]}"
                         printf $"<br />## end description<br />"
+                       print "</div>"
                         print # extra newline to make sure the sed pipeline 
gets flushed
-                    ) | 
-                        html_entity_to_ascii |       # convert XML entities 
(e.g. decode RSS content to HTML code)
-                        xml_tok "xhtmltok_cb" |      # convert HTML to plain 
text
-                        html_entity_to_ascii         # convert HTML entities
+                    } | 
+                   {
+                       if [[ "${XML_TOK_FUNC}" = "xml_tok_builtin_sax" ]] ; 
then
+                           ${XML_TOK_FUNC} "xhtmltok_cb"
+                       else
+                           html_entity_to_ascii | # decode RSS (convert RSS 
content to HTML code)
+                           ${XML_TOK_FUNC} "xhtmltok_cb" |
+                           html_entity_to_ascii # decode HTML entities
+                       fi
+                   }
                     ;;
                 title)                item["title"]="${callbacks["textbuf"]}"  
      ; callbacks["textbuf"]="" ;;
                 link)                 item["link"]="${callbacks["textbuf"]}"   
      ; callbacks["textbuf"]="" ;;
@@ -346,7 +359,7 @@
         isendtag=false
         
         if [[ "$c" = "<" ]] ; then
-           # flush any text content
+            # flush any text content
             if [[ "$buf" != "" ]] ; then
                 [[ ! -z "${callbacks["tag_text"]}" ]] && 
${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
                 buf=""
@@ -360,40 +373,40 @@
             fi
             IFS='' read -r -d '>' c
             buf+="$c"
-           
-           # handle comments
-           if [[ "$buf" = ~(El)!-- ]] ; then
-               # did we read the comment completely ?
-               if [[ "$buf" != ~(Elr)!--.*-- ]] ; then
-                   buf+=">"
-                   while [[ "$buf" != ~(Elr)!--.*-- ]] ; do
-                       IFS='' read -r -N 1 c || break
-                       buf+="$c"
-                   done
-               fi
-           
-               [[ ! -z "${callbacks["tag_comment"]}" ]] && 
${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
-               buf=""
-               continue
-           fi
-           
-           # check if the tag starts and ends at the same time (like "<br />")
-           if [[ "${buf}" = ~(Er).*/ ]] ; then
-               issingletag=true
-               buf="${buf%*/}"
-           else
-               issingletag=false
-           fi
-           
-           # check if the tag has attributes (e.g. space after name)
-           if [[ "$buf" = ~(E)[[:space:][:blank:]] ]] ; then
-               namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
+            
+            # handle comments
+            if [[ "$buf" = ~(El)!-- ]] ; then
+                # did we read the comment completely ?
+                if [[ "$buf" != ~(Elr)!--.*-- ]] ; then
+                    buf+=">"
+                    while [[ "$buf" != ~(Elr)!--.*-- ]] ; do
+                        IFS='' read -r -N 1 c || break
+                        buf+="$c"
+                    done
+                fi
+            
+                [[ ! -z "${callbacks["tag_comment"]}" ]] && 
${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
+                buf=""
+                continue
+            fi
+            
+            # check if the tag starts and ends at the same time (like "<br />")
+            if [[ "${buf}" = ~(Er).*/ ]] ; then
+                issingletag=true
+                buf="${buf%*/}"
+            else
+                issingletag=false
+            fi
+            
+            # check if the tag has attributes (e.g. space after name)
+            if [[ "$buf" = ~(E)[[:space:][:blank:]] ]] ; then
+                namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
                 attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
             else
-               namebuf="$buf"
-               attrbuf=""
-           fi
-           
+                namebuf="$buf"
+                attrbuf=""
+            fi
+            
             if ${isendtag} ; then
                 [[ ! -z "${callbacks["tag_end"]}" ]] && 
${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
             else
@@ -410,11 +423,73 @@
         fi
     done
 
-    [[ ! -z "${callbacks["document_end"]}" ]] && 
${callbacks["document_start"]} "${1}" "document_end" "exit_success"
+    [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} 
"${1}" "document_end" "exit_success"
     
     print # final newline to make filters like "sed" happy
 }
 
+# This is a translation layer between the "xmlsaxparse" builtin and the original "xml_tok" function
+function xml_tok_builtin_sax_dispatch
+{
+    typeset callback_name="$1"               # SAX event name (passed because "xmlsaxparse" is run with "-c")
+    nameref sax_ctx="$2"                     # temporary SAX context compound variable
+    nameref callbacks="${sax_ctx.userdata}"  # userdata carries the name of the xml_tok callback array
+    # $3 is the element name, comment or text; $4 (startelement only) names the attribute array
+#    print -u2 "## numargs=$# args=$@"
+
+    case "${callback_name}" in
+        startelement)
+            typeset s=""
+            if (( $# == 4 )) ; then
+                nameref attrlist=$4
+                integer i
+                # attrlist alternates names and values; rebuild the attribute string handed to "tag_begin"
+                for ((i=0 ; i < ${#attrlist[*]} ; i+=2 )) ; do
+                    s+="${attrlist[i]}=\"${attrlist[i+1]}\" "
+                done
+            fi
+            [[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${sax_ctx.userdata}" "tag_begin" "${3}" "$s"
+            ;;
+        endelement)
+            [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${sax_ctx.userdata}" "tag_end" "${3}"
+            ;;
+        startdocument)
+            [[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${sax_ctx.userdata}" "document_start"
+            ;;
+        enddocument)
+            [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${sax_ctx.userdata}" "document_end" "exit_success"
+            ;;
+        comment)
+            [[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${sax_ctx.userdata}" "tag_comment" "${3}"
+            ;;
+        characters)
+            [[ ! -z "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${sax_ctx.userdata}" "tag_text" "${3}"
+            ;;
+
+        *)
+            fatal_error "Unknown callback type ${callback_name}."
+            ;;
+    esac
+    # always succeed: "xmlsaxparse" aborts the SAX run when a callback returns non-zero
+    return 0
+}
+
+function xml_tok_builtin_sax
+{
+    builtin -f libshell.so.1 xmlsaxparse || fatal_error "xmlsaxparse builtin not found."
+    # Stand-in for "xml_tok": every SAX event goes through the compatibility dispatcher
+    typeset -A xcallbacks=(
+        ["startelement"]="xml_tok_builtin_sax_dispatch"
+        ["endelement"]="xml_tok_builtin_sax_dispatch"
+        ["startdocument"]="xml_tok_builtin_sax_dispatch"
+        ["enddocument"]="xml_tok_builtin_sax_dispatch"
+        ["comment"]="xml_tok_builtin_sax_dispatch"
+        ["characters"]="xml_tok_builtin_sax_dispatch"
+    )
+    # $1 (name of the xml_tok callback array) travels as userdata; "-" presumably selects stdin (callers pipe the document in)
+    xmlsaxparse -u "$1" -c xcallbacks "-"
+}
+
 # return the value of LC_MESSAGES needed for subprocesses which
 # want to run in a different locale/encoding
 function get_lc_messages
@@ -441,7 +516,7 @@
         LANG="en_US.UTF-8"
         
     cat_http "$1" |
-        xml_tok "rsstok_cb"
+        ${XML_TOK_FUNC} "rsstok_cb"
 }
 
 function usage
@@ -486,30 +561,36 @@
     ["planetsolaris"]="http://www.planetsolaris.org/rss20.xml";
     ["planetopensolaris"]="http://planet.opensolaris.org/rss20.xml";
     ["theregister_uk"]="http://www.theregister.co.uk/headlines.rss";
+    ["heise"]="http://www.heise.de/newsticker/heise.rdf";
+    ["slashdot"]="http://rss.slashdot.org/Slashdot/slashdot";
 )
 
 typeset progname="$(basename "${0}")"
 
 USAGE=$'
-[-?\n@(#)\$Id: rssread (Roland Mainz) 2007-10-28 \$\n]
-[-author?Roland Mainz <roland.mainz at nrubsig.org]
+[-?\n@(#)\$Id: rssread (Roland Mainz) 2007-12-01 \$\n]
+[-author?Roland Mainz <roland.mainz at nrubsig.org>]
 [+NAME?rssread - fetch RSS messages and convert them to plain text]
 [+DESCRIPTION?\brssread\b RSS to plain text converter
         which fetches RSS streams via HTTP and converts them from
        RSS to HTML to plain text in the current locale/encoding.]
 [I:noiconv?Do not convert data from UTF-8 to current locale/encoding.]
+[S:builtin_sax_tok?Use experimental SAX builtin tokenizer.]
 
 [ url ]
 
 [+SEE ALSO?\bksh93\b(1), \bshnote\b(1)]
 '
 
+# define which type of tokenizer we should use
+typeset XML_TOK_FUNC="xml_tok"
 typeset noiconv=false
 
 while getopts -a "${progname}" "${USAGE}" OPT ; do 
 #    printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
     case ${OPT} in
         I)    noiconv=true ;;
+        S)    XML_TOK_FUNC="xml_tok_builtin_sax" ;;
         *)    usage ;;
     esac
 done
Index: src/lib/libshell/common/bltins/xml.c
===================================================================
--- src/lib/libshell/common/bltins/xml.c        (revision 0)
+++ src/lib/libshell/common/bltins/xml.c        (revision 0)
@@ -0,0 +1,1298 @@
+/***********************************************************************
+*                                                                      *
+*               This software is part of the ast package               *
+*          Copyright (c) 1982-2007 AT&T Intellectual Property          *
+*                      and is licensed under the                       *
+*                  Common Public License, Version 1.0                  *
+*                    by AT&T Intellectual Property                     *
+*                                                                      *
+*                A copy of the License is available at                 *
+*            http://www.opensource.org/licenses/cpl1.0.txt             *
+*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
+*                                                                      *
+*              Information and Software Systems Research               *
+*                            AT&T Research                             *
+*                           Florham Park NJ                            *
+*                                                                      *
+*                  David Korn <dgk at research.att.com>                   *
+*                                                                      *
+***********************************************************************/
+#pragma prototyped
+
+#include       <shell.h>
+#include       <stdio.h>
+#include       <option.h>
+#include       <stk.h>
+#include       <tm.h>
+#include       "name.h"
+#ifndef SH_DICT
+#   define SH_DICT     "libshell"
+#endif
+
+/* libxml functions */
+#include <libxml/xmlmemory.h>
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+
+#include <alloca.h>
+#include <dlfcn.h>
+
+#define sh_contexttoshb(context)       ((Shbltin_t*)(context)) /* view a builtin context pointer as |Shbltin_t*| */
+#define sh_contexttoshell(context)     ((context)?(sh_contexttoshb(context)->shp):(NULL)) /* the context's |shp| member, or NULL for a NULL context */
+
+/* debug */
+#define D(x)
+
+static const char sh_optxmlsaxparse[] = /* usage/option description of the "xmlsaxparse" builtin; one string built from adjacent literals */
+"[-?\n@(#)$Id: xmlsaxparse (AT&T Labs Research) 2007-11-28 $\n]"
+"[-author?Roland Mainz <roland.mainz at nrubsig.org>]"
+"[-license?http://www.opensource.org/licenses/cpl1.0.txt]"
+"[+NAME? xmlsaxparse - XML SAX parser interface]"
+"[+DESCRIPTION?test.]"
+"[c:callbacktags?Pass the name of the callbacks as callback argument.]"
+"[u:userdata]:[string?String passed to callback functions via context.userdata.]"
+"\n"
+"\nvar filename\n"
+"\n"
+"[+EXIT STATUS?]{"
+        "[+0?Success.]"
+        "[+>0?An error occurred.]"
+"}"
+"[+SEE ALSO?\blibxml\b(3)]"
+;
+
+static /* |nv_open()| with a printf-style variable name; NULL if the formatted name does not fit, else whatever |nv_open()| returns */
+Namval_t *nv_open_fmt(Dt_t *dict, int flags, const char *namefmt, ...)
+{
+       char    varnamebuff[PATH_MAX];
+       va_list ap;
+       int     len;
+       va_start(ap, namefmt);
+       len = vsnprintf(varnamebuff, sizeof(varnamebuff), namefmt, ap);
+       va_end(ap);
+       /* a truncated name would silently address a different variable, so treat it as failure */
+       return (len < 0 || (size_t)len >= sizeof(varnamebuff)) ? NULL : nv_open(varnamebuff, dict, flags);
+}
+
+
+static /* unset the variable whose name is built printf-style; TRUE if it was opened and unset, FALSE otherwise */
+int nv_unset_fmt(Dt_t *dict, int flags, const char *namefmt, ...)
+{
+       char      varnamebuff[PATH_MAX];
+       va_list   ap;
+       Namval_t *np;
+       int       len;
+       va_start(ap, namefmt);
+       len = vsnprintf(varnamebuff, sizeof(varnamebuff), namefmt, ap);
+       va_end(ap);
+       if (len < 0 || (size_t)len >= sizeof(varnamebuff))
+               return FALSE; /* a truncated name could unset the wrong variable */
+       np = nv_open(varnamebuff, dict, flags);
+       if (!np)
+               return FALSE;
+       nv_unset(np);
+       nv_close(np);
+       return TRUE;
+}
+
+static /* assign |val| (with |valflags|) to the variable whose name is built printf-style; TRUE on success, FALSE otherwise */
+int nv_putval_fmt(Dt_t *dict, const char *val, int valflags, int openflags, const char *namefmt, ...)
+{
+       char      varnamebuff[PATH_MAX];
+       va_list   ap;
+       Namval_t *np;
+       int       len;
+       va_start(ap, namefmt);
+       len = vsnprintf(varnamebuff, sizeof(varnamebuff), namefmt, ap);
+       va_end(ap);
+       if (len < 0 || (size_t)len >= sizeof(varnamebuff))
+               return FALSE; /* a truncated name could overwrite the wrong variable */
+       np = nv_open(varnamebuff, dict, openflags);
+       if (!np)
+               return FALSE;
+
+       nv_putval(np, val, valflags);
+       nv_close(np);
+       return TRUE;
+}
+
+static /* convert |str| from UTF-8 via iconv; returns a malloc()ed NUL-terminated string (caller frees) or NULL for NULL input/errors */
+char *xmlchartomb(const xmlChar *str)
+{
+       char *dest = NULL;
+       iconv_t cd;
+       char *outp;
+       char *p = (char *) str;
+       size_t inbytes_remaining = (p != NULL) ? strlen(p) : 0;
+       size_t outbuf_size = (inbytes_remaining + 1) * MB_LEN_MAX/2; /* guess for initial size */
+       size_t outbytes_remaining;
+       size_t err;
+       int have_error = FALSE;
+       int save_errno;
+       if (str == NULL) /* |strlen(NULL)| is undefined; report "no string" instead of crashing */
+               return NULL;
+       outbytes_remaining = outbuf_size - 1; /* keep one byte for the terminating NUL */
+       /* NOTE(review): NULL target codeset - presumably "current locale encoding" for this iconv; confirm */
+       cd = iconv_open (NULL, "UTF-8");
+       if (cd == (iconv_t) -1)
+               return NULL;
+
+       outp = dest = malloc (outbuf_size);
+       if (dest == NULL)
+               goto out;
+
+again:
+       err = iconv (cd, &p, &inbytes_remaining, &outp, &outbytes_remaining);
+
+       if (err == (size_t)-1)
+       {
+               switch (errno)
+               {
+                       case EINVAL:
+                               /* Incomplete text (ignored) */
+                               break;
+
+                       case E2BIG:
+                       {
+                               /* Grow output buffer if it's too small */
+                               size_t used = outp - dest;
+                               size_t newsize = outbuf_size * 2;
+                               char *newdest;
+                               /* doubling wrapped around: the buffer cannot grow any further */
+                               if (newsize <= outbuf_size)
+                               {
+                                       errno = ENOMEM;
+                                       have_error = TRUE;
+                                       goto out;
+                               }
+                               newdest = realloc (dest, newsize);
+                               if (newdest == NULL)
+                               {
+                                       have_error = TRUE;
+                                       goto out;
+                               }
+                               dest = newdest;
+                               outbuf_size = newsize;
+                               /* the buffer may have moved: re-derive the write position */
+                               outp = dest + used;
+                               outbytes_remaining = outbuf_size - used - 1;
+
+                               goto again;
+                       }
+                               break;
+
+                       case EILSEQ:
+                               have_error = TRUE;
+                               break;
+
+                       default:
+                               have_error = TRUE;
+                               break;
+               }
+       }
+
+       *outp = '\0';
+
+out:
+       save_errno = errno;
+       /* a failing |iconv_close()| only counts if nothing went wrong before */
+       if (iconv_close (cd) < 0 && !have_error)
+       {
+               save_errno = errno;
+               have_error = TRUE;
+       }
+
+       if (have_error && dest)
+       {
+               free (dest);
+               dest = NULL;
+               errno = save_errno;
+       }
+
+       return dest;
+}
+
+
+typedef void (*shSAXfunc)(void); /* generic function pointer type used to store and compare the C SAX callbacks */
+/* One entry of the SAX callback table which |shSAXcommon()| searches */
+typedef struct _shSAXfuncRec
+{
+    const char *name; /* callback name; an entry with a NULL name terminates the table */
+    shSAXfunc  *pfunc; /* NOTE(review): presumably the SAX handler slot this entry is installed into - confirm */
+    shSAXfunc   sfunc; /* C callback; |shSAXcommon()| compares its |f| argument against this */
+    char *shellfunc; /* shell function run for this callback; NULL means none is registered */
+} shSAXfuncRec;
+
+/* Function datatype used to load |xmlSAXParseFileWithData| via |dlsym()| */
+typedef xmlDocPtr (*shxmlSAXParseFileWithDataFunc) (xmlSAXHandlerPtr sax,
+                                        const char *filename,
+                                        int recovery,
+                                        void *data);
+/* Function datatype used to load |xmlStopParser| via |dlsym()| */
+typedef void (*shxmlStopParserFunc) (xmlParserCtxtPtr ctxt);
+
+typedef struct _shxmlcontext /* per-parse state; callbacks reach it through the parser context's |_private| member */
+{
+    void         *shextra; /* builtin context; |sh_contexttoshell()| is applied to it to obtain the shell */
+    shSAXfuncRec *shxmlfunctions; /* callback table, terminated by an entry with a NULL name */
+    int           ctxvar_init; /* was the context variable created yet ? */
+    char          ctxnamebuff[128]; /* name of the context compound variable ("sax_ctx<hex>") */
+    char         *userdata; /* userdata passed as argument */
+    int           do_callbacknames; /* pass names of callbacks as argument to callbacks */
+
+    /* functions loaded via |dlsym()| */
+    shxmlStopParserFunc xmlstopparser;
+} shxmlcontext;
+
+#define XMLCTX2SHXMLCTX(ctx) ((shxmlcontext *)(((xmlParserCtxt *)(ctx))->_private)) /* SAX |ctx| pointer -> our |shxmlcontext| stored in |_private| */
+
+static /* create the compound variable "sax_ctx<hex>" plus its ".userdata" member; TRUE on success, FALSE if it cannot be opened */
+int shCreateSAXctxCompoundVar(void *ctx)
+{
+    shxmlcontext *shxmlctx = XMLCTX2SHXMLCTX(ctx);
+    Shell_t      *shp      = sh_contexttoshell(shxmlctx->shextra);
+    Namval_t     *np;
+    /* the name is derived from the parser context's address; bounded write, and |%lx| wants an unsigned long */
+    snprintf(shxmlctx->ctxnamebuff, sizeof(shxmlctx->ctxnamebuff), "sax_ctx%lx", (unsigned long)ctx);
+    /* open the variable and make it a variable tree (compound variable) */
+    np = nv_open_fmt(shp->var_tree, NV_VARNAME|NV_NOFAIL, "%s", shxmlctx->ctxnamebuff);
+    if (!np)
+       return FALSE;
+    nv_setvtree(np);
+    nv_close(np);
+    /* best effort: a failure to store the userdata string is deliberately ignored */
+    (void)nv_putval_fmt(shp->var_tree, shxmlctx->userdata, 0, NV_VARNAME|NV_NOFAIL, "%s.userdata", shxmlctx->ctxnamebuff);
+
+    return TRUE;
+}
+
+static /* unset the context compound variable named in |ctxnamebuff| */
+void shDestroySAXctxCompoundVar(shxmlcontext *shxmlctx)
+{
+       Shell_t  *shp = sh_contexttoshell(shxmlctx->shextra);
+       /* an empty name buffer means there is no variable to remove */
+
+       if (shxmlctx->ctxnamebuff[0] == '\0')
+               return;
+
+       /* remove the temporary SAX context variable (result deliberately ignored) */
+       (void)nv_unset_fmt(shp->var_tree, NV_VARNAME|NV_NOFAIL, "%s", shxmlctx->ctxnamebuff);
+}
+
+static /* common prologue of the SAX callbacks: appends the leading argv entries for |f|; returns 0 if a shell function should be run, 1 otherwise */
+int shSAXcommon(void *ctx, shSAXfunc f, int *ac, char **av)
+{
+    shxmlcontext *sctx   = XMLCTX2SHXMLCTX(ctx);
+    shSAXfuncRec *rec;
+    char         *cbname = NULL,
+                 *shfunc = NULL;
+    /* create the context variable on first use; attempted again on later calls while it has not succeeded */
+    if (!sctx->ctxvar_init)
+    {
+        sctx->ctxvar_init = shCreateSAXctxCompoundVar(ctx);
+    }
+
+    /* walk the whole NULL-name-terminated table; there is no early exit, so the last match wins */
+    for (rec = sctx->shxmlfunctions ; rec->name != NULL ; rec++)
+    {
+        if (rec->sfunc != f)
+            continue;
+        cbname = (char *)rec->name;
+        shfunc = rec->shellfunc;
+    }
+
+    if (cbname == NULL || shfunc == NULL) /* unknown |f|, or no shell function registered for it */
+        return 1;
+
+    av[(*ac)++] = shfunc;                 /* shell function */
+    if (sctx->do_callbacknames)
+        av[(*ac)++] = cbname;             /* type of callback */
+    av[(*ac)++] = sctx->ctxnamebuff;      /* name of the context variable */
+
+    return 0;
+}
+
+/**
+ * shSAXresolveEntity:
+ * @ctx:  the user data (XML parser context), unused
+ * @publicId: The public ID of the entity, unused
+ * @systemId: The system ID of the entity, unused
+ *
+ * SAX "resolveEntity" callback, i.e. the hook which controls the
+ * loading of external entities. Placeholder only: the arguments are
+ * ignored and no shell function is called.
+ *
+ * Returns NULL unconditionally.
+ */
+static
+xmlParserInputPtr shSAXresolveEntity(void *ctx,
+                                    const xmlChar *publicId,
+                                    const xmlChar *systemId)
+{
+    (void)ctx;
+    (void)publicId;
+    (void)systemId;
+    return NULL; /* dummy */
+}
+                               
+/**
+ * internalSubset:
+ * @ctx:  the user data (XML parser context)
+ * @name:  the root element name
+ * @ExternalID:  the external ID
+ * @SystemID:  the SYSTEM ID (e.g. filename or URL)
+ *
+ * Callback on internal subset declaration; runs the registered shell function with name, ExternalID and SystemID as trailing arguments.
+ */
+static
+void shSAXinternalSubset (void *ctx,
+                         const xmlChar *name,
+                         const xmlChar *ExternalID,
+                         const xmlChar *SystemID)
+{
+    char *av[10];
+    int   ac=0;
+    int   exitcode;
+    char *lname,
+         *lexternalid,
+         *lsystemid;
+
+    D(printf("#shSAXinternalSubset\n"));
+
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXinternalSubset, &ac, av))
+        return;
+    /* the strings may be NULL (e.g. no external/system ID given); |xmlchartomb()| calls |strlen()|, so never pass NULL */
+    lname = name ? xmlchartomb(name) : NULL;
+    lexternalid = ExternalID ? xmlchartomb(ExternalID) : NULL;
+    lsystemid = SystemID ? xmlchartomb(SystemID) : NULL;
+    /* a NULL slot would end the argument list early, so absent/unconvertible strings are passed as "" */
+    av[ac++]=lname ? lname : "";
+    av[ac++]=lexternalid ? lexternalid : "";
+    av[ac++]=lsystemid ? lsystemid : "";
+    av[ac++]=NULL;
+
+    /* call user function (NOTE(review): |ac| counts the NULL terminator - confirm |sh_run()| expects that) */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+    /* release the converted copies; |free(NULL)| is a no-op */
+    free(lsystemid);
+    free(lexternalid);
+    free(lname);
+}
+
+/**
+ * externalSubset:
+ * @ctx:  the user data (XML parser context)
+ * @name:  the root element name
+ * @ExternalID:  the external ID
+ * @SystemID:  the SYSTEM ID (e.g. filename or URL)
+ *
+ * Callback on external subset declaration.
+ */
+static
+void shSAXexternalSubset (void *ctx,
+                         const xmlChar *name,
+                         const xmlChar *ExternalID,
+                         const xmlChar *SystemID)
+{
+    /*
+     * Forwards the event to the registered shell function with three string
+     * arguments (name, external ID, system ID) appended to whatever
+     * shSAXcommon() already placed in av[].  A non-zero exit status of the
+     * shell function stops the parser.
+     *
+     * Either identifier may be NULL when the document does not declare it,
+     * so NULL inputs and failed conversions are replaced by "" to keep
+     * every argv slot a valid string.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXexternalSubset\n"));
+
+    static const xmlChar nostr[] = "";
+    char *av[10];
+    int   ac=0;
+    int   exitcode;
+    char *lname,
+         *lexternalid,
+         *lsystemid;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXexternalSubset, &ac, av))
+        return;
+
+    lname       = xmlchartomb(name       ? name       : nostr);
+    lexternalid = xmlchartomb(ExternalID ? ExternalID : nostr);
+    lsystemid   = xmlchartomb(SystemID   ? SystemID   : nostr);
+
+    av[ac++]=lname       ? lname       : "";
+    av[ac++]=lexternalid ? lexternalid : "";
+    av[ac++]=lsystemid   ? lsystemid   : "";
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    /* free(NULL) is a no-op, so failed conversions need no special case */
+    free(lsystemid);
+    free(lexternalid);
+    free(lname);
+}
+
+/**
+ * getEntity:
+ * @ctx:  the user data (XML parser context)
+ * @name: The entity name
+ *
+ * Get an entity by name.
+ *
+ * Returns the xmlEntityPtr if found.
+ */
+static xmlEntityPtr
+shSAXgetEntity(void *ctx, const xmlChar *name)
+{
+    /* Placeholder: the lookup is not implemented, NULL is always returned. */
+    (void)ctx;
+    (void)name;
+
+    return NULL;
+}
+
+/**
+ * getParameterEntity:
+ * @ctx:  the user data (XML parser context)
+ * @name: The entity name
+ *
+ * Get a parameter entity by name.
+ *
+ * Returns the xmlEntityPtr if found.
+ */
+static xmlEntityPtr
+shSAXgetParameterEntity(void *ctx, const xmlChar *name)
+{
+    /* Placeholder: the lookup is not implemented, NULL is always returned. */
+    (void)ctx;
+    (void)name;
+
+    return NULL;
+}
+
+/**
+ * entityDecl:
+ * @ctx:  the user data (XML parser context)
+ * @name:  the entity name 
+ * @type:  the entity type 
+ * @publicId: The public ID of the entity
+ * @systemId: The system ID of the entity
+ * @content: the entity value (without processing).
+ *
+ * An entity definition has been parsed.
+ */
+static void
+shSAXentityDecl(void *ctx, const xmlChar *name, int type,
+                const xmlChar *publicId, const xmlChar *systemId,
+                xmlChar *content)
+{
+    /* Empty on purpose: the event is accepted and discarded. */
+    (void)ctx;
+    (void)name;
+    (void)type;
+    (void)publicId;
+    (void)systemId;
+    (void)content;
+}
+
+/**
+ * notationDecl:
+ * @ctx:  the user data (XML parser context)
+ * @name: The name of the notation
+ * @publicId: The public ID of the entity
+ * @systemId: The system ID of the entity
+ *
+ * What to do when a notation declaration has been parsed.
+ */
+static
+void shSAXnotationDecl(void *ctx,
+                      const xmlChar *name,
+                      const xmlChar *publicId,
+                      const xmlChar *systemId)
+{
+    /*
+     * Forwards the event to the registered shell function with three string
+     * arguments (name, public ID, system ID) appended to whatever
+     * shSAXcommon() already placed in av[].  A non-zero exit status of the
+     * shell function stops the parser.
+     *
+     * A notation may declare only a PUBLIC or only a SYSTEM identifier, in
+     * which case the other pointer is NULL; NULL inputs and failed
+     * conversions are replaced by "" to keep every argv slot a valid string.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXnotationDecl\n"));
+
+    static const xmlChar nostr[] = "";
+    char *av[10];
+    int   ac=0;
+    int   exitcode;
+    char *lname,
+         *lpublicid,
+         *lsystemid;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXnotationDecl, &ac, av))
+        return;
+
+    lname     = xmlchartomb(name     ? name     : nostr);
+    lpublicid = xmlchartomb(publicId ? publicId : nostr);
+    lsystemid = xmlchartomb(systemId ? systemId : nostr);
+
+    av[ac++]=lname     ? lname     : "";
+    av[ac++]=lpublicid ? lpublicid : "";
+    av[ac++]=lsystemid ? lsystemid : "";
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    /* free(NULL) is a no-op, so failed conversions need no special case */
+    free(lsystemid);
+    free(lpublicid);
+    free(lname);
+}
+
+/**
+ * attributeDecl:
+ * @ctx:  the user data (XML parser context)
+ * @elem:  the name of the element
+ * @fullname:  the attribute name 
+ * @type:  the attribute type 
+ * @def:  the type of default value
+ * @defaultValue: the attribute default value
+ * @tree:  the tree of enumerated value set
+ *
+ * An attribute definition has been parsed.
+ */
+static void
+shSAXattributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname,
+                   int type, int def, const xmlChar *defaultValue,
+                   xmlEnumerationPtr tree)
+{
+    /* Empty on purpose: the event is accepted and discarded. */
+    (void)ctx;
+    (void)elem;
+    (void)fullname;
+    (void)type;
+    (void)def;
+    (void)defaultValue;
+    (void)tree;
+}
+
+/**
+ * elementDecl:
+ * @ctx:  the user data (XML parser context)
+ * @name:  the element name 
+ * @type:  the element type 
+ * @content: the element value tree
+ *
+ * An element definition has been parsed.
+ */
+static void
+shSAXelementDecl(void *ctx, const xmlChar *name, int type,
+                 xmlElementContentPtr content)
+{
+    /* Empty on purpose: the event is accepted and discarded. */
+    (void)ctx;
+    (void)name;
+    (void)type;
+    (void)content;
+}
+
+/**
+ * unparsedEntityDecl:
+ * @ctx:  the user data (XML parser context)
+ * @name: The name of the entity
+ * @publicId: The public ID of the entity
+ * @systemId: The system ID of the entity
+ * @notationName: the name of the notation
+ *
+ * What to do when an unparsed entity declaration is parsed.
+ */
+static
+void shSAXunparsedEntityDecl(void *ctx,
+                            const xmlChar *name,
+                            const xmlChar *publicId,
+                            const xmlChar *systemId,
+                            const xmlChar *notationName)
+{
+    /*
+     * Forwards the event to the registered shell function with four string
+     * arguments (name, public ID, system ID, notation name) appended to
+     * whatever shSAXcommon() already placed in av[].  A non-zero exit status
+     * of the shell function stops the parser.
+     *
+     * Identifiers that the declaration omits arrive as NULL; NULL inputs and
+     * failed conversions are replaced by "" to keep every argv slot a valid
+     * string.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXunparsedEntityDecl\n"));
+
+    static const xmlChar nostr[] = "";
+    char *av[10];
+    int   ac=0;
+    int   exitcode;
+    char *lname,
+         *lpublicid,
+         *lsystemid,
+         *lnotationname;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXunparsedEntityDecl, &ac, av))
+        return;
+
+    lname         = xmlchartomb(name         ? name         : nostr);
+    lpublicid     = xmlchartomb(publicId     ? publicId     : nostr);
+    lsystemid     = xmlchartomb(systemId     ? systemId     : nostr);
+    lnotationname = xmlchartomb(notationName ? notationName : nostr);
+
+    av[ac++]=lname         ? lname         : "";
+    av[ac++]=lpublicid     ? lpublicid     : "";
+    av[ac++]=lsystemid     ? lsystemid     : "";
+    av[ac++]=lnotationname ? lnotationname : "";
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    /* free(NULL) is a no-op, so failed conversions need no special case */
+    free(lnotationname);
+    free(lsystemid);
+    free(lpublicid);
+    free(lname);
+}
+
+/**
+ * setDocumentLocator:
+ * @ctx:  the user data (XML parser context)
+ * @loc: A SAX Locator
+ *
+ * Receive the document locator at startup, actually xmlDefaultSAXLocator.
+ * Everything is available on the context, so this is useless in our case.
+ */
+static void
+shSAXsetDocumentLocator(void *ctx, xmlSAXLocatorPtr loc)
+{
+    /* Empty on purpose: the locator is not recorded. */
+    (void)ctx;
+    (void)loc;
+}
+
+
+/**
+ * startDocument:
+ * @ctx:  the user data (XML parser context)
+ *
+ * Called when the document starts being processed.
+ */
+static
+void shSAXstartDocument (void *ctx)
+{
+    char *args[10];   /* argument vector handed to sh_run() */
+    int   nargs = 0;  /* slots of args[] in use */
+
+    D(printf("#shSAXstartDocument\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXstartDocument, &nargs, args))
+        return;
+
+    /* this event carries no arguments of its own */
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    if (sh_run(nargs, args) != 0)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+}
+
+/**
+ * endDocument:
+ * @ctx:  the user data (XML parser context)
+ *
+ * Called when the document end has been detected.
+ */
+static
+void shSAXendDocument(void *ctx)
+{
+    char *args[10];   /* argument vector handed to sh_run() */
+    int   nargs = 0;  /* slots of args[] in use */
+
+    D(printf("#shSAXendDocument\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXendDocument, &nargs, args))
+        return;
+
+    /* this event carries no arguments of its own */
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    if (sh_run(nargs, args) != 0)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+}
+
+/**
+ * startElement:
+ * @ctx:  the user data (XML parser context)
+ * @name:  The element name, including namespace prefix
+ * @atts:  An array of name/value attributes pairs, NULL terminated
+ *
+ * Called when an opening tag has been processed.
+ */
+static
+void shSAXstartElement(void *ctx,
+                       const xmlChar *name,
+                       const xmlChar **atts)
+{
+    /*
+     * Forwards an opening tag to the registered shell function.  The
+     * function receives the element name and the name of a temporary
+     * indexed shell variable whose elements are the entries of |atts|
+     * (in the order libxml2 supplied them), or "" when the element has no
+     * attributes.  The temporary variable is removed again before returning.
+     * A non-zero exit status of the shell function stops the parser.
+     *
+     * Differences from the first version of this callback:
+     *  - attribute strings are converted with xmlchartomb() like every other
+     *    callback argument instead of being stored as raw UTF-8;
+     *  - a failing nv_open_fmt() no longer leaks |lname| or leaves a
+     *    half-filled temporary variable behind;
+     *  - argv never contains a NULL in the middle.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    Shell_t *shp = sh_contexttoshell(((shxmlcontext *)(((xmlParserCtxt *)ctx)->_private))->shextra);
+    D(printf("#shSAXstartElement name='%s'\n", name));
+
+    char     *av[10];
+    int       ac=0;
+    int       exitcode;
+    char     *lname;
+    char     *attsvarname = NULL,
+              attsvarnamebuff[256];
+    Namval_t *np;
+    int       n;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXstartElement, &ac, av))
+        return;
+
+    lname = xmlchartomb(name);
+
+    if (atts)
+    {
+        /* name derived from both pointers so nested events do not collide */
+        attsvarname = attsvarnamebuff;
+        sprintf(attsvarname, "se_tmp%lx_%lx", (unsigned long)ctx, (unsigned long)atts);
+        for(n=0 ; atts[n] != NULL ; n++)
+        {
+            char *lval;
+
+            np = nv_open_fmt(shp->var_tree, NV_VARNAME|NV_NOFAIL, "%s[%d]", attsvarname, n);
+            if (!np)
+                goto cleanup; /* callback is skipped, resources are released */
+
+            /* fall back to the raw string if the conversion fails;
+             * presumably nv_putval() copies the value (the original code
+             * already relied on that for |atts|) - TODO confirm */
+            lval = xmlchartomb(atts[n]);
+            nv_putval(np, lval ? lval : (const char *)atts[n], 0);
+            free(lval);
+            nv_close(np);
+        }
+    }
+
+    av[ac++]=lname ? lname : "";
+    av[ac++]=attsvarname ? attsvarname : "";
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+cleanup:
+    /* remove temporary variable "attsvarname" */
+    if (attsvarname)
+        (void)nv_unset_fmt(shp->var_tree, NV_VARNAME|NV_NOFAIL, "%s", attsvarname);
+
+    free(lname);
+}
+
+/**
+ * endElement:
+ * @ctx:  the user data (XML parser context)
+ * @name:  The element name
+ *
+ * Called when the end of an element has been detected.
+ */
+static
+void shSAXendElement(void *ctx, const xmlChar *name)
+{
+    char *args[10];    /* argument vector handed to sh_run() */
+    int   nargs = 0;   /* slots of args[] in use */
+    char *elemname;    /* |name| as returned by xmlchartomb() */
+    int   status;
+
+    D(printf("#shSAXendElement\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXendElement, &nargs, args))
+        return;
+
+    elemname = xmlchartomb(name);
+
+    args[nargs++] = elemname;
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    status = sh_run(nargs, args);
+    if (status)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(elemname);
+}
+
+/**
+ * attribute:
+ * @ctx:  the user data (XML parser context)
+ * @name:  The attribute name, including namespace prefix
+ * @value:  The attribute value
+ *
+ * Handle an attribute that has been read by the parser.
+ * The default handling is to convert the attribute into a
+ * DOM subtree and paste it into a new xmlAttr element added to
+ * the element.
+ */
+static
+void shSAXattribute(void *ctx,
+                   const xmlChar *name,
+                   const xmlChar *value)
+{
+    char *args[10];    /* argument vector handed to sh_run() */
+    int   nargs = 0;   /* slots of args[] in use */
+    char *attrname;    /* |name| as returned by xmlchartomb() */
+    char *attrvalue;   /* |value| as returned by xmlchartomb() */
+    int   status;
+
+    D(printf("#shSAXattribute\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXattribute, &nargs, args))
+        return;
+
+    attrname  = xmlchartomb(name);
+    attrvalue = xmlchartomb(value);
+
+    args[nargs++] = attrname;
+    args[nargs++] = attrvalue;
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    status = sh_run(nargs, args);
+    if (status)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(attrvalue);
+    free(attrname);
+}
+
+/**
+ * reference:
+ * @ctx:  the user data (XML parser context)
+ * @name:  The entity name
+ *
+ * Called when an entity reference is detected. 
+ */
+static
+void shSAXreference(void *ctx, const xmlChar *name)
+{
+    char *args[10];    /* argument vector handed to sh_run() */
+    int   nargs = 0;   /* slots of args[] in use */
+    char *entname;     /* |name| as returned by xmlchartomb() */
+    int   status;
+
+    D(printf("#shSAXreference\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXreference, &nargs, args))
+        return;
+
+    entname = xmlchartomb(name);
+
+    args[nargs++] = entname;
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    status = sh_run(nargs, args);
+    if (status)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(entname);
+}
+
+/**
+ * characters:
+ * @ctx:  the user data (XML parser context)
+ * @ch:  a xmlChar string
+ * @len: the number of xmlChar
+ *
+ * Receiving some chars from the parser.
+ */
+static
+void shSAXcharacters (void *ctx,
+                     const xmlChar *ch,
+                     int len)
+{
+    /*
+     * Forwards a run of character data to the registered shell function.
+     * The function receives the text (converted by xmlchartomb()) and its
+     * length in characters as a decimal string.  A non-zero exit status of
+     * the shell function stops the parser.
+     *
+     * |ch| is not '\0'-terminated, so a terminated copy is made first.  The
+     * copy lives on the heap: |len| is controlled by the document and may be
+     * far larger than what is safe to place on the stack.  If the copy or
+     * the conversion cannot be produced the parser is stopped, because the
+     * event cannot be delivered.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXcharacters\n"));
+
+    char      *av[10];
+    int        ac=0;
+    int        exitcode;
+    xmlChar   *chbuff; /* copy of |ch| terminated with '\0' */
+    char      *lch;    /* copy of |chbuff| in current locale */
+    char       num_charsbuff[64];
+    size_t     num_chars;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXcharacters, &ac, av))
+        return;
+
+    if (len < 0)
+        len = 0;
+
+    /* Create '\0'-terminated copy of |ch| ... */
+    chbuff = malloc((size_t)len + 1);
+    if (!chbuff)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+    memcpy(chbuff, ch, (size_t)len);
+    chbuff[len]='\0';
+
+    /* ... and then convert it from UTF-8 to current encoding ... */
+    lch = xmlchartomb(chbuff);
+    free(chbuff);
+    if (!lch)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+
+    /* ... and count the number of multibyte characters; mbstowcs() yields
+     * (size_t)-1 for an invalid sequence, in which case bytes are counted */
+    num_chars = mbstowcs(NULL, lch, 0);
+    if (num_chars == (size_t)-1)
+        num_chars = strlen(lch);
+    sprintf(num_charsbuff, "%lu", (unsigned long)num_chars);
+
+    av[ac++]=lch;
+    av[ac++]=num_charsbuff;
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(lch);
+}
+
+
+/**
+ * ignorableWhitespace:
+ * @ctx:  the user data (XML parser context)
+ * @ch:  a xmlChar string
+ * @len: the number of xmlChar
+ *
+ * Receiving some ignorable whitespaces from the parser.
+ * UNUSED: by default the DOM building will use characters.
+ */
+static
+void shSAXignorableWhitespace (void *ctx,
+                              const xmlChar *ch,
+                              int len)
+{
+    /*
+     * Forwards a run of ignorable whitespace to the registered shell
+     * function.  The function receives the text (converted by
+     * xmlchartomb()) and its length in characters as a decimal string.
+     * A non-zero exit status of the shell function stops the parser.
+     *
+     * |ch| is not '\0'-terminated, so a terminated copy is made first.  The
+     * copy lives on the heap: |len| is controlled by the document and may be
+     * far larger than what is safe to place on the stack.  If the copy or
+     * the conversion cannot be produced the parser is stopped, because the
+     * event cannot be delivered.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXignorableWhitespace\n"));
+
+    char      *av[10];
+    int        ac=0;
+    int        exitcode;
+    xmlChar   *chbuff; /* copy of |ch| terminated with '\0' */
+    char      *lch;    /* copy of |chbuff| in current locale */
+    char       num_charsbuff[64];
+    size_t     num_chars;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXignorableWhitespace, &ac, av))
+        return;
+
+    if (len < 0)
+        len = 0;
+
+    /* Create '\0'-terminated copy of |ch| ... */
+    chbuff = malloc((size_t)len + 1);
+    if (!chbuff)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+    memcpy(chbuff, ch, (size_t)len);
+    chbuff[len]='\0';
+
+    /* ... and then convert it from UTF-8 to current encoding ... */
+    lch = xmlchartomb(chbuff);
+    free(chbuff);
+    if (!lch)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+
+    /* ... and count the number of multibyte characters; mbstowcs() yields
+     * (size_t)-1 for an invalid sequence, in which case bytes are counted */
+    num_chars = mbstowcs(NULL, lch, 0);
+    if (num_chars == (size_t)-1)
+        num_chars = strlen(lch);
+    sprintf(num_charsbuff, "%lu", (unsigned long)num_chars);
+
+    av[ac++]=lch;
+    av[ac++]=num_charsbuff;
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(lch);
+}
+
+/**
+ * processingInstruction:
+ * @ctx:  the user data (XML parser context)
+ * @target:  the target name
+ * @data: the PI data's
+ *
+ * A processing instruction has been parsed.
+ */
+static
+void shSAXprocessingInstruction(void *ctx,
+                               const xmlChar *target,
+                               const xmlChar *data)
+{
+    /*
+     * Forwards a processing instruction to the registered shell function
+     * with two string arguments (target, data) appended to whatever
+     * shSAXcommon() already placed in av[].  A non-zero exit status of the
+     * shell function stops the parser.
+     *
+     * libxml2 reports a PI without content (e.g. <?target?>) with a NULL
+     * |data| pointer; NULL inputs and failed conversions are replaced by ""
+     * to keep every argv slot a valid string.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXprocessingInstruction\n"));
+
+    static const xmlChar nostr[] = "";
+    char *av[10];
+    int   ac=0;
+    int   exitcode;
+    char *ltarget,
+         *ldata;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXprocessingInstruction, &ac, av))
+        return;
+
+    ltarget = xmlchartomb(target ? target : nostr);
+    ldata   = xmlchartomb(data   ? data   : nostr);
+
+    av[ac++]=ltarget ? ltarget : "";
+    av[ac++]=ldata   ? ldata   : "";
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    /* free(NULL) is a no-op, so failed conversions need no special case */
+    free(ldata);
+    free(ltarget);
+}
+
+/**
+ * comment:
+ * @ctx:  the user data (XML parser context)
+ * @value:  the comment content
+ *
+ * A comment has been parsed.
+ */
+static
+void shSAXcomment(void *ctx, const xmlChar *value)
+{
+    char *args[10];    /* argument vector handed to sh_run() */
+    int   nargs = 0;   /* slots of args[] in use */
+    char *text;        /* |value| as returned by xmlchartomb() */
+    int   status;
+
+    D(printf("#shSAXcomment\n"));
+
+    /* non-zero: do not run a shell function for this event */
+    if (shSAXcommon(ctx, (shSAXfunc)shSAXcomment, &nargs, args))
+        return;
+
+    text = xmlchartomb(value);
+
+    args[nargs++] = text;
+    args[nargs++] = NULL;
+
+    /* run the shell function; a failing one aborts the parse */
+    status = sh_run(nargs, args);
+    if (status)
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(text);
+}
+
+/**
+ * cdataBlock:
+ * @ctx:  the user data (XML parser context)
+ * @value:  The pcdata content
+ * @len:  the block length
+ *
+ * Called when a pcdata block has been parsed.
+ */
+static
+void shSAXcdataBlock (void *ctx,
+                     const xmlChar *value,
+                     int len)
+{
+    /*
+     * Forwards a CDATA block to the registered shell function.  The
+     * function receives the text (converted by xmlchartomb()) and its
+     * length in characters as a decimal string.  A non-zero exit status of
+     * the shell function stops the parser.
+     *
+     * |value| is not '\0'-terminated, so a terminated copy is made first.
+     * The copy lives on the heap: |len| is controlled by the document and
+     * may be far larger than what is safe to place on the stack.  If the
+     * copy or the conversion cannot be produced the parser is stopped,
+     * because the event cannot be delivered.
+     *
+     * NOTE(review): the count given to sh_run() includes the terminating
+     * NULL slot, as in the original code - confirm against sh_run()'s contract.
+     */
+    D(printf("#shSAXcdataBlock\n"));
+
+    char      *av[10];
+    int        ac=0;
+    int        exitcode;
+    xmlChar   *chbuff; /* copy of |value| terminated with '\0' */
+    char      *lch;    /* copy of |chbuff| in current locale */
+    char       num_charsbuff[64];
+    size_t     num_chars;
+
+    /* non-zero: do not run a shell function for this event */
+    if(shSAXcommon(ctx, (shSAXfunc)shSAXcdataBlock, &ac, av))
+        return;
+
+    if (len < 0)
+        len = 0;
+
+    /* Create '\0'-terminated copy of |value| ... */
+    chbuff = malloc((size_t)len + 1);
+    if (!chbuff)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+    memcpy(chbuff, value, (size_t)len);
+    chbuff[len]='\0';
+
+    /* ... and then convert it from UTF-8 to current encoding ... */
+    lch = xmlchartomb(chbuff);
+    free(chbuff);
+    if (!lch)
+    {
+        XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+        return;
+    }
+
+    /* ... and count the number of multibyte characters; mbstowcs() yields
+     * (size_t)-1 for an invalid sequence, in which case bytes are counted */
+    num_chars = mbstowcs(NULL, lch, 0);
+    if (num_chars == (size_t)-1)
+        num_chars = strlen(lch);
+    sprintf(num_charsbuff, "%lu", (unsigned long)num_chars);
+
+    av[ac++]=lch;
+    av[ac++]=num_charsbuff;
+    av[ac++]=NULL;
+
+    /* call user function */
+    exitcode = sh_run(ac, av);
+    if (exitcode != 0)
+       XMLCTX2SHXMLCTX(ctx)->xmlstopparser(ctx);
+
+    free(lch);
+}
+
+/**
+ * warning:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ * 
+ * Display and format a warning messages, callback.
+ */
+static void
+shSAXwarning(void *ctx, const char *msg, ...)
+{
+    /* Empty on purpose: the message is neither formatted nor reported. */
+    (void)ctx;
+    (void)msg;
+}
+
+/**
+ * error:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ * 
+ * Display and format an error messages, callback.
+ */
+static void
+shSAXerror(void *ctx, const char *msg, ...)
+{
+    /* Empty on purpose: the message is neither formatted nor reported. */
+    (void)ctx;
+    (void)msg;
+}
+
+/**
+ * fatalError:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ * 
+ * Display and format fatal error messages, callback.
+ * Note: so far fatalError() SAX callbacks are not used, error()
+ *       get all the callbacks for errors.
+ */
+static void
+shSAXfatalError(void *ctx, const char *msg, ...)
+{
+    /* Empty on purpose: the message is neither formatted nor reported. */
+    (void)ctx;
+    (void)msg;
+}
+
+/**
+ * isStandalone:
+ * @ctx:  the user data (XML parser context)
+ *
+ * Is this document tagged standalone?
+ *
+ * Returns 1 if true
+ */
+static int
+shSAXisStandalone(void *ctx)
+{
+    /* Placeholder: the context is not consulted, the answer is always 0. */
+    (void)ctx;
+
+    return 0;
+}
+
+/**
+ * hasInternalSubset:
+ * @ctx:  the user data (XML parser context)
+ *
+ * Does this document have an internal subset?
+ *
+ * Returns 1 if true
+ */
+static int
+shSAXhasInternalSubset(void *ctx)
+{
+    /* Placeholder: the context is not consulted, the answer is always 0. */
+    (void)ctx;
+
+    return 0;
+}
+
+/**
+ * hasExternalSubset:
+ * @ctx:  the user data (XML parser context)
+ *
+ * Does this document have an external subset?
+ *
+ * Returns 1 if true
+ */
+static int
+shSAXhasExternalSubset(void *ctx)
+{
+    /* Placeholder: the context is not consulted, the answer is always 0. */
+    (void)ctx;
+
+    return 0;
+}
+
+/*
+ * Entry point of the "xmlsaxparse" builtin.
+ *
+ * After option processing exactly two operands must remain: the name of a
+ * shell array that maps SAX event names (the lower-case names in the table
+ * below) to shell function names, and the file to parse ("/dev/stdin" is
+ * passed to libxml2 as "-").  Option 'c' sets do_callbacknames and option
+ * 'u' supplies the user data string in the context shared with the
+ * callbacks.
+ *
+ * libxml2 is loaded at run time with dlopen(); only the SAX events for
+ * which the array holds a value get a C callback installed in the
+ * xmlSAXHandler, all others stay NULL.
+ *
+ * Returns 0 whenever control reaches the end of the function; the result
+ * of the parse call itself is not inspected.
+ */
+extern int b_xmlsaxparse(int argc, char *argv[], void *extra)
+{
+       Namval_t *np;
+       int n;
+       Shell_t *shp = sh_contexttoshell(extra);
+       char *varname,
+            *filename;
+       int i;
+       xmlSAXHandler parser;
+
+       memset(&parser, 0, sizeof(parser));
+
+       /* event name, slot in |parser|, C callback, shell function name */
+       shSAXfuncRec shSAXfunctions[] =
+       {
+           { "internalsubset",        (shSAXfunc*)&parser.internalSubset,        (shSAXfunc)shSAXinternalSubset,        NULL },
+           { "isstandalone",          (shSAXfunc*)&parser.isStandalone,          (shSAXfunc)shSAXisStandalone,          NULL },
+           { "hasinternalsubset",     (shSAXfunc*)&parser.hasInternalSubset,     (shSAXfunc)shSAXhasInternalSubset,     NULL },
+           { "hasexternalsubset",     (shSAXfunc*)&parser.hasExternalSubset,     (shSAXfunc)shSAXhasExternalSubset,     NULL },
+           { "resolveentity",         (shSAXfunc*)&parser.resolveEntity,         (shSAXfunc)shSAXresolveEntity,         NULL },
+           { "getentity",             (shSAXfunc*)&parser.getEntity,             (shSAXfunc)shSAXgetEntity,             NULL },
+           { "entitydecl",            (shSAXfunc*)&parser.entityDecl,            (shSAXfunc)shSAXentityDecl,            NULL },
+           { "notationdecl",          (shSAXfunc*)&parser.notationDecl,          (shSAXfunc)shSAXnotationDecl,          NULL },
+           { "attributedecl",         (shSAXfunc*)&parser.attributeDecl,         (shSAXfunc)shSAXattributeDecl,         NULL },
+           { "elementdecl",           (shSAXfunc*)&parser.elementDecl,           (shSAXfunc)shSAXelementDecl,           NULL },
+           { "unparsedentitydecl",    (shSAXfunc*)&parser.unparsedEntityDecl,    (shSAXfunc)shSAXunparsedEntityDecl,    NULL },
+           { "setdocumentlocator",    (shSAXfunc*)&parser.setDocumentLocator,    (shSAXfunc)shSAXsetDocumentLocator,    NULL },
+           { "startdocument",         (shSAXfunc*)&parser.startDocument,         (shSAXfunc)shSAXstartDocument,         NULL },
+           { "enddocument",           (shSAXfunc*)&parser.endDocument,           (shSAXfunc)shSAXendDocument,           NULL },
+           { "startelement",          (shSAXfunc*)&parser.startElement,          (shSAXfunc)shSAXstartElement,          NULL },
+           { "endelement",            (shSAXfunc*)&parser.endElement,            (shSAXfunc)shSAXendElement,            NULL },
+           { "reference",             (shSAXfunc*)&parser.reference,             (shSAXfunc)shSAXreference,             NULL },
+           { "characters",            (shSAXfunc*)&parser.characters,            (shSAXfunc)shSAXcharacters,            NULL },
+           { "ignorablewhitespace",   (shSAXfunc*)&parser.ignorableWhitespace,   (shSAXfunc)shSAXignorableWhitespace,   NULL },
+           { "processinginstruction", (shSAXfunc*)&parser.processingInstruction, (shSAXfunc)shSAXprocessingInstruction, NULL },
+           { "comment",               (shSAXfunc*)&parser.comment,               (shSAXfunc)shSAXcomment,               NULL },
+           { "warning",               (shSAXfunc*)&parser.warning,               (shSAXfunc)shSAXwarning,               NULL },
+           { "error",                 (shSAXfunc*)&parser.error,                 (shSAXfunc)shSAXerror,                 NULL },
+           { "fatalerror",            (shSAXfunc*)&parser.fatalError,            (shSAXfunc)shSAXfatalError,            NULL },
+           { "getparameterentity",    (shSAXfunc*)&parser.getParameterEntity,    (shSAXfunc)shSAXgetParameterEntity,    NULL },
+           { "cdatablock",            (shSAXfunc*)&parser.cdataBlock,            (shSAXfunc)shSAXcdataBlock,            NULL },
+           { "externalsubset",        (shSAXfunc*)&parser.externalSubset,        (shSAXfunc)shSAXexternalSubset,        NULL },
+           { NULL,                    (shSAXfunc*)NULL,                          (shSAXfunc)NULL,                       NULL },
+       };
+
+       /* context shared between this function and callbacks */
+       shxmlcontext shxmlctx;
+       memset(&shxmlctx, 0, sizeof(shxmlctx));
+       shxmlctx.shextra          = extra;
+       shxmlctx.shxmlfunctions   = &shSAXfunctions[0];
+       shxmlctx.do_callbacknames = FALSE;
+       shxmlctx.ctxvar_init      = FALSE;
+       shxmlctx.userdata         = "";
+
+       while ((n = optget(argv, sh_optxmlsaxparse)) != 0) switch (n)
+       {
+           case 'c':
+               shxmlctx.do_callbacknames = TRUE;
+               break;
+           case 'u':
+               shxmlctx.userdata = opt_info.arg;
+               break;
+           case ':':
+               errormsg(SH_DICT, 2, "%s", opt_info.arg);
+               break;
+           case '?':
+               errormsg(SH_DICT, ERROR_usage(2), "%s", opt_info.arg);
+               break;
+       }
+       argc -= opt_info.index;
+       argv += opt_info.index;
+       if(argc!=2)
+               /* the usage text is data, never a format string */
+               errormsg(SH_DICT, ERROR_usage(2), "%s", optusage((char*)0));
+
+       varname  = argv[0];
+       filename = argv[1];
+
+       if (!strcmp(filename, "/dev/stdin"))
+               filename="-";
+
+       /* Load some libxml2 symbols */
+       shxmlSAXParseFileWithDataFunc parse;
+       void *dl_xml2_handle = dlopen("libxml2.so.2", RTLD_LAZY);
+       if (!dl_xml2_handle)
+               errormsg(SH_DICT, ERROR_system(1), "cannot open libxml2.so.2");
+       parse                  = (shxmlSAXParseFileWithDataFunc)dlsym(dl_xml2_handle, "xmlSAXParseFileWithData");
+       shxmlctx.xmlstopparser = (shxmlStopParserFunc)dlsym(dl_xml2_handle, "xmlStopParser");
+       if (!parse || !shxmlctx.xmlstopparser)
+       {
+               dlclose(dl_xml2_handle);
+               errormsg(SH_DICT, ERROR_system(1), "cannot find required symbols in libxml2.so.2");
+       }
+
+       /* install a C callback for every event the shell array names */
+       for(i=0 ; shSAXfunctions[i].name != NULL ; i++)
+       {
+               const char *functionname = shSAXfunctions[i].name;
+               const char *val;
+
+               np = nv_open_fmt(shp->var_tree, NV_VARNAME|NV_NOFAIL|NV_NOADD, "%s[%s]", varname, functionname);
+               if (!np)
+                   continue;
+
+               val = nv_getval(np);
+               if (!val)
+               {
+                   nv_close(np);
+                   continue;
+               }
+
+               /* only activate the callback once its shell function name
+                * has really been stored */
+               shSAXfunctions[i].shellfunc = strdup(val);
+               if (shSAXfunctions[i].shellfunc)
+               {
+                   *(shSAXfunctions[i].pfunc) = shSAXfunctions[i].sfunc;
+                   D(printf("setting callback '%s' to '%s'\n", functionname, val));
+               }
+               else
+               {
+                   errormsg(SH_DICT, 2, "%s: out of memory", functionname);
+               }
+               nv_close(np);
+       }
+
+       (*parse)(&parser, filename, 1, (void *)&shxmlctx);
+
+       /* release the shell function names copied above */
+       for(i=0 ; shSAXfunctions[i].name != NULL ; i++)
+               free(shSAXfunctions[i].shellfunc);
+
+       dlclose(dl_xml2_handle);
+
+       /* Unset (temporary) SAX context */
+       shDestroySAXctxCompoundVar(&shxmlctx);
+
+       return(0);
+}
+
Index: src/lib/libshell/Makefile.com
===================================================================
--- src/lib/libshell/Makefile.com       (revision 903)
+++ src/lib/libshell/Makefile.com       (working copy)
@@ -47,6 +47,7 @@
        bltins/ulimit.o \
        bltins/umask.o \
        bltins/whence.o \
+       bltins/xml.o \
        data/aliases.o \
        data/builtins.o \
        data/keywords.o \
@@ -153,6 +154,10 @@
        $(CCVERBOSE) \
        -xstrconst
 
+# extra rule for XML glue
+pics/bltins/xml.o      := CPPFLAGS += -I/usr/include/libxml2
+
+# turn errors off
 pics/sh/io.o           := CPPFLAGS += -D_SYS_VMEM_H
 pics/sh/nvdisc.o       := CERRWARN += -erroff=E_END_OF_LOOP_CODE_NOT_REACHED
 
Index: src/lib/libshell/mapfile-vers
===================================================================
--- src/lib/libshell/mapfile-vers       (revision 903)
+++ src/lib/libshell/mapfile-vers       (working copy)
@@ -165,7 +165,8 @@
        b_unset;
        b_vpath;
        b_wait;
-       b_whence;       
+       b_whence;
+       b_xmlsaxparse;
     local:
        *;
 };

Reply via email to