Hi guys,
I needed the ability to add a table to an existing ODF spreadsheet and
decided that the best way would be to use a streaming solution based on
xmlreader and xmlwriter. I was surprised how difficult this was to
write and wondered if anyone had any comments on my code (either for
correctness or for simplicity).
Many thanks,
Ali.
/*
* This is a program to test the concept of an XML Streamer.
* Ref: http://blogs.msdn.com/mfussell/archive/2005/02/12/371546.aspx
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlwriter.h>
#undef DEBUG
/*
 * build_qname:
 * Join a namespace prefix and a local name into the string "prefix:name".
 *
 * prefix: namespace prefix, or NULL when the name is unqualified.
 * name:   local name.
 *
 * Returns:
 *  - name itself (cast to xmlChar) when prefix is NULL;
 *  - NULL when prefix is given but name is NULL (nothing to qualify);
 *  - otherwise a pointer to a function-static scratch buffer holding
 *    "prefix:name".  The buffer is overwritten by the next qualifying
 *    call, so the result must be consumed before calling again.
 *
 * Calling build_qname(NULL,NULL) releases the scratch buffer.
 * The static buffer makes this function non-reentrant.
 */
static const xmlChar *build_qname(const char *prefix,const char *name)
{
    static xmlChar *buf=NULL;
    static size_t buflen=0;
    size_t len;

    if (!prefix)
    {
        if (!name)
        {
            /* Explicit request to drop the scratch buffer. */
            g_free(buf);
            buf=NULL;
            buflen=0;
        }
        return BAD_CAST name;
    }
    if (!name)
        return NULL;
    /* Room for prefix, ':', name and the terminating NUL. */
    len=strlen(prefix)+strlen(name)+2;
    if (buflen<len)
    {
        /* Old contents are not needed, so free+malloc instead of realloc;
         * over-allocate to reduce the number of future regrowths. */
        g_free(buf);
        buflen=len*2;
        buf=g_malloc(buflen);
    }
    snprintf((char *)buf,buflen,"%s:%s",prefix,name);
    return buf;
}
GSList *stream_element(xmlTextWriter *dst,xmlTextReader *src,
GTree *namespaces,GSList *elements)
{
int i,r,type,count;
GPtrArray *array=NULL;
const char *uri,*prefix;
uri=xmlTextReaderConstNamespaceUri(src);
if (uri && *uri)
prefix=g_tree_lookup(namespaces,uri);
else
prefix=NULL;
if (prefix)
xmlTextWriterStartElement(dst,build_qname(prefix,
xmlTextReaderConstLocalName(src)));
else if (uri && *uri)
{
prefix=xmlTextReaderConstPrefix(src);
array=g_ptr_array_new();
g_ptr_array_add(array,(gpointer)uri);
g_tree_insert(namespaces,(gpointer)uri,(gpointer)prefix);
xmlTextWriterStartElementNS(dst,prefix,xmlTextReaderConstLocalName(src),
uri);
}
else
xmlTextWriterStartElement(dst,xmlTextReaderConstLocalName(src));
count=xmlTextReaderAttributeCount(src);
for(i=0;i<count;i++)
{
xmlTextReaderMoveToAttributeNo(src,i);
uri=xmlTextReaderConstNamespaceUri(src);
if (uri && *uri)
prefix=g_tree_lookup(namespaces,uri);
else
prefix=NULL;
if (prefix)
{
if (!strcmp(prefix,"xmlns"))
{
uri=xmlTextReaderConstValue(src);
if (uri && *uri)
{
prefix=g_tree_lookup(namespaces,uri);
if (!prefix)
{
prefix=xmlTextReaderConstLocalName(src);
if (!array)
array=g_ptr_array_new();
g_ptr_array_add(array,(gpointer)uri);
g_tree_insert(namespaces,(gpointer)uri,
(gpointer)prefix);
xmlTextWriterStartAttribute(dst,
build_qname("xmlns",prefix));
}
else
continue;
}
else
/* xmlns:prefix="" is illegal XML; drop it */
continue;
}
else
xmlTextWriterStartAttribute(dst,
build_qname(prefix,
xmlTextReaderConstLocalName(src)));
}
else if (uri && *uri)
{
prefix=xmlTextReaderConstPrefix(src);
if (!array)
array=g_ptr_array_new();
g_ptr_array_add(array,(gpointer)uri);
g_tree_insert(namespaces,(gpointer)uri,
(gpointer)prefix);
xmlTextWriterStartAttributeNS(dst,prefix,
xmlTextReaderConstLocalName(src),uri);
}
else
xmlTextWriterStartAttribute(dst,
xmlTextReaderConstLocalName(src));
while ((r=xmlTextReaderReadAttributeValue(src))==1)
{
type=xmlTextReaderNodeType(src);
#ifdef DEBUG
fprintf(stderr,"=== Node type %d\n",type);
#endif
switch(type)
{
case XML_READER_TYPE_TEXT:
case XML_READER_TYPE_WHITESPACE:
case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
xmlTextWriterWriteString(dst,
xmlTextReaderConstValue(src));
break;
case XML_READER_TYPE_ENTITY_REFERENCE:
xmlTextWriterWriteFormatRaw(dst,"&%s;",
xmlTextReaderConstName(src));
break;
}
}
xmlTextWriterEndAttribute(dst);
}
if (count)
xmlTextReaderMoveToElement(src);
if (xmlTextReaderIsEmptyElement(src))
{
if (array)
{
for(i=0;i<array->len;i++)
g_tree_remove(namespaces,
g_ptr_array_index(array,i));
g_ptr_array_free(array,TRUE);
}
xmlTextWriterEndElement(dst);
}
else
elements=g_slist_prepend(elements,array);
return elements;
}
/*
 * stream:
 * Copy the whole document from the reader to the writer, node by node.
 *
 * dst: writer receiving the copy.
 * src: reader supplying the input; DTD loading, default attributes,
 *      validation and entity substitution are switched off on it here.
 *
 * Returns the last result of xmlTextReaderRead(), i.e. the value that
 * ended the read loop (anything other than 1).
 *
 * The output document is started on the first node of any kind, so that
 * comments or processing instructions in the prolog cannot be written
 * ahead of the XML declaration.
 */
int stream(xmlTextWriter *dst,xmlTextReader *src)
{
    int r,type,standalone;
    guint i;
    GTree *namespaces; /* key: URI, value: prefix */
    GSList *elements=NULL; /* Tail is root, Head is current element */
    GPtrArray *array; /* Array of keys */
    gboolean doc_started=FALSE;
    xmlNode *node;
    xmlDtd *dtd;
    xmlBuffer *buf;

    /* Keys and values are borrowed strings; the tree owns none of them. */
    namespaces=g_tree_new((GCompareFunc)strcmp);
    g_tree_insert(namespaces,"http://www.w3.org/XML/1998/namespace","xml");
    g_tree_insert(namespaces,"http://www.w3.org/2000/xmlns/","xmlns");
    xmlTextReaderSetParserProp(src,XML_PARSER_LOADDTD,0);
    xmlTextReaderSetParserProp(src,XML_PARSER_DEFAULTATTRS,0);
    xmlTextReaderSetParserProp(src,XML_PARSER_VALIDATE,0);
    xmlTextReaderSetParserProp(src,XML_PARSER_SUBST_ENTITIES,0);
    while ((r=xmlTextReaderRead(src))==1)
    {
        type=xmlTextReaderNodeType(src);
        if (!doc_started)
        {
            standalone=xmlTextReaderStandalone(src);
            xmlTextWriterStartDocument(dst,
              xmlTextReaderConstXmlVersion(src),
              xmlTextReaderConstEncoding(src),
              standalone>0?"yes":standalone==0?"no":NULL);
            doc_started=TRUE;
        }
#ifdef DEBUG
        fprintf(stderr,"*** Node type %d\n",type);
#endif
        switch(type)
        {
            case XML_READER_TYPE_ELEMENT:
                elements=stream_element(dst,src,namespaces,elements);
                break;
            case XML_READER_TYPE_TEXT:
            case XML_READER_TYPE_WHITESPACE:
            case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
                xmlTextWriterWriteString(dst,xmlTextReaderConstValue(src));
                break;
            case XML_READER_TYPE_CDATA:
                xmlTextWriterWriteCDATA(dst,xmlTextReaderConstValue(src));
                break;
            case XML_READER_TYPE_ENTITY_REFERENCE:
                /* Entities are not substituted; re-emit the reference. */
                xmlTextWriterWriteFormatRaw(dst,"&%s;",
                  xmlTextReaderConstName(src));
                break;
            case XML_READER_TYPE_XML_DECLARATION:
                /* Not generated by libxml version 2.6.27 */
                break;
            case XML_READER_TYPE_PROCESSING_INSTRUCTION:
                xmlTextWriterWritePI(dst,xmlTextReaderConstName(src),
                  xmlTextReaderConstValue(src));
                break;
            case XML_READER_TYPE_DOCUMENT_TYPE:
                node=xmlTextReaderCurrentNode(src);
                if (!node || node->type!=XML_DTD_NODE)
                    xmlTextWriterWriteDTD(dst,xmlTextReaderConstName(src),
                      NULL,NULL,NULL);
                else
                {
                    dtd=(xmlDtd *)node;
                    /* Rebuild the internal subset by dumping each
                     * declaration table into a scratch buffer. */
                    buf=xmlBufferCreate();
                    if (buf)
                    {
                        if (dtd->entities)
                            xmlDumpEntitiesTable(buf,
                              (xmlEntitiesTable *)dtd->entities);
                        if (dtd->pentities)
                            xmlDumpEntitiesTable(buf,
                              (xmlEntitiesTable *)dtd->pentities);
                        /* This is not quite right in that it expands
                         * entities, but I don't know how to do better.
                         */
                        if (dtd->elements)
                            xmlDumpElementTable(buf,
                              (xmlElementTable *)dtd->elements);
                        if (dtd->attributes)
                            xmlDumpAttributeTable(buf,
                              (xmlAttributeTable *)dtd->attributes);
                        if (dtd->notations)
                            xmlDumpNotationTable(buf,
                              (xmlNotationTable *)dtd->notations);
                    }
                    /* An empty dump means there is no internal subset. */
                    xmlTextWriterWriteDTD(dst,dtd->name,dtd->ExternalID,
                      dtd->SystemID,
                      (buf && xmlBufferLength(buf)>0)?
                        xmlBufferContent(buf):NULL);
                    if (buf)
                        xmlBufferFree(buf);
                }
                break;
            case XML_READER_TYPE_COMMENT:
                xmlTextWriterWriteComment(dst,xmlTextReaderConstValue(src));
                break;
            case XML_READER_TYPE_END_ELEMENT:
                if (elements)
                {
                    /* Pop the innermost element and undo the namespace
                     * bindings it introduced.  delete_link (unlike
                     * remove_link) also frees the list cell. */
                    array=elements->data;
                    elements=g_slist_delete_link(elements,elements);
                    if (array)
                    {
                        for(i=0;i<array->len;i++)
                            g_tree_remove(namespaces,
                              g_ptr_array_index(array,i));
                        g_ptr_array_free(array,TRUE);
                    }
                }
                xmlTextWriterEndElement(dst);
                break;
            case XML_READER_TYPE_NONE:
            case XML_READER_TYPE_ATTRIBUTE:
            case XML_READER_TYPE_ENTITY:
            case XML_READER_TYPE_DOCUMENT:
            case XML_READER_TYPE_DOCUMENT_FRAGMENT:
            case XML_READER_TYPE_NOTATION:
            case XML_READER_TYPE_END_ENTITY:
            default:
                break;
        }
    }
    if (doc_started)
        xmlTextWriterEndDocument(dst);
    /* If reading stopped early, elements may still be open: release the
     * bookkeeping that their end tags would otherwise have released. */
    while (elements)
    {
        array=elements->data;
        if (array)
            g_ptr_array_free(array,TRUE);
        elements=g_slist_delete_link(elements,elements);
    }
    g_tree_destroy(namespaces);
    /* Release build_qname()'s scratch buffer. */
    build_qname(NULL,NULL);
    return r;
}
/*
 * Copy an XML document from standard input to standard output through
 * stream().  Exits with EXIT_SUCCESS when stream() returns 0 and with
 * EXIT_FAILURE otherwise, or when the reader or writer cannot be created.
 */
int main(void)
{
    int r;
    xmlTextWriter *dst;
    xmlTextReader *src;

    /* File descriptor 0 is standard input; network access is disabled. */
    src=xmlReaderForFd(0,NULL,NULL,XML_PARSE_NONET);
    if (!src)
    {
        fprintf(stderr,"Failed to create XML reader on stdin\n");
        return EXIT_FAILURE;
    }
    dst=xmlNewTextWriterFilename("/dev/stdout",0);
    if (!dst)
    {
        fprintf(stderr,"Failed to create XML writer on /dev/stdout\n");
        xmlFreeTextReader(src);
        return EXIT_FAILURE;
    }
    r=stream(dst,src);
    xmlFreeTextReader(src);
    xmlFreeTextWriter(dst);
    return r==0?EXIT_SUCCESS:EXIT_FAILURE;
}
_______________________________________________
xml mailing list, project page http://xmlsoft.org/
[email protected]
http://mail.gnome.org/mailman/listinfo/xml