Hello, libxml-ruby's SAX parser interface is currently of only limited use, because the document to be parsed must be passed in as a whole. This somewhat defeats the purpose of using a SAX parser. The attached patch adds a new constructor that puts the parser into the so-called "push mode", in which it won't complain about the document being unfinished while subsequent parts of it are parsed.
My goal was to parse a network XML stream (XMPP). Here, the document is provided continuously over time, and I need the SAX events long before the document ends. :-) Unfortunately, the libxml parser seems to buffer a lot, and I don't receive all events for the current string immediately. This behaviour renders it unsuitable for XMPP, and I will stick with the apparently antique expat binding unless someone comes up with a better idea. Stephan
>From 7b70b3f4bd2c212c7750ca22f175b1510451825b Mon Sep 17 00:00:00 2001 From: Stephan Maka <[EMAIL PROTECTED]> Date: Sat, 27 Sep 2008 00:32:07 +0200 Subject: [PATCH] ruby_xml_sax_parser_new_push_parser --- ext/libxml/ruby_xml_sax_parser.c | 50 +++++++++++++++++++++++++++++++++++-- ext/libxml/ruby_xml_sax_parser.h | 1 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/ext/libxml/ruby_xml_sax_parser.c b/ext/libxml/ruby_xml_sax_parser.c index 26921c6..2515dbf 100644 --- a/ext/libxml/ruby_xml_sax_parser.c +++ b/ext/libxml/ruby_xml_sax_parser.c @@ -33,6 +33,12 @@ ruby_xml_sax_parser_free(ruby_xml_sax_parser *nodesp) { /* Apparently this isn't needed: time will tell */ /* if (nodesp->xsh != NULL) */ /* xmlFreeSax_Parser(nodesp->sax_parser); */ + + if (nodesp->push_parser && nodesp->xpc) + { + xmlParseChunk(nodesp->xpc, NULL, 0, 1); + xmlFreeParserCtxt(nodesp->xpc); + } } void @@ -61,6 +67,7 @@ ruby_xml_sax_parser_new(VALUE class) { ruby_xml_sax_parser *nodesp; nodesp = ALLOC(ruby_xml_sax_parser); + nodesp->push_parser = 0; nodesp->xsh = &rubySAXHandlerStruct; nodesp->callbackHandler = Qnil; @@ -72,6 +79,28 @@ ruby_xml_sax_parser_new(VALUE class) { ruby_xml_sax_parser_free, nodesp)); } +/* + * call-seq: + * XML::SaxParser.new -> sax_parser + * + * Create a new XML::SaxParser instance. 
+ */ +VALUE +ruby_xml_sax_parser_new_push_parser(VALUE class) { + ruby_xml_sax_parser *nodesp; + + nodesp = ALLOC(ruby_xml_sax_parser); + nodesp->push_parser = 1; + nodesp->xsh = &rubySAXHandlerStruct; + + nodesp->callbackHandler = Qnil; + nodesp->xpc = NULL; + nodesp->filename = Qnil; + nodesp->str = Qnil; + + return(Data_Wrap_Struct(class, ruby_xml_sax_parser_mark, + ruby_xml_sax_parser_free, nodesp)); +} /* * call-seq: @@ -154,9 +183,23 @@ ruby_xml_sax_parser_parse(VALUE self) { status = xmlSAXUserParseFile(nodesp->xsh, nodesp, StringValuePtr(nodesp->filename)); } else if (nodesp->str != Qnil) { str = StringValuePtr(nodesp->str); - status = //ruby_xml_document_new(cXMLDocument, - xmlSAXUserParseMemory(nodesp->xsh, nodesp, - str, strlen(str)); //); + if (nodesp->push_parser) + { + if (!nodesp->xpc) + { + printf("new push: %s\n", str); + status = (nodesp->xpc = xmlCreatePushParserCtxt(nodesp->xsh, nodesp, str, strlen(str), "push.xml")) == NULL; + } + else + { + printf("push: %s\n", str); + status = xmlParseChunk(nodesp->xpc, str, strlen(str), 0) <= 0; + } + } + else + status = //ruby_xml_document_new(cXMLDocument, + xmlSAXUserParseMemory(nodesp->xsh, nodesp, + str, strlen(str)); //); } /* XXX This should return an exception for the various error codes @@ -417,6 +460,7 @@ ruby_init_xml_sax_parser(void) { /* SaxParser */ rb_define_singleton_method(cXMLSaxParser, "new", ruby_xml_sax_parser_new, 0); + rb_define_singleton_method(cXMLSaxParser, "new_push_parser", ruby_xml_sax_parser_new_push_parser, 0); rb_define_method(cXMLSaxParser, "filename", ruby_xml_sax_parser_filename_get, 0); diff --git a/ext/libxml/ruby_xml_sax_parser.h b/ext/libxml/ruby_xml_sax_parser.h index 2d20ad0..8593b7a 100644 --- a/ext/libxml/ruby_xml_sax_parser.h +++ b/ext/libxml/ruby_xml_sax_parser.h @@ -41,6 +41,7 @@ typedef struct ruby_xml_sax_parser_callbacks { */ typedef struct ruby_xml_sax_parser { + int push_parser; xmlParserCtxtPtr xpc; xmlSAXHandlerPtr xsh; //ruby_xml_sax_parser_callbacks 
*cbp; -- 1.5.6.5
_______________________________________________ libxml-devel mailing list libxml-devel@rubyforge.org http://rubyforge.org/mailman/listinfo/libxml-devel