Signed-off-by: Kirill A. Shutemov <kir...@shutemov.name> --- ext/libxml/libxml.c | 1 + ext/libxml/ruby_libxml.h | 1 + ext/libxml/ruby_xml_push_parser.c | 77 +++++++++++ ext/libxml/ruby_xml_push_parser.h | 8 ++ test/tc_push_parser.rb | 253 +++++++++++++++++++++++++++++++++++++ test/test_suite.rb | 3 +- 6 files changed, 342 insertions(+), 1 deletions(-) create mode 100644 ext/libxml/ruby_xml_push_parser.c create mode 100644 ext/libxml/ruby_xml_push_parser.h create mode 100644 test/tc_push_parser.rb
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index c722642..b861f93 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -859,6 +859,7 @@ void Init_libxml_ruby(void) ruby_init_xml_namespace(); ruby_init_xml_sax_parser(); ruby_init_xml_sax2_handler(); + ruby_init_xml_push_parser(); ruby_init_xml_xinclude(); ruby_init_xml_xpath(); ruby_init_xml_xpath_context(); diff --git a/ext/libxml/ruby_libxml.h b/ext/libxml/ruby_libxml.h index fbc3108..2843398 100644 --- a/ext/libxml/ruby_libxml.h +++ b/ext/libxml/ruby_libxml.h @@ -73,6 +73,7 @@ #include "ruby_xml_parser_context.h" #include "ruby_xml_sax2_handler.h" #include "ruby_xml_sax_parser.h" +#include "ruby_xml_push_parser.h" #include "ruby_xml_xinclude.h" #include "ruby_xml_xpath.h" #include "ruby_xml_xpath_expression.h" diff --git a/ext/libxml/ruby_xml_push_parser.c b/ext/libxml/ruby_xml_push_parser.c new file mode 100644 index 0000000..deab671 --- /dev/null +++ b/ext/libxml/ruby_xml_push_parser.c @@ -0,0 +1,77 @@ +#include "ruby_libxml.h" +#include "ruby_xml_push_parser.h" + +VALUE cXMLPushParser; + +static ID CALLBACKS_ATTR; + +static void free_parser(xmlParserCtxtPtr ctxt) +{ + xmlFreeParserCtxt(ctxt); +} + +static VALUE rxml_push_parser_initialize(VALUE self) +{ + xmlParserCtxtPtr ctxt; + VALUE parser; + + ctxt = xmlCreatePushParserCtxt(&rxml_sax_handler, + NULL, NULL, 0, NULL); + + if (!ctxt) { + rxml_raise(&xmlLastError); + return Qnil; + } + + parser = Data_Wrap_Struct(rb_cData, NULL, free_parser, ctxt); + rb_iv_set(self, "@parser", parser); + return self; +} + +static VALUE rxml_push_parser_parse_chunk(VALUE self, VALUE string) +{ + xmlParserCtxtPtr ctxt; + VALUE handler = rb_ivar_get(self, CALLBACKS_ATTR); + + Data_Get_Struct(rb_iv_get(self, "@parser"), xmlParserCtxt, ctxt); + + ctxt->userData = (void *)handler; + + if (xmlParseChunk(ctxt, StringValuePtr(string), + RSTRING_LEN(string), 0)) { + rxml_raise(&xmlLastError); + return Qfalse; + } else + return Qtrue; +} + +static VALUE 
rxml_push_parser_close(VALUE self) +{ + xmlParserCtxtPtr ctxt; + VALUE handler = rb_ivar_get(self, CALLBACKS_ATTR); + + Data_Get_Struct(rb_iv_get(self, "@parser"), xmlParserCtxt, ctxt); + + ctxt->userData = (void *)handler; + + if (xmlParseChunk(ctxt, "", 0, 1)) { + rxml_raise(&xmlLastError); + return Qfalse; + } else + return Qtrue; +} + +void ruby_init_xml_push_parser(void) +{ + /* PushParser */ + cXMLPushParser = rb_define_class_under(mXML, "PushParser", rb_cObject); + + /* Attributes */ + CALLBACKS_ATTR = rb_intern("@callbacks"); + rb_define_attr(cXMLPushParser, "callbacks", 1, 1); + + /* Instance Methods */ + rb_define_method(cXMLPushParser, "initialize", rxml_push_parser_initialize, 0); + rb_define_method(cXMLPushParser, "parse_chunk", rxml_push_parser_parse_chunk, 1); + rb_define_method(cXMLPushParser, "close", rxml_push_parser_close, 0); +} diff --git a/ext/libxml/ruby_xml_push_parser.h b/ext/libxml/ruby_xml_push_parser.h new file mode 100644 index 0000000..e0f7ce3 --- /dev/null +++ b/ext/libxml/ruby_xml_push_parser.h @@ -0,0 +1,8 @@ +#ifndef __rxml_PUSH_PARSER__ +#define __rxml_PUSH_PARSER__ + +extern VALUE cXMLPushParser; /* class handle defined in ruby_xml_push_parser.c */ + +void ruby_init_xml_push_parser(void); + +#endif diff --git a/test/tc_push_parser.rb b/test/tc_push_parser.rb new file mode 100644 index 0000000..e6dc492 --- /dev/null +++ b/test/tc_push_parser.rb @@ -0,0 +1,253 @@ +require 'xml' +require 'test/unit' + +class DocTypeCallback + include XML::SaxParser::Callbacks + def on_start_element(element, attributes) + end +end + +class TestCaseCallbacks + include XML::SaxParser::Callbacks + + attr_accessor :result + + def initialize + @result = Array.new + end + + def on_cdata_block(cdata) + @result << "cdata: #{cdata}" + end + + def on_characters(chars) + @result << "characters: #{chars}" + end + + def on_comment(text) + @result << "comment: #{text}" + end + + def on_end_document + @result << "end_document" + end + + def on_end_element(name) + @result << "end_element: #{name}" + end + + def 
on_end_element_ns(name, prefix, uri) + @result << "end_element_ns #{name}, prefix: #{prefix}, uri: #{uri}" + end + + # Called for parser errors. + def on_error(error) + @result << "error: #{error}" + end + + def on_processing_instruction(target, data) + @result << "pi: #{target} #{data}" + end + + def on_start_document + @result << "startdoc" + end + + def on_start_element(name, attributes) + attributes ||= Hash.new + @result << "start_element: #{name}, attr: #{attributes.inspect}" + end + + def on_start_element_ns(name, attributes, prefix, uri, namespaces) + attributes ||= Hash.new + namespaces ||= Hash.new + @result << "start_element_ns: #{name}, attr: #{attributes.inspect}, prefix: #{prefix}, uri: #{uri}, ns: #{namespaces.inspect}" + end +end + +class TestPushParser < Test::Unit::TestCase + def setup + XML.default_keep_blanks = true + @xp = XML::PushParser.new + end + + def teardown + @xp = nil + XML.default_keep_blanks = true + end + + def saxtest_file + File.join(File.dirname(__FILE__), 'model/atom.xml') + end + + def verify + result = @xp.callbacks.result + + i = -1 + assert_equal("startdoc", result[i+=1]) + assert_equal("pi: xml-stylesheet type=\"text/xsl\" href=\"my_stylesheet.xsl\"", result[i+=1]) + assert_equal("start_element: feed, attr: {nil=>\"http://www.w3.org/2005/Atom\"}", result[i+=1]) + assert_equal("start_element_ns: feed, attr: {nil=>\"http://www.w3.org/2005/Atom\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("comment: Not a valid atom entry ", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("start_element: entry, attr: {}", result[i+=1]) + assert_equal("start_element_ns: entry, attr: {}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("start_element: title, attr: {\"type\"=>\"html\"}", result[i+=1]) + assert_equal("start_element_ns: title, attr: 
{\"type\"=>\"html\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1]) + assert_equal("cdata: <<strong>>", result[i+=1]) + assert_equal("end_element: title", result[i+=1]) + assert_equal("end_element_ns title, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("start_element: content, attr: {\"type\"=>\"xhtml\"}", result[i+=1]) + assert_equal("start_element_ns: content, attr: {\"type\"=>\"xhtml\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("start_element: xhtml:div, attr: {\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}", result[i+=1]) + assert_equal("start_element_ns: div, attr: {\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}, prefix: xhtml, uri: http://www.w3.org/1999/xhtml, ns: {}", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("start_element: xhtml:p, attr: {}", result[i+=1]) + assert_equal("start_element_ns: p, attr: {}, prefix: xhtml, uri: http://www.w3.org/1999/xhtml, ns: {}", result[i+=1]) + assert_equal("characters: hi there", result[i+=1]) + assert_equal("end_element: xhtml:p", result[i+=1]) + assert_equal("end_element_ns p, prefix: xhtml, uri: http://www.w3.org/1999/xhtml", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("end_element: xhtml:div", result[i+=1]) + assert_equal("end_element_ns div, prefix: xhtml, uri: http://www.w3.org/1999/xhtml", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("end_element: content", result[i+=1]) + assert_equal("end_element_ns content, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1]) + assert_equal("characters: \n ", result[i+=1]) + assert_equal("end_element: entry", result[i+=1]) + assert_equal("end_element_ns entry, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1]) + assert_equal("characters: \n", result[i+=1]) + assert_equal("end_element: 
feed", result[i+=1]) + assert_equal("end_element_ns feed, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1]) + assert_equal("end_document", result[i+=1]) + end + + def test_no_callbacks + File.read(saxtest_file).each_char{ |char| + assert_equal true, @xp.parse_chunk(char) + } + assert_equal true, @xp.close + end + + def test_parsing + @xp.callbacks = TestCaseCallbacks.new + File.read(saxtest_file).each_char{ |char| + @xp.parse_chunk(char) + } + @xp.close + verify + end + + def test_doctype + @xp.callbacks = DocTypeCallback.new + string = <<-EOS +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE Results SYSTEM "results.dtd"> +<Results> + <a>a1</a> +</Results> +EOS + string.each_char{ |char| + assert_equal true, @xp.parse_chunk(char) + } + assert_equal true, @xp.close + end + + def test_parse_warning + @xp.callbacks = TestCaseCallbacks.new + # Two xml PIs is a warning + string = <<-EOS +<?xml version="1.0" encoding="utf-8"?> +<?xml-invalid?> +<Test/> +EOS + + string.each_char{ |char| + @xp.parse_chunk(char) + } + @xp.close + + # Check callbacks + result = @xp.callbacks.result + i = -1 + assert_equal("startdoc", result[i+=1]) + assert_equal("error: Warning: xmlParsePITarget: invalid name prefix 'xml' at :2.", result[i+=1]) + assert_equal("pi: xml-invalid ", result[i+=1]) + assert_equal("start_element: Test, attr: {}", result[i+=1]) + assert_equal("start_element_ns: Test, attr: {}, prefix: , uri: , ns: {}", result[i+=1]) + assert_equal("end_element: Test", result[i+=1]) + assert_equal("end_element_ns Test, prefix: , uri: ", result[i+=1]) + assert_equal("end_document", result[i+=1]) + end + + def test_parse_error + @xp.callbacks = TestCaseCallbacks.new + string = <<-EOS + <Results> + EOS + + error = assert_raise(XML::Error) do + string.each_char{ |char| + @xp.parse_chunk(char) + } + @xp.close + end + + # Check callbacks + result = @xp.callbacks.result + + i = -1 + + assert_equal("startdoc", result[i+=1]) + assert_equal("start_element: Results, attr: {}", 
result[i+=1]) + assert_equal("start_element_ns: Results, attr: {}, prefix: , uri: , ns: {}", result[i+=1]) + assert_equal("error: Fatal error: Extra content at the end of the document at :1.", result[i+=1]) + assert_equal("end_document", result[i+=1]) + + assert_not_nil(error) + assert_kind_of(XML::Error, error) + assert_equal("Fatal error: Extra content at the end of the document at :1.", error.message) + assert_equal(XML::Error::PARSER, error.domain) + assert_equal(XML::Error::DOCUMENT_END, error.code) + assert_equal(XML::Error::FATAL, error.level) + assert_nil(error.file) + assert_equal(1, error.line) + assert_nil(error.str1) + assert_nil(error.str2) + assert_nil(error.str3) + assert_equal(0, error.int1) + assert_equal(10, error.int2) + assert_nil(error.node) + end + + def test_push + @xp.callbacks = TestCaseCallbacks.new + + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk("<tes") + assert_equal("startdoc", @xp.callbacks.result.shift) + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk("t>") + assert_equal("start_element: test, attr: {}", @xp.callbacks.result.shift) + assert_equal("start_element_ns: test, attr: {}, prefix: , uri: , ns: {}", @xp.callbacks.result.shift) + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk("text") + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk("<") + assert_equal("characters: text", @xp.callbacks.result.shift) + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk("/test") + assert_nil(@xp.callbacks.result.shift) + @xp.parse_chunk(">") + assert_equal("end_element: test", @xp.callbacks.result.shift) + assert_equal("end_element_ns test, prefix: , uri: ", @xp.callbacks.result.shift) + assert_nil(@xp.callbacks.result.shift) + @xp.close + assert_equal("end_document", @xp.callbacks.result.shift) + assert_nil(@xp.callbacks.result.shift) + end +end diff --git a/test/test_suite.rb b/test/test_suite.rb index 1389842..d88912d 100644 --- a/test/test_suite.rb +++ b/test/test_suite.rb @@ -18,6 +18,7 @@ 
require 'tc_node_write' require 'tc_node_xlink' require 'tc_parser' require 'tc_parser_context' +require 'tc_push_parser' require 'tc_reader' require 'tc_relaxng' require 'tc_sax_parser' @@ -31,4 +32,4 @@ require 'tc_xpointer' # Compatibility require 'tc_properties' -require 'tc_deprecated_require' \ No newline at end of file +require 'tc_deprecated_require' -- 1.6.0.2.GIT _______________________________________________ libxml-devel mailing list libxml-devel@rubyforge.org http://rubyforge.org/mailman/listinfo/libxml-devel