Signed-off-by: Kirill A. Shutemov <[email protected]>
---
ext/libxml/libxml.c | 1 +
ext/libxml/ruby_libxml.h | 1 +
ext/libxml/ruby_xml_push_parser.c | 77 +++++++++++
ext/libxml/ruby_xml_push_parser.h | 8 ++
test/tc_push_parser.rb | 253 +++++++++++++++++++++++++++++++++++++
test/test_suite.rb | 3 +-
6 files changed, 342 insertions(+), 1 deletions(-)
create mode 100644 ext/libxml/ruby_xml_push_parser.c
create mode 100644 ext/libxml/ruby_xml_push_parser.h
create mode 100644 test/tc_push_parser.rb
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index c722642..b861f93 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -859,6 +859,7 @@ void Init_libxml_ruby(void)
ruby_init_xml_namespace();
ruby_init_xml_sax_parser();
ruby_init_xml_sax2_handler();
+ ruby_init_xml_push_parser();
ruby_init_xml_xinclude();
ruby_init_xml_xpath();
ruby_init_xml_xpath_context();
diff --git a/ext/libxml/ruby_libxml.h b/ext/libxml/ruby_libxml.h
index fbc3108..2843398 100644
--- a/ext/libxml/ruby_libxml.h
+++ b/ext/libxml/ruby_libxml.h
@@ -73,6 +73,7 @@
#include "ruby_xml_parser_context.h"
#include "ruby_xml_sax2_handler.h"
#include "ruby_xml_sax_parser.h"
+#include "ruby_xml_push_parser.h"
#include "ruby_xml_xinclude.h"
#include "ruby_xml_xpath.h"
#include "ruby_xml_xpath_expression.h"
diff --git a/ext/libxml/ruby_xml_push_parser.c b/ext/libxml/ruby_xml_push_parser.c
new file mode 100644
index 0000000..deab671
--- /dev/null
+++ b/ext/libxml/ruby_xml_push_parser.c
@@ -0,0 +1,77 @@
+#include "ruby_libxml.h"
+#include "ruby_xml_push_parser.h"
+
+/* Ruby class object for XML::PushParser; assigned in ruby_init_xml_push_parser(). */
+VALUE cXMLPushParser;
+
+/* Interned ID of the "@callbacks" instance variable; assigned in ruby_init_xml_push_parser(). */
+static ID CALLBACKS_ATTR;
+
+/*
+ * Free function handed to Data_Wrap_Struct for the wrapped parser context.
+ *
+ * xmlFreeParserCtxt() releases the context but not the document stored in
+ * ctxt->myDoc, so a document left behind by the parser is freed explicitly.
+ * NOTE(review): with the SAX handler used here myDoc is presumably always
+ * NULL, in which case the extra cleanup is a no-op -- confirm.
+ */
+static void free_parser(xmlParserCtxtPtr ctxt)
+{
+  xmlDocPtr doc;
+
+  if (ctxt == NULL)
+    return;
+
+  /* Read myDoc before the context that holds the pointer goes away. */
+  doc = ctxt->myDoc;
+  xmlFreeParserCtxt(ctxt);
+
+  if (doc != NULL)
+    xmlFreeDoc(doc);
+}
+
+/*
+ * call-seq:
+ *    XML::PushParser.new -> parser
+ *
+ * Creates a libxml2 push-parser context bound to rxml_sax_handler, wraps it
+ * in a Ruby data object (released by free_parser) and stores that object in
+ * the @parser instance variable. If the context cannot be created the
+ * failure is reported through rxml_raise() using xmlLastError.
+ */
+static VALUE rxml_push_parser_initialize(VALUE self)
+{
+  VALUE wrapped;
+  xmlParserCtxtPtr parser_ctxt =
+    xmlCreatePushParserCtxt(&rxml_sax_handler, NULL, NULL, 0, NULL);
+
+  if (parser_ctxt == NULL) {
+    rxml_raise(&xmlLastError);
+    return Qnil;
+  }
+
+  /* No mark function: the wrapped context owns no Ruby objects itself. */
+  wrapped = Data_Wrap_Struct(rb_cData, NULL, free_parser, parser_ctxt);
+  rb_iv_set(self, "@parser", wrapped);
+
+  return self;
+}
+
+/*
+ * call-seq:
+ *    parser.parse_chunk(string) -> true
+ *
+ * Feeds one chunk of the document to the push parser. The object currently
+ * stored in @callbacks is installed as the SAX user data before parsing.
+ *
+ * string is converted with StringValue (so to_str is honoured and a
+ * TypeError is raised for non-strings) before its pointer and length are
+ * read. Reading both inside the xmlParseChunk() argument list would leave
+ * the evaluation order unspecified, and RSTRING_LEN could be applied to a
+ * VALUE that is not yet a String.
+ *
+ * Returns true when xmlParseChunk() reports success; otherwise the error in
+ * xmlLastError is passed to rxml_raise().
+ */
+static VALUE rxml_push_parser_parse_chunk(VALUE self, VALUE string)
+{
+  xmlParserCtxtPtr ctxt;
+  VALUE handler = rb_ivar_get(self, CALLBACKS_ATTR);
+  const char *chunk;
+  long chunk_len;
+
+  /* Convert first, then take pointer and length from the same object. */
+  StringValue(string);
+  chunk = RSTRING_PTR(string);
+  chunk_len = RSTRING_LEN(string);
+
+  Data_Get_Struct(rb_iv_get(self, "@parser"), xmlParserCtxt, ctxt);
+
+  /* Refreshed on every call so a reassigned @callbacks takes effect. */
+  ctxt->userData = (void *)handler;
+
+  /* terminate = 0: more chunks may follow. */
+  if (xmlParseChunk(ctxt, chunk, (int)chunk_len, 0)) {
+    rxml_raise(&xmlLastError);
+    return Qfalse;
+  }
+
+  return Qtrue;
+}
+
+/*
+ * call-seq:
+ *    parser.close -> true
+ *
+ * Tells the push parser that no more input will arrive. The object stored
+ * in @callbacks is installed as the SAX user data first. Returns true when
+ * xmlParseChunk() reports success; otherwise the error in xmlLastError is
+ * passed to rxml_raise().
+ */
+static VALUE rxml_push_parser_close(VALUE self)
+{
+  xmlParserCtxtPtr parser_ctxt;
+  VALUE callbacks = rb_ivar_get(self, CALLBACKS_ATTR);
+  int status;
+
+  Data_Get_Struct(rb_iv_get(self, "@parser"), xmlParserCtxt, parser_ctxt);
+  parser_ctxt->userData = (void *)callbacks;
+
+  /* Empty chunk with terminate = 1 marks the end of the document. */
+  status = xmlParseChunk(parser_ctxt, "", 0, 1);
+  if (status == 0)
+    return Qtrue;
+
+  rxml_raise(&xmlLastError);
+  return Qfalse;
+}
+
+/*
+ * Defines the XML::PushParser class under mXML, its read/write "callbacks"
+ * attribute and its instance methods (initialize, parse_chunk, close).
+ * Also interns the "@callbacks" ID used by the method implementations.
+ */
+void ruby_init_xml_push_parser(void)
+{
+  /* PushParser */
+  cXMLPushParser = rb_define_class_under(mXML, "PushParser", rb_cObject);
+
+  /* Attributes */
+  CALLBACKS_ATTR = rb_intern("@callbacks");
+  rb_define_attr(cXMLPushParser, "callbacks", 1, 1);
+
+  /* Instance Methods */
+  rb_define_method(cXMLPushParser, "initialize",
+                   rxml_push_parser_initialize, 0);
+  rb_define_method(cXMLPushParser, "parse_chunk",
+                   rxml_push_parser_parse_chunk, 1);
+  rb_define_method(cXMLPushParser, "close", rxml_push_parser_close, 0);
+}
diff --git a/ext/libxml/ruby_xml_push_parser.h b/ext/libxml/ruby_xml_push_parser.h
new file mode 100644
index 0000000..e0f7ce3
--- /dev/null
+++ b/ext/libxml/ruby_xml_push_parser.h
@@ -0,0 +1,8 @@
+#ifndef __rxml_PUSH_PARSER__
+#define __rxml_PUSH_PARSER__
+
+/* Class object for XML::PushParser, defined in ruby_xml_push_parser.c. */
+extern VALUE cXMLPushParser;
+
+/* Registers the XML::PushParser class, its attribute and its methods. */
+void ruby_init_xml_push_parser(void);
+
+#endif
diff --git a/test/tc_push_parser.rb b/test/tc_push_parser.rb
new file mode 100644
index 0000000..e6dc492
--- /dev/null
+++ b/test/tc_push_parser.rb
@@ -0,0 +1,253 @@
+require 'xml'
+require 'test/unit'
+
+class DocTypeCallback
+ include XML::SaxParser::Callbacks
+ def on_start_element(element, attributes)
+ end
+end
+
+# SAX callback object that records every event it receives as a formatted
+# string in +result+, in arrival order, so tests can assert on the sequence.
+class TestCaseCallbacks
+  include XML::SaxParser::Callbacks
+
+  # Array of recorded event strings.
+  attr_accessor :result
+
+  def initialize
+    @result = []
+  end
+
+  def on_cdata_block(cdata)
+    @result << "cdata: #{cdata}"
+  end
+
+  def on_characters(chars)
+    @result << "characters: #{chars}"
+  end
+
+  def on_comment(text)
+    @result << "comment: #{text}"
+  end
+
+  def on_end_document
+    @result << "end_document"
+  end
+
+  def on_end_element(name)
+    @result << "end_element: #{name}"
+  end
+
+  def on_end_element_ns(name, prefix, uri)
+    @result << "end_element_ns #{name}, prefix: #{prefix}, uri: #{uri}"
+  end
+
+  # Called for parser errors.
+  def on_error(error)
+    @result << "error: #{error}"
+  end
+
+  def on_processing_instruction(target, data)
+    @result << "pi: #{target} #{data}"
+  end
+
+  def on_start_document
+    @result << "startdoc"
+  end
+
+  # attributes may be nil; it is recorded as an empty hash in that case.
+  def on_start_element(name, attributes)
+    attributes ||= {}
+    @result << "start_element: #{name}, attr: #{attributes.inspect}"
+  end
+
+  # attributes and namespaces may be nil; each is recorded as an empty hash.
+  # The message is one line: the adjacent literals below are concatenated
+  # without any newline between them.
+  def on_start_element_ns(name, attributes, prefix, uri, namespaces)
+    attributes ||= {}
+    namespaces ||= {}
+    @result << "start_element_ns: #{name}, attr: #{attributes.inspect}, " \
+               "prefix: #{prefix}, uri: #{uri}, ns: #{namespaces.inspect}"
+  end
+end
+
+class TestPushParser < Test::Unit::TestCase
+ def setup
+ XML.default_keep_blanks = true
+ @xp = XML::PushParser.new
+ end
+
+ def teardown
+ @xp = nil
+ XML.default_keep_blanks = true
+ end
+
+ def saxtest_file
+ File.join(File.dirname(__FILE__), 'model/atom.xml')
+ end
+
+ def verify
+ result = @xp.callbacks.result
+
+ i = -1
+ assert_equal("startdoc", result[i+=1])
+ assert_equal("pi: xml-stylesheet type=\"text/xsl\"
href=\"my_stylesheet.xsl\"", result[i+=1])
+ assert_equal("start_element: feed, attr:
{nil=>\"http://www.w3.org/2005/Atom\"}", result[i+=1])
+ assert_equal("start_element_ns: feed, attr:
{nil=>\"http://www.w3.org/2005/Atom\"}, prefix: , uri:
http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("comment: Not a valid atom entry ", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("start_element: entry, attr: {}", result[i+=1])
+ assert_equal("start_element_ns: entry, attr: {}, prefix: , uri:
http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("start_element: title, attr: {\"type\"=>\"html\"}",
result[i+=1])
+ assert_equal("start_element_ns: title, attr: {\"type\"=>\"html\"}, prefix:
, uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+ assert_equal("cdata: <<strong>>", result[i+=1])
+ assert_equal("end_element: title", result[i+=1])
+ assert_equal("end_element_ns title, prefix: , uri:
http://www.w3.org/2005/Atom", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("start_element: content, attr: {\"type\"=>\"xhtml\"}",
result[i+=1])
+ assert_equal("start_element_ns: content, attr: {\"type\"=>\"xhtml\"},
prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("start_element: xhtml:div, attr:
{\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}", result[i+=1])
+ assert_equal("start_element_ns: div, attr:
{\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}, prefix: xhtml, uri:
http://www.w3.org/1999/xhtml, ns: {}", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("start_element: xhtml:p, attr: {}", result[i+=1])
+ assert_equal("start_element_ns: p, attr: {}, prefix: xhtml, uri:
http://www.w3.org/1999/xhtml, ns: {}", result[i+=1])
+ assert_equal("characters: hi there", result[i+=1])
+ assert_equal("end_element: xhtml:p", result[i+=1])
+ assert_equal("end_element_ns p, prefix: xhtml, uri:
http://www.w3.org/1999/xhtml", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("end_element: xhtml:div", result[i+=1])
+ assert_equal("end_element_ns div, prefix: xhtml, uri:
http://www.w3.org/1999/xhtml", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("end_element: content", result[i+=1])
+ assert_equal("end_element_ns content, prefix: , uri:
http://www.w3.org/2005/Atom", result[i+=1])
+ assert_equal("characters: \n ", result[i+=1])
+ assert_equal("end_element: entry", result[i+=1])
+ assert_equal("end_element_ns entry, prefix: , uri:
http://www.w3.org/2005/Atom", result[i+=1])
+ assert_equal("characters: \n", result[i+=1])
+ assert_equal("end_element: feed", result[i+=1])
+ assert_equal("end_element_ns feed, prefix: , uri:
http://www.w3.org/2005/Atom", result[i+=1])
+ assert_equal("end_document", result[i+=1])
+ end
+
+ def test_no_callbacks
+ File.read(saxtest_file).each_char{ |char|
+ assert_equal true, @xp.parse_chunk(char)
+ }
+ assert_equal true, @xp.close
+ end
+
+ def test_parsing
+ @xp.callbacks = TestCaseCallbacks.new
+ File.read(saxtest_file).each_char{ |char|
+ @xp.parse_chunk(char)
+ }
+ @xp.close
+ verify
+ end
+
+ def test_doctype
+ @xp.callbacks = DocTypeCallback.new
+ string = <<-EOS
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Results SYSTEM "results.dtd">
+<Results>
+ <a>a1</a>
+</Results>
+EOS
+ string.each_char{ |char|
+ assert_equal true, @xp.parse_chunk(char)
+ }
+ assert_equal true, @xp.close
+ end
+
+ def test_parse_warning
+ @xp.callbacks = TestCaseCallbacks.new
+ # Two xml PIs is a warning
+ string = <<-EOS
+<?xml version="1.0" encoding="utf-8"?>
+<?xml-invalid?>
+<Test/>
+EOS
+
+ string.each_char{ |char|
+ @xp.parse_chunk(char)
+ }
+ @xp.close
+
+ # Check callbacks
+ result = @xp.callbacks.result
+ i = -1
+ assert_equal("startdoc", result[i+=1])
+ assert_equal("error: Warning: xmlParsePITarget: invalid name prefix 'xml'
at :2.", result[i+=1])
+ assert_equal("pi: xml-invalid ", result[i+=1])
+ assert_equal("start_element: Test, attr: {}", result[i+=1])
+ assert_equal("start_element_ns: Test, attr: {}, prefix: , uri: , ns: {}",
result[i+=1])
+ assert_equal("end_element: Test", result[i+=1])
+ assert_equal("end_element_ns Test, prefix: , uri: ", result[i+=1])
+ assert_equal("end_document", result[i+=1])
+ end
+
+ def test_parse_error
+ @xp.callbacks = TestCaseCallbacks.new
+ string = <<-EOS
+ <Results>
+ EOS
+
+ error = assert_raise(XML::Error) do
+ string.each_char{ |char|
+ @xp.parse_chunk(char)
+ }
+ @xp.close
+ end
+
+ # Check callbacks
+ result = @xp.callbacks.result
+
+ i = -1
+
+ assert_equal("startdoc", result[i+=1])
+ assert_equal("start_element: Results, attr: {}", result[i+=1])
+ assert_equal("start_element_ns: Results, attr: {}, prefix: , uri: , ns:
{}", result[i+=1])
+ assert_equal("error: Fatal error: Extra content at the end of the document
at :1.", result[i+=1])
+ assert_equal("end_document", result[i+=1])
+
+ assert_not_nil(error)
+ assert_kind_of(XML::Error, error)
+ assert_equal("Fatal error: Extra content at the end of the document at
:1.", error.message)
+ assert_equal(XML::Error::PARSER, error.domain)
+ assert_equal(XML::Error::DOCUMENT_END, error.code)
+ assert_equal(XML::Error::FATAL, error.level)
+ assert_nil(error.file)
+ assert_equal(1, error.line)
+ assert_nil(error.str1)
+ assert_nil(error.str2)
+ assert_nil(error.str3)
+ assert_equal(0, error.int1)
+ assert_equal(10, error.int2)
+ assert_nil(error.node)
+ end
+
+ def test_push
+ @xp.callbacks = TestCaseCallbacks.new
+
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk("<tes")
+ assert_equal("startdoc", @xp.callbacks.result.shift)
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk("t>")
+ assert_equal("start_element: test, attr: {}", @xp.callbacks.result.shift)
+ assert_equal("start_element_ns: test, attr: {}, prefix: , uri: , ns: {}",
@xp.callbacks.result.shift)
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk("text")
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk("<")
+ assert_equal("characters: text", @xp.callbacks.result.shift)
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk("/test")
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.parse_chunk(">")
+ assert_equal("end_element: test", @xp.callbacks.result.shift)
+ assert_equal("end_element_ns test, prefix: , uri: ",
@xp.callbacks.result.shift)
+ assert_nil(@xp.callbacks.result.shift)
+ @xp.close
+ assert_equal("end_document", @xp.callbacks.result.shift)
+ assert_nil(@xp.callbacks.result.shift)
+ end
+end
diff --git a/test/test_suite.rb b/test/test_suite.rb
index 1389842..d88912d 100644
--- a/test/test_suite.rb
+++ b/test/test_suite.rb
@@ -18,6 +18,7 @@ require 'tc_node_write'
require 'tc_node_xlink'
require 'tc_parser'
require 'tc_parser_context'
+require 'tc_push_parser'
require 'tc_reader'
require 'tc_relaxng'
require 'tc_sax_parser'
@@ -31,4 +32,4 @@ require 'tc_xpointer'
# Compatibility
require 'tc_properties'
-require 'tc_deprecated_require'
\ No newline at end of file
+require 'tc_deprecated_require'
--
1.6.0.2.GIT
_______________________________________________
libxml-devel mailing list
[email protected]
http://rubyforge.org/mailman/listinfo/libxml-devel