Signed-off-by: Kirill A. Shutemov <kir...@shutemov.name>
---
 ext/libxml/libxml.c               |    1 +
 ext/libxml/ruby_libxml.h          |    1 +
 ext/libxml/ruby_xml_push_parser.c |   77 +++++++++++
 ext/libxml/ruby_xml_push_parser.h |    8 ++
 test/tc_push_parser.rb            |  253 +++++++++++++++++++++++++++++++++++++
 test/test_suite.rb                |    3 +-
 6 files changed, 342 insertions(+), 1 deletions(-)
 create mode 100644 ext/libxml/ruby_xml_push_parser.c
 create mode 100644 ext/libxml/ruby_xml_push_parser.h
 create mode 100644 test/tc_push_parser.rb

diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index c722642..b861f93 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -859,6 +859,7 @@ void Init_libxml_ruby(void)
   ruby_init_xml_namespace();
   ruby_init_xml_sax_parser();
   ruby_init_xml_sax2_handler();
+  ruby_init_xml_push_parser();
   ruby_init_xml_xinclude();
   ruby_init_xml_xpath();
   ruby_init_xml_xpath_context();
diff --git a/ext/libxml/ruby_libxml.h b/ext/libxml/ruby_libxml.h
index fbc3108..2843398 100644
--- a/ext/libxml/ruby_libxml.h
+++ b/ext/libxml/ruby_libxml.h
@@ -73,6 +73,7 @@
 #include "ruby_xml_parser_context.h"
 #include "ruby_xml_sax2_handler.h"
 #include "ruby_xml_sax_parser.h"
+#include "ruby_xml_push_parser.h"
 #include "ruby_xml_xinclude.h"
 #include "ruby_xml_xpath.h"
 #include "ruby_xml_xpath_expression.h"
diff --git a/ext/libxml/ruby_xml_push_parser.c b/ext/libxml/ruby_xml_push_parser.c
new file mode 100644
index 0000000..deab671
--- /dev/null
+++ b/ext/libxml/ruby_xml_push_parser.c
@@ -0,0 +1,77 @@
+#include "ruby_libxml.h"
+#include "ruby_xml_push_parser.h"
+
+VALUE cXMLPushParser;
+
+static ID CALLBACKS_ATTR;
+
+/*
+ * Free function registered with Data_Wrap_Struct() for the wrapped parser
+ * context: releases the context through xmlFreeParserCtxt().
+ */
+static void free_parser(xmlParserCtxtPtr parser_ctxt)
+{
+       xmlFreeParserCtxt(parser_ctxt);
+}
+
+/*
+ * PushParser#initialize
+ *
+ * Creates a libxml2 push parser context driven by rxml_sax_handler, wraps
+ * it in a Data object whose free function is free_parser, and stores that
+ * object in the @parser instance variable.  Returns self.
+ *
+ * If the context cannot be created, the last libxml2 error is handed to
+ * rxml_raise().
+ */
+static VALUE rxml_push_parser_initialize(VALUE self)
+{
+       VALUE wrapper;
+       xmlParserCtxtPtr ctxt = xmlCreatePushParserCtxt(&rxml_sax_handler,
+                       NULL, NULL, 0, NULL);
+
+       if (ctxt == NULL) {
+               rxml_raise(&xmlLastError);
+               return Qnil;
+       }
+
+       wrapper = Data_Wrap_Struct(rb_cData, NULL, free_parser, ctxt);
+       rb_iv_set(self, "@parser", wrapper);
+
+       return self;
+}
+
+/*
+ * PushParser#parse_chunk(string)
+ *
+ * Feeds one chunk of XML text to the push parser.  The object currently
+ * stored in @callbacks is re-read and installed as the context's userData
+ * on every call.
+ *
+ * Returns Qtrue when xmlParseChunk() returns 0; otherwise hands the last
+ * libxml2 error to rxml_raise().
+ */
+static VALUE rxml_push_parser_parse_chunk(VALUE self, VALUE string)
+{
+       xmlParserCtxtPtr ctxt;
+       VALUE handler = rb_ivar_get(self, CALLBACKS_ATTR);
+
+       /*
+        * Convert the argument to a String up front.  C leaves the evaluation
+        * order of function arguments unspecified, so calling
+        * StringValuePtr() and RSTRING_LEN() inside the same argument list
+        * could read the length of an object that has not been converted yet.
+        */
+       StringValue(string);
+
+       Data_Get_Struct(rb_iv_get(self, "@parser"), xmlParserCtxt, ctxt);
+
+       ctxt->userData = (void *)handler;
+
+       /* xmlParseChunk() takes the size as an int. */
+       if (xmlParseChunk(ctxt, RSTRING_PTR(string),
+                               (int)RSTRING_LEN(string), 0)) {
+               rxml_raise(&xmlLastError);
+               return Qfalse;
+       }
+
+       return Qtrue;
+}
+
+/*
+ * PushParser#close
+ *
+ * Finishes parsing by calling xmlParseChunk() with an empty buffer and the
+ * final (terminate) argument set to 1.  The object stored in @callbacks is
+ * installed as the context's userData first.
+ *
+ * Returns Qtrue when xmlParseChunk() returns 0; otherwise hands the last
+ * libxml2 error to rxml_raise().
+ */
+static VALUE rxml_push_parser_close(VALUE self)
+{
+       xmlParserCtxtPtr ctxt;
+       VALUE wrapper = rb_iv_get(self, "@parser");
+
+       Data_Get_Struct(wrapper, xmlParserCtxt, ctxt);
+       ctxt->userData = (void *)rb_ivar_get(self, CALLBACKS_ATTR);
+
+       if (xmlParseChunk(ctxt, "", 0, 1) == 0)
+               return Qtrue;
+
+       rxml_raise(&xmlLastError);
+       return Qfalse;
+}
+
+/*
+ * Defines the PushParser class under mXML, its read/write "callbacks"
+ * attribute, and the initialize, parse_chunk and close instance methods.
+ * Also caches the ID of the @callbacks instance variable in CALLBACKS_ATTR.
+ */
+void ruby_init_xml_push_parser(void)
+{
+       /* PushParser */
+       cXMLPushParser = rb_define_class_under(mXML, "PushParser", rb_cObject);
+
+       /* Attributes */
+       CALLBACKS_ATTR = rb_intern("@callbacks");
+       rb_define_attr(cXMLPushParser, "callbacks", 1, 1);
+
+       /* Instance Methods */
+       rb_define_method(cXMLPushParser, "initialize",
+                       rxml_push_parser_initialize, 0);
+       rb_define_method(cXMLPushParser, "parse_chunk",
+                       rxml_push_parser_parse_chunk, 1);
+       rb_define_method(cXMLPushParser, "close", rxml_push_parser_close, 0);
+}
diff --git a/ext/libxml/ruby_xml_push_parser.h b/ext/libxml/ruby_xml_push_parser.h
new file mode 100644
index 0000000..e0f7ce3
--- /dev/null
+++ b/ext/libxml/ruby_xml_push_parser.h
@@ -0,0 +1,8 @@
+#ifndef __rxml_PUSH_PARSER__
+#define __rxml_PUSH_PARSER__
+
+/* Ruby class object for PushParser; defined in ruby_xml_push_parser.c. */
+extern VALUE cXMLPushParser;
+
+/* Defines the PushParser class and its methods. */
+void ruby_init_xml_push_parser(void);
+
+#endif
diff --git a/test/tc_push_parser.rb b/test/tc_push_parser.rb
new file mode 100644
index 0000000..e6dc492
--- /dev/null
+++ b/test/tc_push_parser.rb
@@ -0,0 +1,253 @@
+require 'xml'
+require 'test/unit'
+
+# Callbacks object used by test_doctype: the only handler it overrides is
+# on_start_element, and that handler does nothing.
+class DocTypeCallback
+  include XML::SaxParser::Callbacks
+
+  def on_start_element(element, attributes); end
+end
+
+# Callbacks object that records every SAX event it receives as a formatted
+# string appended to +result+, so tests can assert on the exact sequence.
+class TestCaseCallbacks
+  include XML::SaxParser::Callbacks
+
+  attr_accessor :result
+
+  def initialize
+    @result = []
+  end
+
+  def on_cdata_block(cdata)
+    @result << "cdata: #{cdata}"
+  end
+
+  def on_characters(chars)
+    @result << "characters: #{chars}"
+  end
+
+  def on_comment(text)
+    @result << "comment: #{text}"
+  end
+
+  def on_end_document
+    @result << "end_document"
+  end
+
+  def on_end_element(name)
+    @result << "end_element: #{name}"
+  end
+
+  def on_end_element_ns(name, prefix, uri)
+    @result << "end_element_ns #{name}, prefix: #{prefix}, uri: #{uri}"
+  end
+
+  # Called for parser errors.
+  def on_error(error)
+    @result << "error: #{error}"
+  end
+
+  def on_processing_instruction(target, data)
+    @result << "pi: #{target} #{data}"
+  end
+
+  def on_start_document
+    @result << "startdoc"
+  end
+
+  # A nil attributes argument is recorded as an empty hash.
+  def on_start_element(name, attributes)
+    attributes ||= {}
+    @result << "start_element: #{name}, attr: #{attributes.inspect}"
+  end
+
+  # nil attributes / namespaces arguments are recorded as empty hashes.
+  def on_start_element_ns(name, attributes, prefix, uri, namespaces)
+    attributes ||= {}
+    namespaces ||= {}
+    @result << "start_element_ns: #{name}, attr: #{attributes.inspect}, prefix: #{prefix}, uri: #{uri}, ns: #{namespaces.inspect}"
+  end
+end
+
+# Tests for XML::PushParser: documents are pushed in small chunks (mostly one
+# character at a time) and the SAX events recorded by the callbacks object
+# are compared with the expected sequence.
+class TestPushParser < Test::Unit::TestCase
+  def setup
+    XML.default_keep_blanks = true
+    @xp = XML::PushParser.new
+  end
+
+  def teardown
+    @xp = nil
+    XML.default_keep_blanks = true
+  end
+
+  def saxtest_file
+    File.join(File.dirname(__FILE__), 'model/atom.xml')
+  end
+
+  # Asserts the full event sequence recorded while parsing saxtest_file.
+  def verify
+    result = @xp.callbacks.result
+
+    i = -1
+    assert_equal("startdoc", result[i+=1])
+    assert_equal("pi: xml-stylesheet type=\"text/xsl\" href=\"my_stylesheet.xsl\"", result[i+=1])
+    assert_equal("start_element: feed, attr: {nil=>\"http://www.w3.org/2005/Atom\"}", result[i+=1])
+    assert_equal("start_element_ns: feed, attr: {nil=>\"http://www.w3.org/2005/Atom\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+    assert_equal("characters: \n  ", result[i+=1])
+    assert_equal("comment:  Not a valid atom entry ", result[i+=1])
+    assert_equal("characters: \n  ", result[i+=1])
+    assert_equal("start_element: entry, attr: {}", result[i+=1])
+    assert_equal("start_element_ns: entry, attr: {}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+    assert_equal("characters: \n    ", result[i+=1])
+    assert_equal("start_element: title, attr: {\"type\"=>\"html\"}", result[i+=1])
+    assert_equal("start_element_ns: title, attr: {\"type\"=>\"html\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+    assert_equal("cdata: <<strong>>", result[i+=1])
+    assert_equal("end_element: title", result[i+=1])
+    assert_equal("end_element_ns title, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1])
+    assert_equal("characters: \n    ", result[i+=1])
+    assert_equal("start_element: content, attr: {\"type\"=>\"xhtml\"}", result[i+=1])
+    assert_equal("start_element_ns: content, attr: {\"type\"=>\"xhtml\"}, prefix: , uri: http://www.w3.org/2005/Atom, ns: {}", result[i+=1])
+    assert_equal("characters: \n      ", result[i+=1])
+    assert_equal("start_element: xhtml:div, attr: {\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}", result[i+=1])
+    assert_equal("start_element_ns: div, attr: {\"xhtml\"=>\"http://www.w3.org/1999/xhtml\"}, prefix: xhtml, uri: http://www.w3.org/1999/xhtml, ns: {}", result[i+=1])
+    assert_equal("characters: \n        ", result[i+=1])
+    assert_equal("start_element: xhtml:p, attr: {}", result[i+=1])
+    assert_equal("start_element_ns: p, attr: {}, prefix: xhtml, uri: http://www.w3.org/1999/xhtml, ns: {}", result[i+=1])
+    assert_equal("characters: hi there", result[i+=1])
+    assert_equal("end_element: xhtml:p", result[i+=1])
+    assert_equal("end_element_ns p, prefix: xhtml, uri: http://www.w3.org/1999/xhtml", result[i+=1])
+    assert_equal("characters: \n      ", result[i+=1])
+    assert_equal("end_element: xhtml:div", result[i+=1])
+    assert_equal("end_element_ns div, prefix: xhtml, uri: http://www.w3.org/1999/xhtml", result[i+=1])
+    assert_equal("characters: \n    ", result[i+=1])
+    assert_equal("end_element: content", result[i+=1])
+    assert_equal("end_element_ns content, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1])
+    assert_equal("characters: \n  ", result[i+=1])
+    assert_equal("end_element: entry", result[i+=1])
+    assert_equal("end_element_ns entry, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1])
+    assert_equal("characters: \n", result[i+=1])
+    assert_equal("end_element: feed", result[i+=1])
+    assert_equal("end_element_ns feed, prefix: , uri: http://www.w3.org/2005/Atom", result[i+=1])
+    assert_equal("end_document", result[i+=1])
+  end
+
+  # Parsing must succeed when no callbacks object has been assigned.
+  def test_no_callbacks
+    File.read(saxtest_file).each_char do |char|
+      assert_equal true, @xp.parse_chunk(char)
+    end
+    assert_equal true, @xp.close
+  end
+
+  def test_parsing
+    @xp.callbacks = TestCaseCallbacks.new
+    File.read(saxtest_file).each_char do |char|
+      @xp.parse_chunk(char)
+    end
+    @xp.close
+    verify
+  end
+
+  def test_doctype
+    @xp.callbacks = DocTypeCallback.new
+    string = <<-EOS
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Results SYSTEM "results.dtd">
+<Results>
+  <a>a1</a>
+</Results>
+EOS
+    string.each_char do |char|
+      assert_equal true, @xp.parse_chunk(char)
+    end
+    assert_equal true, @xp.close
+  end
+
+  def test_parse_warning
+    @xp.callbacks = TestCaseCallbacks.new
+    # Two xml PIs is a warning
+    string = <<-EOS
+<?xml version="1.0" encoding="utf-8"?>
+<?xml-invalid?>
+<Test/>
+EOS
+
+    string.each_char do |char|
+      @xp.parse_chunk(char)
+    end
+    @xp.close
+
+    # Check callbacks
+    result = @xp.callbacks.result
+    i = -1
+    assert_equal("startdoc", result[i+=1])
+    assert_equal("error: Warning: xmlParsePITarget: invalid name prefix 'xml' at :2.", result[i+=1])
+    assert_equal("pi: xml-invalid ", result[i+=1])
+    assert_equal("start_element: Test, attr: {}", result[i+=1])
+    assert_equal("start_element_ns: Test, attr: {}, prefix: , uri: , ns: {}", result[i+=1])
+    assert_equal("end_element: Test", result[i+=1])
+    assert_equal("end_element_ns Test, prefix: , uri: ", result[i+=1])
+    assert_equal("end_document", result[i+=1])
+  end
+
+  def test_parse_error
+    @xp.callbacks = TestCaseCallbacks.new
+    string = <<-EOS
+      <Results>
+    EOS
+
+    error = assert_raise(XML::Error) do
+      string.each_char do |char|
+        @xp.parse_chunk(char)
+      end
+      @xp.close
+    end
+
+    # Check callbacks
+    result = @xp.callbacks.result
+
+    i = -1
+
+    assert_equal("startdoc", result[i+=1])
+    assert_equal("start_element: Results, attr: {}", result[i+=1])
+    assert_equal("start_element_ns: Results, attr: {}, prefix: , uri: , ns: {}", result[i+=1])
+    assert_equal("error: Fatal error: Extra content at the end of the document at :1.", result[i+=1])
+    assert_equal("end_document", result[i+=1])
+
+    assert_not_nil(error)
+    assert_kind_of(XML::Error, error)
+    assert_equal("Fatal error: Extra content at the end of the document at :1.", error.message)
+    assert_equal(XML::Error::PARSER, error.domain)
+    assert_equal(XML::Error::DOCUMENT_END, error.code)
+    assert_equal(XML::Error::FATAL, error.level)
+    assert_nil(error.file)
+    assert_equal(1, error.line)
+    assert_nil(error.str1)
+    assert_nil(error.str2)
+    assert_nil(error.str3)
+    assert_equal(0, error.int1)
+    assert_equal(10, error.int2)
+    assert_nil(error.node)
+  end
+
+  # Checks which events have been recorded after each individual push.
+  def test_push
+    @xp.callbacks = TestCaseCallbacks.new
+
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk("<tes")
+    assert_equal("startdoc", @xp.callbacks.result.shift)
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk("t>")
+    assert_equal("start_element: test, attr: {}", @xp.callbacks.result.shift)
+    assert_equal("start_element_ns: test, attr: {}, prefix: , uri: , ns: {}", @xp.callbacks.result.shift)
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk("text")
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk("<")
+    assert_equal("characters: text", @xp.callbacks.result.shift)
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk("/test")
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.parse_chunk(">")
+    assert_equal("end_element: test", @xp.callbacks.result.shift)
+    assert_equal("end_element_ns test, prefix: , uri: ", @xp.callbacks.result.shift)
+    assert_nil(@xp.callbacks.result.shift)
+    @xp.close
+    assert_equal("end_document", @xp.callbacks.result.shift)
+    assert_nil(@xp.callbacks.result.shift)
+  end
+end
diff --git a/test/test_suite.rb b/test/test_suite.rb
index 1389842..d88912d 100644
--- a/test/test_suite.rb
+++ b/test/test_suite.rb
@@ -18,6 +18,7 @@ require 'tc_node_write'
 require 'tc_node_xlink'
 require 'tc_parser'
 require 'tc_parser_context'
+require 'tc_push_parser'
 require 'tc_reader'
 require 'tc_relaxng'
 require 'tc_sax_parser'
@@ -31,4 +32,4 @@ require 'tc_xpointer'
 
 # Compatibility
 require 'tc_properties'
-require 'tc_deprecated_require'
\ No newline at end of file
+require 'tc_deprecated_require'
-- 
1.6.0.2.GIT

_______________________________________________
libxml-devel mailing list
libxml-devel@rubyforge.org
http://rubyforge.org/mailman/listinfo/libxml-devel

Reply via email to