Hi Stephen,

Here's the result of running it on Mac OS X 10.5.6:

$ ruby test_attack_xml_with_libxml.rb
Loaded suite test_attack_xml
Started
/Library/Ruby/Gems/1.8/gems/libxml-ruby-1.1.2/lib/libxml/node.rb:100: [BUG] Bus Error
ruby 1.8.6 (2008-03-03) [universal-darwin9.0]

Abort trap

Can you get a stack trace?

Testing on both Windows and Fedora 10 (outside of Rails; I extracted the test case into a standalone script, see below) shows:

Fatal error: Detected an entity reference loop at :1.
Fatal error: Detected an entity reference loop at :1.
Fatal error: Detected an entity reference loop at :1.
Fatal error: Detected an entity reference loop at :1.
Fatal error: Detected an entity reference loop at :1.
Fatal error: Detected an entity reference loop at :12.
C:/Development/src/libxml-ruby/test/new_main.rb:15:in `parse': Fatal error: Detected an entity reference loop at :12. (LibXML::XML::Error)
        from C:/Development/src/libxml-ruby/test/new_main.rb:15:in `from_xml'
        from C:/Development/src/libxml-ruby/test/new_main.rb:210

I assume that is what you are after?


Charlie

-------------------

require 'pp'
require 'libxml'


class Object
  # Hand +value+ to the given block, then return +value+ itself.
  #
  # The block's own result is discarded; this is useful for building up an
  # object inside a block and returning that object in a single expression.
  #
  # value::
  # Object that is yielded to the block and then returned.
  def returning(value)
    yield value
    value
  end
end

class Hash
  # Parse an XML string with LibXML and typecast the result.
  #
  # xml::
  # XML document as a String; it is stripped of surrounding whitespace
  # before being handed to the parser.
  #
  # Assigns LibXML::XML.default_keep_blanks = false before parsing.
  #
  # NOTE(review): +unrename_keys+ is not defined in this file, and
  # +typecast_xml_value+ is defined below as an *instance* method while this
  # is a class method. Presumably both are expected to be supplied as class
  # methods by ActiveSupport's Hash extensions -- confirm before relying on
  # this outside Rails.
  def self.from_xml(xml)
    LibXML::XML.default_keep_blanks = false
    doc = LibXML::XML::Parser.string(xml.strip).parse
    typecast_xml_value(unrename_keys(doc))
  end

  # Recursively convert a parsed-XML structure (nested Hashes, Arrays and
  # Strings) into plain Ruby values.
  #
  # value::
  # A Hash, Array or String. Anything else raises a RuntimeError.
  #
  # Returns the typecast value: an Array for hashes tagged type="array",
  # the (optionally parsed) "__content__" for content hashes, "" for empty
  # string-typed hashes, nil for blank / nil="true" / type-only hashes, a
  # typecast Hash otherwise. Arrays collapse to nil (empty), their single
  # element, or themselves; Strings are returned unchanged.
  #
  # NOTE(review): relies on +blank?+ and on an XML_PARSING constant, neither
  # of which is defined in this file (presumably ActiveSupport) -- confirm.
  def typecast_xml_value(value)
    # Dispatch on the class *name* rather than the class itself; class
    # matching was observed to misbehave here, so the string comparison is
    # kept deliberately.
    case value.class.to_s
      when 'Hash'
        if value['type'] == 'array'
          # Only the value of the first non-'type' entry matters; the key is throwaway.
          _child_key, entries = value.detect { |k, _v| k != 'type' }
          # The assignment must be parenthesized: without the parentheses the
          # expression parsed as `c = (value['__content__'] && c.blank?)`,
          # calling blank? on a still-nil local and treating every array
          # with any content as empty.
          if entries.nil? || ((array_content = value['__content__']) && array_content.blank?)
            []
          else
            case entries.class.to_s # something weird with classes not matching here. maybe singleton methods breaking is_a?
              when "Array"
                entries.collect { |v| typecast_xml_value(v) }
              when "Hash"
                [typecast_xml_value(entries)]
              else
                raise "can't typecast #{entries.inspect}"
            end
          end
        elsif value.has_key?("__content__")
          content = value["__content__"]
          parser = XML_PARSING[value["type"]]
          if parser
            # Two-argument parsers also receive the whole hash.
            if parser.arity == 2
              parser.call(content, value)
            else
              parser.call(content)
            end
          else
            content
          end
        elsif value['type'] == 'string' && value['nil'] != 'true'
          ""
        # blank or nil parsed values are represented by nil
        elsif value.blank? || value['nil'] == 'true'
          nil
        # If the type is the only element which makes it then
        # this still makes the value nil, except if type is
        # a XML node(where type['value'] is a Hash)
        elsif value['type'] && value.size == 1 && !value['type'].is_a?(::Hash)
          nil
        else
          xml_value = value.inject({}) do |h, (k, v)|
            h[k] = typecast_xml_value(v)
            h
          end

          # Turn { :files => { :file => #<StringIO> } into { :files => #<StringIO> } so it is compatible with
          # how multipart uploaded files from HTML appear
          xml_value["file"].is_a?(StringIO) ? xml_value["file"] : xml_value
        end
      when 'Array'
        # Typecasts in place, then collapses empty / single-element arrays.
        value.map! { |i| typecast_xml_value(i) }
        case value.length
          when 0 then nil
          when 1 then value.first
          else value
        end
      when 'String'
        value
      else
        raise "can't typecast #{value.class.name} - #{value.inspect}"
    end
  end
end

module LibXML
  module Conversions
    # Mixin for documents: hash conversion is delegated to the root node.
    module Document
      # Convert the document into a hash by converting its root node.
      def to_hash
        root.to_hash
      end
    end

    # Mixin for nodes: converts a node (and its subtree) into nested hashes.
    module Node
      CONTENT_ROOT = '__content__'
      LIB_XML_LIMIT = 30000000 # Hardcoded LibXML limit

      # Convert this node into a hash and merge it into +hash+.
      #
      # Text nodes append their content under CONTENT_ROOT (raising
      # LibXML::XML::Error once the content reaches LIB_XML_LIMIT); any other
      # node gets its own sub-hash stored under its name, filled with its
      # attributes and then its children.
      #
      # hash::
      # Hash to merge the converted element into.
      #
      # Returns +hash+.
      def to_hash(hash={})
        if text?
          raise LibXML::XML::Error if content.length >= LIB_XML_LIMIT
          previous_text = hash[CONTENT_ROOT] || ''
          hash[CONTENT_ROOT] = previous_text + content
          return hash
        end

        node_hash = insert_name_into_hash(hash, name)
        attributes_to_hash(node_hash)
        if array?
          children_array_to_hash(node_hash)
        elsif yaml?
          children_yaml_to_hash(node_hash)
        else
          children_to_hash(node_hash)
        end
        hash
      end

      protected

      # Create a fresh sub-hash for +name+ inside +hash+ and return it.
      # When +name+ is already taken, the existing entry is promoted to an
      # Array (if it is not one already) and the new sub-hash is appended.
      #
      # hash::
      # Hash to merge the converted element into.
      # name::
      # Key under which the new sub-hash is stored.
      def insert_name_into_hash(hash, name)
        fresh = {}
        if hash[name]
          hash[name] = [hash[name]] unless hash[name].kind_of?(Array)
          hash[name] << fresh
        else
          hash[name] = fresh
        end
        fresh
      end

      # Merge every child node, then this node's attributes, into +hash+.
      #
      # hash::
      # Hash to merge the children into.
      def children_to_hash(hash={})
        each do |node|
          node.to_hash(hash)
        end
        attributes_to_hash(hash)
        hash
      end

      # Copy each attribute into +hash+ as name => value.
      #
      # hash::
      # Hash to merge the attributes into
      def attributes_to_hash(hash={})
        each_attr do |attribute|
          hash[attribute.name] = attribute.value
        end
        hash
      end

      # Store one hash per child node, as an Array, under the name of the
      # first child.
      #
      # hash::
      # Hash to merge the array into
      def children_array_to_hash(hash={})
        hash[child.name] = map do |item|
          item_hash = {}
          item.children_to_hash(item_hash)
          item_hash
        end
        hash
      end

      # Store this node's content under CONTENT_ROOT unless it is blank.
      #
      # hash::
      # Hash to merge the yaml into
      def children_yaml_to_hash(hash = {})
        unless content.blank?
          hash[CONTENT_ROOT] = content
        end
        hash
      end

      # True when the first child is followed by a sibling of the same name.
      def array?
        child? && child.next? && (child.next.name == child.name)
      end

      # True when any attribute of this node has the value 'yaml'.
      def yaml?
        attributes.any? { |attribute| attribute.value == 'yaml' }
      end

    end
  end
end

# Mix the conversion helpers into LibXML's document and node classes.
LibXML::XML::Document.class_eval { include LibXML::Conversions::Document }
LibXML::XML::Node.class_eval { include LibXML::Conversions::Node }

 # Entity-expansion attack payload: each of the entities a..f is declared as
 # ten references to the next entity, and g is a run of literal "x"
 # characters, so the single &a; reference in <member> expands tenfold at
 # every level.
 attack_xml = <<-EOT
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE member [
   <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
   <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
   <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
   <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
   <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
   <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
   <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
 ]>
 <member>
 &a;
 </member>
 EOT


# Run the payload through Hash.from_xml; from_xml strips the surrounding
# whitespace from the heredoc before parsing.
hash = Hash.from_xml(attack_xml)

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

_______________________________________________
libxml-devel mailing list
libxml-devel@rubyforge.org
http://rubyforge.org/mailman/listinfo/libxml-devel

Reply via email to