Paul Dlug created AVRO-1134:
-------------------------------

             Summary: Ruby datafile serialization fails with UTF-8 characters
                 Key: AVRO-1134
                 URL: https://issues.apache.org/jira/browse/AVRO-1134
             Project: Avro
          Issue Type: Bug
          Components: ruby
    Affects Versions: 1.7.1
         Environment: Linux and Mac OS X tested, identical on both.
            Reporter: Paul Dlug


When trying to deserialize a data file containing a string with UTF-8 
characters, the Ruby Avro client fails with a variety of errors (the error 
message varies from run to run; see below). The attached script can be used to 
reproduce this problem. Switching the field's type in the schema between bytes 
and string makes no difference.

{code}
% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:46:in `byte!': undefined 
method `unpack' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:63:in `read_long'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:380:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:46:in `byte!': undefined 
method `unpack' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:63:in `read_long'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:380:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:46:in `byte!': undefined 
method `unpack' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:63:in `read_long'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:380:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read': failed to 
allocate memory (NoMemoryError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:93:in `read_bytes'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:100:in `read_string'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:306:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:229:in `match_schemas': 
undefined method `type' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:287:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:383:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read': negative length 
-7638 given (ArgumentError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:93:in `read_bytes'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:100:in `read_string'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:306:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read': negative length 
-50 given (ArgumentError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:93:in `read_bytes'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:100:in `read_string'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:306:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:46:in `byte!': undefined 
method `unpack' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:63:in `read_long'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:380:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read': negative length 
-47 given (ArgumentError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:105:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:93:in `read_bytes'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:100:in `read_string'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:306:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'

% ruby avro_utf8_test.rb
{"id"=>"works", "data"=>"2x2"}
{"id"=>"broken", "data"=>"2\xC3\x97"}
vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:229:in `match_schemas': 
undefined method `type' for nil:NilClass (NoMethodError)
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:287:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:383:in `read_union'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:316:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:391:in `block in 
read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:389:in `read_record'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:317:in `read_data'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/io.rb:282:in `read'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:223:in `block in 
each'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `loop'
  from vendor/ruby/1.9.1/gems/avro-1.7.0/lib/avro/data_file.rb:211:in `each'
  from avro_utf8_test.rb:29:in `<main>'
{code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: 
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to