Author: martinkl
Date: Thu Jan 21 15:27:56 2016
New Revision: 1725987
URL: http://svn.apache.org/viewvc?rev=1725987&view=rev
Log:
AVRO-1783. Ruby: Ensure correct binary encoding for byte strings.
Modified:
avro/branches/branch-1.8/CHANGES.txt
avro/branches/branch-1.8/lang/ruby/lib/avro/io.rb
avro/branches/branch-1.8/lang/ruby/lib/avro/ipc.rb
avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb
avro/branches/branch-1.8/lang/ruby/test/test_io.rb
Modified: avro/branches/branch-1.8/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.8/CHANGES.txt?rev=1725987&r1=1725986&r2=1725987&view=diff
==============================================================================
--- avro/branches/branch-1.8/CHANGES.txt (original)
+++ avro/branches/branch-1.8/CHANGES.txt Thu Jan 21 15:27:56 2016
@@ -266,6 +266,9 @@ Avro 1.8.0 (15 December 2015)
AVRO-1775. Ruby: Use test-unit gem for running tests. (martinkl)
+ AVRO-1783. Ruby: Ensure correct binary encoding for byte strings.
+ (martinkl)
+
Avro 1.7.7 (23 July 2014)
NEW FEATURES
Modified: avro/branches/branch-1.8/lang/ruby/lib/avro/io.rb
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro/io.rb?rev=1725987&r1=1725986&r2=1725987&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro/io.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro/io.rb Thu Jan 21 15:27:56 2016
@@ -209,7 +209,7 @@ module Avro
# A string is encoded as a long followed by that many bytes of
# UTF-8 encoded character data
def write_string(datum)
- # FIXME utf-8 encode this in 1.9
+ datum = datum.encode('utf-8') if datum.respond_to? :encode
write_bytes(datum)
end
Modified: avro/branches/branch-1.8/lang/ruby/lib/avro/ipc.rb
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro/ipc.rb?rev=1725987&r1=1725986&r2=1725987&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro/ipc.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro/ipc.rb Thu Jan 21 15:27:56 2016
@@ -100,7 +100,7 @@ module Avro::IPC
def request(message_name, request_datum)
# Writes a request message and reads a response or error message.
# build handshake and call request
- buffer_writer = StringIO.new('', 'w+')
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
write_handshake_request(buffer_encoder)
write_call_request(message_name, request_datum, buffer_encoder)
@@ -244,7 +244,7 @@ module Avro::IPC
# a response or error. Compare to 'handle()' in Thrift.
def respond(call_request, transport=nil)
buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
- buffer_writer = StringIO.new('', 'w+')
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
error = nil
response_metadata = {}
@@ -294,6 +294,7 @@ module Avro::IPC
end
rescue Avro::AvroError => e
error = AvroRemoteException.new(e.to_s)
+ # TODO does the stuff written here ever get used?
buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new)
META_WRITER.write(response_metadata, buffer_encoder)
buffer_encoder.write_boolean(true)
@@ -393,7 +394,7 @@ module Avro::IPC
def read_framed_message
message = []
loop do
- buffer = StringIO.new
+ buffer = StringIO.new(''.force_encoding('BINARY'))
buffer_length = read_buffer_length
if buffer_length == 0
return message.join
@@ -410,7 +411,7 @@ module Avro::IPC
end
def write_framed_message(message)
- message_length = message.size
+ message_length = message.bytesize
total_bytes_sent = 0
while message_length - total_bytes_sent > 0
if message_length - total_bytes_sent > BUFFER_SIZE
@@ -426,7 +427,7 @@ module Avro::IPC
end
def write_buffer(chunk)
- buffer_length = chunk.size
+ buffer_length = chunk.bytesize
write_buffer_length(buffer_length)
total_bytes_sent = 0
while total_bytes_sent < buffer_length
@@ -467,7 +468,7 @@ module Avro::IPC
end
def write_framed_message(message)
- message_size = message.size
+ message_size = message.bytesize
total_bytes_sent = 0
while message_size - total_bytes_sent > 0
if message_size - total_bytes_sent > BUFFER_SIZE
@@ -485,7 +486,7 @@ module Avro::IPC
private
def write_buffer(chunk)
- buffer_size = chunk.size
+ buffer_size = chunk.bytesize
write_buffer_size(buffer_size)
writer << chunk
end
@@ -505,13 +506,13 @@ module Avro::IPC
def read_framed_message
message = []
loop do
- buffer = ""
+ buffer = ''.force_encoding('BINARY')
buffer_size = read_buffer_size
return message.join if buffer_size == 0
- while buffer.size < buffer_size
- chunk = reader.read(buffer_size - buffer.size)
+ while buffer.bytesize < buffer_size
+ chunk = reader.read(buffer_size - buffer.bytesize)
chunk_error?(chunk)
buffer << chunk
end
@@ -541,7 +542,7 @@ module Avro::IPC
end
def transceive(message)
- writer = FramedWriter.new(StringIO.new)
+ writer = FramedWriter.new(StringIO.new(''.force_encoding('BINARY')))
writer.write_framed_message(message)
resp = @conn.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
FramedReader.new(StringIO.new(resp.body)).read_framed_message
Modified: avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb?rev=1725987&r1=1725986&r2=1725987&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb Thu Jan 21 15:27:56 2016
@@ -108,7 +108,7 @@ module Avro
when :float, :double
datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
when :fixed
- datum.is_a?(String) && datum.size == expected_schema.size
+ datum.is_a?(String) && datum.bytesize == expected_schema.size
when :enum
expected_schema.symbols.include? datum
when :array
Modified: avro/branches/branch-1.8/lang/ruby/test/test_io.rb
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/test/test_io.rb?rev=1725987&r1=1725986&r2=1725987&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/test/test_io.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/test/test_io.rb Thu Jan 21 15:27:56 2016
@@ -210,6 +210,51 @@ EOS
end
end
+ def test_utf8_string_encoding
+ [
+ "\xC3".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
+ def test_bytes_encoding
+ [
+ "\xC3\x83".force_encoding('BINARY'),
+ "\xC3\x83".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
+ def test_fixed_encoding
+ [
+ "\xC3\x83".force_encoding('BINARY'),
+ "\xC3\x83".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
def test_skip_long
for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
value_to_read = 6253