[
https://issues.apache.org/jira/browse/AVRO-1650?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14369997#comment-14369997
]
Rob Turner commented on AVRO-1650:
----------------------------------
I suspect it is a character encoding issue as the Avro bytes generated are
converted to a String using the default character encoding which is probably
UTF-8 and then converted back to bytes using the same character encoding. This
does not preserve the original bytes in general as some byte values are not
valid UTF-8. You can use the ISO-8859-1 encoding for both conversions, i.e. new
String(bytes, "ISO-8859-1") and binaryString.getBytes("ISO-8859-1"), or better
still just pass the bytes variable directly in the call to binaryDecoder.
> Avro deserialization fails depending on the value of integer/long fields
> ------------------------------------------------------------------------
>
> Key: AVRO-1650
> URL: https://issues.apache.org/jira/browse/AVRO-1650
> Project: Avro
> Issue Type: Bug
> Reporter: Sachin Goyal
>
> Here is a test that fails depending on the value of the zipCode integer.
> {code}
> public class TestBinaryDecoderSeparateSchema {
> @Test
> public void checkAvroWithoutEmbeddedSchema () throws Exception {
> log ("\n\n\nBeginning without-schema\n");
> Person datum = new Person();
> ReflectData rdata = ReflectData.AllowNull.get();
> Schema schema = rdata.getSchema(Person.class);
> // Write avro as binary
> ByteArrayOutputStream baos = new ByteArrayOutputStream();
> DatumWriter<Person> dout = new ReflectDatumWriter<Person>(Person.class,
> rdata);
> Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
> dout.write(datum, encoder);
> encoder.flush();
> byte[] bytes = baos.toByteArray();
> String binaryString = new String (bytes);
> log (binaryString);
> // Read avro binary string into GenericRecord
> BinaryDecoder decoder =
> DecoderFactory.get().binaryDecoder(binaryString.getBytes(), null);
> GenericDatumReader<GenericRecord> datumReader = new
> GenericDatumReader<GenericRecord> ();
> datumReader.setSchema(schema);
> GenericRecord record = datumReader.read(null, decoder);
> log ("Read zipCode = " + record.get("zipCode"));
> }
> @Test
> public void checkAvroWithEmbeddedSchema () throws Exception {
> log ("\n\n\nBeginning with-schema\n");
> Person datum = new Person();
> ReflectData rdata = ReflectData.AllowNull.get();
> Schema schema = rdata.getSchema(Person.class);
> // Write avro with embedded schema
> ByteArrayOutputStream baos = new ByteArrayOutputStream();
> ReflectDatumWriter<Person> dout = new ReflectDatumWriter<Person>
> (Person.class, rdata);
> DataFileWriter<Person> fileWriter = new DataFileWriter<Person> (dout);
> fileWriter.create(schema, baos);
> fileWriter.append(datum);
> fileWriter.close();
> byte[] bytes = baos.toByteArray();
> String binaryString = new String (bytes);
> log (binaryString);
> // Read avro with embedded schema
> GenericDatumReader<GenericRecord> datumReader = new
> GenericDatumReader<GenericRecord> ();
> SeekableByteArrayInput avroInputStream = new
> SeekableByteArrayInput(bytes);
> DataFileReader<GenericRecord> fileReader =
> new DataFileReader<GenericRecord>(avroInputStream, datumReader);
> schema = fileReader.getSchema();
> GenericRecord record = null;
> List<GenericRecord> records = new ArrayList<GenericRecord> ();
> while (fileReader.hasNext())
> records.add (fileReader.next(record));
> log ("Read " + records.size() + " records");
> log ("Read zipCode = " + records.get(0).get("zipCode"));
> }
> private static class Person {
> Integer zipCode = 90900;
> }
> private static void log (String s) {
> System.out.println (s);
> }
> }
> {code}
> \\
> *Issues:*
> # zipCode = 1, no exception, but the zipCode value is read incorrectly
> # zipCode = 90900, exception in checkAvroWithoutEmbeddedSchema()
> {color:red}
> java.io.IOException: Invalid int encoding
> at org.apache.avro.io.BinaryDecoder.readInt(BinaryDecoder.java:145)
> at org.apache.avro.io.ValidatingDecoder.readInt(ValidatingDecoder.java:83)
> at
> org.apache.avro.generic.GenericDatumReader.readInt(GenericDatumReader.java:444)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:159)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:155)
> at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193)
> at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:155)
> at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193)
> at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:142)
> at
> org.apache.avro.io.TestBinaryDecoderSeparateSchema.checkAvroWithoutEmbeddedSchema(TestBinaryDecoderSeparateSchema.java:68)
> {color}
> \\
> Am I even supposed to read/write like the way shown in
> checkAvroWithoutEmbeddedSchema()?
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)