[ 
https://issues.apache.org/jira/browse/AVRO-1857?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nandor Kollar updated AVRO-1857:
--------------------------------
    Status: Patch Available  (was: Open)

> GenericDatumWriter.write using BufferedBinaryEncoder leaves ByteBuffer in 
> indeterminate state
> ---------------------------------------------------------------------------------------------
>
>                 Key: AVRO-1857
>                 URL: https://issues.apache.org/jira/browse/AVRO-1857
>             Project: Avro
>          Issue Type: Bug
>          Components: java
>    Affects Versions: 1.7.7
>            Reporter: Matt Grimwade
>            Assignee: Nandor Kollar
>
> Calling {{GenericDatumWriter.write(Object, Encoder)}} using a 
> BufferedBinaryEncoder leaves any ByteBuffers within the object (representing 
> BYTES or FIXED types) in an indeterminate state. Specifically, each buffer 
> may be left either in its initial state, with (position, remaining) = (0, N) 
> or in its "consumed" state of (N, 0).
> Although I cannot find this documented, I believe the correct behaviour is 
> that the state of the object being written should be left unmodified.
> This is an actual problem in our project, where we save a copy of an object 
> to disk before performing some action on it. This later action fails 
> intermittently because some of the ByteBuffers in the object are "consumed" 
> and some are not.
> I think the fault lies in 
> {{org.apache.avro.io.BufferedBinaryEncoder#writeFixed(java.nio.ByteBuffer)}}, 
> wherein the first branch advances the buffer's position but the second does 
> not:
> {code}
>   @Override
>   public void writeFixed(ByteBuffer bytes) throws IOException {
>     if (!bytes.hasArray() && bytes.remaining() > bulkLimit) {
>       flushBuffer();
>       sink.innerWrite(bytes);                     // bypass the buffer
>     } else {
>       super.writeFixed(bytes);
>     }
>   }
> {code}
> Here is a failing test case:
> {code}
> import static java.util.Arrays.asList;
> import static org.hamcrest.Matchers.equalTo;
> import static org.hamcrest.Matchers.is;
> import static org.junit.Assert.assertThat;
> import java.io.ByteArrayOutputStream;
> import java.io.IOException;
> import java.nio.ByteBuffer;
> import java.nio.MappedByteBuffer;
> import java.nio.channels.FileChannel;
> import java.nio.file.Files;
> import java.nio.file.Path;
> import java.nio.file.StandardOpenOption;
> import org.apache.avro.Schema;
> import org.apache.avro.generic.GenericDatumWriter;
> import org.apache.avro.io.Encoder;
> import org.apache.avro.io.EncoderFactory;
> import org.junit.Test;
> public class BugTest {
>     private static final int ENCODER_BUFFER_SIZE = 32;
>     private static final int EXAMPLE_DATA_SIZE = 17;
>     @Test
>     public void testArrayBackedByteBuffer() throws IOException {
>         ByteBuffer buffer = ByteBuffer.wrap(someBytes(EXAMPLE_DATA_SIZE));
>         doTest(buffer);
>     }
>     @Test
>     public void testMappedByteBuffer() throws IOException {
>         Path file = Files.createTempFile("test", "data");
>         Files.write(file, someBytes(EXAMPLE_DATA_SIZE));
>         MappedByteBuffer buffer = FileChannel.open(file, 
> StandardOpenOption.READ).map(FileChannel.MapMode.READ_ONLY, 0, 
> EXAMPLE_DATA_SIZE);
>         doTest(buffer);
>     }
>     private static void doTest(ByteBuffer buffer) throws IOException {
>         assertThat(asList(buffer.position(), buffer.remaining()), 
> is(asList(0, EXAMPLE_DATA_SIZE)));
>         ByteArrayOutputStream output = new 
> ByteArrayOutputStream(EXAMPLE_DATA_SIZE * 2);
>         EncoderFactory encoderFactory = new EncoderFactory();
>         encoderFactory.configureBufferSize(ENCODER_BUFFER_SIZE);
>         Encoder encoder = encoderFactory.binaryEncoder(output, null);
>         new 
> GenericDatumWriter<ByteBuffer>(Schema.create(Schema.Type.BYTES)).write(buffer,
>  encoder);
>         encoder.flush();
>         assertThat(output.toByteArray(), 
> equalTo(avroEncoded(someBytes(EXAMPLE_DATA_SIZE))));
>         assertThat(asList(buffer.position(), buffer.remaining()), 
> is(asList(0, EXAMPLE_DATA_SIZE))); // fails if buffer is not array-backed and 
> buffer overflow occurs
>     }
>     private static byte[] someBytes(int size) {
>         byte[] result = new byte[size];
>         for (int i = 0; i < size; i++) {
>             result[i] = (byte) i;
>         }
>         return result;
>     }
>     private static byte[] avroEncoded(byte[] bytes) {
>         assert bytes.length < 64;
>         byte[] result = new byte[1 + bytes.length];
>         result[0] = (byte) (bytes.length * 2); // zig-zag encoding
>         System.arraycopy(bytes, 0, result, 1, bytes.length);
>         return result;
>     }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to