Matt Grimwade created AVRO-1857:
-----------------------------------

             Summary: GenericDatumWriter.write using BufferedBinaryEncoder leaves ByteBuffer in indeterminate state
                 Key: AVRO-1857
                 URL: https://issues.apache.org/jira/browse/AVRO-1857
             Project: Avro
          Issue Type: Bug
          Components: java
    Affects Versions: 1.7.7
            Reporter: Matt Grimwade


Calling {{GenericDatumWriter.write(Object, Encoder)}} with a BufferedBinaryEncoder leaves any ByteBuffers within the object (representing BYTES or FIXED types) in an indeterminate state. Specifically, each buffer may be left either in its initial state, with (position, remaining) = (0, N), or in its "consumed" state of (N, 0).

Although I cannot find it documented, I believe the correct behaviour is that 
the state of the object being written should be unmodified.

This is an actual problem in our project: we save a copy of an object to disk before performing some action on it, and that later action fails unpredictably because some of the ByteBuffers in the object are "consumed" and some are not.
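As a stopgap on the caller side we can hand the writer duplicates of the buffers, since {{ByteBuffer.duplicate()}} shares the content but has an independent position and limit. A minimal sketch for the plain BYTES case (the helper name is just for illustration):

{code}
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;

public class DuplicateBeforeWrite {
    // Hypothetical helper: write a duplicate so the caller's buffer keeps its position/limit.
    static void writeBytesSafely(ByteBuffer buffer, ByteArrayOutputStream output) throws IOException {
        Encoder encoder = EncoderFactory.get().binaryEncoder(output, null);
        new GenericDatumWriter<ByteBuffer>(Schema.create(Schema.Type.BYTES))
            .write(buffer.duplicate(), encoder); // only the duplicate's position can be disturbed
        encoder.flush();
    }
}
{code}

For records containing several ByteBuffer fields each one would need duplicating, so this only mitigates rather than fixes the issue.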

I think the fault lies in {{org.apache.avro.io.BufferedBinaryEncoder#writeFixed(java.nio.ByteBuffer)}}, wherein the first branch (the direct write to the sink) advances the buffer's position but the second (the copy through the encoder's internal buffer) does not:

{code}
  @Override
  public void writeFixed(ByteBuffer bytes) throws IOException {
    if (!bytes.hasArray() && bytes.remaining() > bulkLimit) {
      flushBuffer();
      sink.innerWrite(bytes);                     // bypass the buffer
    } else {
      super.writeFixed(bytes);
    }
  }
{code}
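
One possible fix (just a sketch, untested) would be to hand the sink a duplicate, so that neither branch disturbs the caller's buffer:

{code}
  @Override
  public void writeFixed(ByteBuffer bytes) throws IOException {
    if (!bytes.hasArray() && bytes.remaining() > bulkLimit) {
      flushBuffer();
      // Write a duplicate: it shares the content but has its own position,
      // so the caller's buffer is left untouched, matching the buffered branch.
      sink.innerWrite(bytes.duplicate());
    } else {
      super.writeFixed(bytes);
    }
  }
{code}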

Here is a failing test case:

{code}
import static java.util.Arrays.asList;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.junit.Test;

public class BugTest {
    private static final int ENCODER_BUFFER_SIZE = 32;
    private static final int EXAMPLE_DATA_SIZE = 17;

    @Test
    public void testArrayBackedByteBuffer() throws IOException {
        ByteBuffer buffer = ByteBuffer.wrap(someBytes(EXAMPLE_DATA_SIZE));

        doTest(buffer);
    }

    @Test
    public void testMappedByteBuffer() throws IOException {
        Path file = Files.createTempFile("test", "data");
        Files.write(file, someBytes(EXAMPLE_DATA_SIZE));
        MappedByteBuffer buffer = FileChannel.open(file, StandardOpenOption.READ).map(FileChannel.MapMode.READ_ONLY, 0, EXAMPLE_DATA_SIZE);

        doTest(buffer);
    }

    private static void doTest(ByteBuffer buffer) throws IOException {
        assertThat(asList(buffer.position(), buffer.remaining()), is(asList(0, EXAMPLE_DATA_SIZE)));

        ByteArrayOutputStream output = new ByteArrayOutputStream(EXAMPLE_DATA_SIZE * 2);
        EncoderFactory encoderFactory = new EncoderFactory();
        encoderFactory.configureBufferSize(ENCODER_BUFFER_SIZE);

        Encoder encoder = encoderFactory.binaryEncoder(output, null);
        new GenericDatumWriter<ByteBuffer>(Schema.create(Schema.Type.BYTES)).write(buffer, encoder);
        encoder.flush();

        assertThat(output.toByteArray(), equalTo(avroEncoded(someBytes(EXAMPLE_DATA_SIZE))));

        assertThat(asList(buffer.position(), buffer.remaining()), is(asList(0, EXAMPLE_DATA_SIZE))); // fails when the buffer is not array-backed and large enough to take the bulk-write path
    }

    private static byte[] someBytes(int size) {
        byte[] result = new byte[size];
        for (int i = 0; i < size; i++) {
            result[i] = (byte) i;
        }
        return result;
    }

    private static byte[] avroEncoded(byte[] bytes) {
        assert bytes.length < 64;
        byte[] result = new byte[1 + bytes.length];
        result[0] = (byte) (bytes.length * 2); // zig-zag encoding
        System.arraycopy(bytes, 0, result, 1, bytes.length);
        return result;
    }
}
{code}


