lidavidm commented on code in PR #33719:
URL: https://github.com/apache/arrow/pull/33719#discussion_r1072206925


##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java:
##########
@@ -116,4 +118,63 @@ public List<ArrowBlock> getRecordBlocks() {
   public List<ArrowBlock> getDictionaryBlocks() {
     return dictionaryBlocks;
   }
+
+  /**
+   * Class to configure builder pattern for ArrowWriter that writes out a 
Arrow files.
+   */
+  public static class Builder extends ArrowWriter.Builder<Builder> {
+    private DictionaryProvider provider = null;
+    private IpcOption option = IpcOption.DEFAULT;
+    private Map<String, String> metaData = Collections.emptyMap();
+    private int level = -1;
+
+    public Builder(VectorSchemaRoot root, WritableByteChannel out) {
+      super(root, out);
+    }
+
+    @Override
+    public ArrowFileWriter build() {
+      ArrowFileWriter writer = new ArrowFileWriter(this);
+      writer.initialize();
+      return writer;
+    }
+
+    @Override
+    protected Builder self() {
+      return this;
+    }
+
+    public Builder setProvider(DictionaryProvider provider) {
+      this.provider = provider;
+      return this;
+    }
+
+    public Builder setOption(IpcOption option) {
+      this.option = option;
+      return this;
+    }
+
+    public Builder setMetaData(Map<String, String> metaData) {
+      this.metaData = metaData;
+      return this;
+    }
+
+    public Builder setLevel(int level) {
+      this.level = level;
+      return this;
+    }
+
+    @Override
+    public Builder setCodec(CompressionCodec codec) {
+      return super.setCodec(codec);
+    }
+  }
+
+  private ArrowFileWriter(Builder builder) {

Review Comment:
   I would rather you call one of the existing constructors instead of tying 
the builder/class so closely together; that will also avoid needing the separate 
initialize() call.



##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java:
##########
@@ -116,4 +118,63 @@ public List<ArrowBlock> getRecordBlocks() {
   public List<ArrowBlock> getDictionaryBlocks() {
     return dictionaryBlocks;
   }
+
+  /**
+   * Class to configure builder pattern for ArrowWriter that writes out a 
Arrow files.
+   */
+  public static class Builder extends ArrowWriter.Builder<Builder> {
+    private DictionaryProvider provider = null;
+    private IpcOption option = IpcOption.DEFAULT;
+    private Map<String, String> metaData = Collections.emptyMap();
+    private int level = -1;
+
+    public Builder(VectorSchemaRoot root, WritableByteChannel out) {
+      super(root, out);
+    }
+
+    @Override
+    public ArrowFileWriter build() {
+      ArrowFileWriter writer = new ArrowFileWriter(this);
+      writer.initialize();
+      return writer;
+    }
+
+    @Override
+    protected Builder self() {
+      return this;
+    }
+
+    public Builder setProvider(DictionaryProvider provider) {

Review Comment:
   Add docstrings?



##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java:
##########
@@ -77,23 +78,32 @@ protected ArrowWriter(VectorSchemaRoot root, 
DictionaryProvider provider, Writab
    * @param option   IPC write options
    */
   protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, 
WritableByteChannel out, IpcOption option) {
-    this.unloader = new VectorUnloader(root);
+    this.root = root;
+    this.unloader = new VectorUnloader(this.root);
     this.out = new WriteChannel(out);
     this.option = option;
+    this.provider = provider;
+
+    initialize();
+  }
 
-    List<Field> fields = new ArrayList<>(root.getSchema().getFields().size());
+  /**
+   * Initialize object needed as part of Arrow Writer functionalities.
+   */
+  public void initialize() {

Review Comment:
   As noted above, I'd rather not have this, but if we do have to have it, it 
should be protected at most.



##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java:
##########
@@ -116,4 +118,63 @@ public List<ArrowBlock> getRecordBlocks() {
   public List<ArrowBlock> getDictionaryBlocks() {
     return dictionaryBlocks;
   }
+
+  /**
+   * Class to configure builder pattern for ArrowWriter that writes out a 
Arrow files.
+   */
+  public static class Builder extends ArrowWriter.Builder<Builder> {
+    private DictionaryProvider provider = null;
+    private IpcOption option = IpcOption.DEFAULT;
+    private Map<String, String> metaData = Collections.emptyMap();
+    private int level = -1;
+
+    public Builder(VectorSchemaRoot root, WritableByteChannel out) {
+      super(root, out);
+    }
+
+    @Override
+    public ArrowFileWriter build() {
+      ArrowFileWriter writer = new ArrowFileWriter(this);
+      writer.initialize();

Review Comment:
   Add a try-catch here so we close the writer on exception.



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriterBuilder.java:
##########
@@ -0,0 +1,472 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static 
org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriterBuilder {
+
+  private BufferAllocator allocator;
+
+  private VarCharVector dictionaryVector1;
+  private StructVector dictionaryVector4;
+
+  private Dictionary dictionary1;
+  private Dictionary dictionary4;
+
+  @Before
+  public void init() {
+    allocator = new RootAllocator(Long.MAX_VALUE);
+
+    dictionaryVector1 = newVarCharVector("D1", allocator);
+    setVector(dictionaryVector1,
+        "foo".getBytes(StandardCharsets.UTF_8),
+        "bar".getBytes(StandardCharsets.UTF_8),
+        "baz".getBytes(StandardCharsets.UTF_8));
+
+    dictionaryVector4 = newVector(StructVector.class, "D4", MinorType.STRUCT, 
allocator);
+    final Map<String, List<Integer>> dictionaryValues4 = new HashMap<>();
+    dictionaryValues4.put("a", Arrays.asList(1, 2, 3));
+    dictionaryValues4.put("b", Arrays.asList(4, 5, 6));
+    setVector(dictionaryVector4, dictionaryValues4);
+
+    dictionary1 = new Dictionary(dictionaryVector1,
+        new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, 
/*indexType=*/null));
+    dictionary4 = new Dictionary(dictionaryVector4,
+        new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, 
/*indexType=*/null));
+  }
+
+  @After
+  public void terminate() throws Exception {
+    dictionaryVector1.close();
+    dictionaryVector4.close();
+    allocator.close();
+  }
+
+  ArrowBuf buf(byte[] bytes) {
+    ArrowBuf buffer = allocator.buffer(bytes.length);
+    buffer.writeBytes(bytes);
+    return buffer;
+  }
+
+  byte[] array(ArrowBuf buf) {
+    byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+    buf.readBytes(bytes);
+    return bytes;
+  }
+
+  @Test
+  public void test() throws IOException {

Review Comment:
   Please use a more descriptive name than 'test'.



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriterBuilder.java:
##########
@@ -0,0 +1,472 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static 
org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriterBuilder {

Review Comment:
   This is a copy-paste of another test? I'd rather we just have a smaller set 
of tests focusing on the builder, not a copy-paste of every test.



##########
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecWriteAndRead.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)

Review Comment:
   And this test is redundant with tests added in #15223, I believe.



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java:
##########
@@ -97,6 +124,27 @@ private void write(FieldVector parent, File file, 
OutputStream outStream) throws
     }
   }
 
+  private void writeThruBuilderPattern(FieldVector parent, File file, 
OutputStream outStream) throws IOException {
+    VectorSchemaRoot root = new VectorSchemaRoot(parent);
+
+    try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+         ArrowFileWriter arrowWriter = new ArrowFileWriter.Builder(root, 
fileOutputStream.getChannel()).build()) {
+      LOGGER.debug("writing schema: " + root.getSchema());

Review Comment:
   Unnecessary debug?



##########
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecWriteAndRead.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)

Review Comment:
   For new tests, can we try to use JUnit 5?



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java:
##########
@@ -122,6 +122,53 @@ private void testWriteLargeArrowData(boolean streamMode) 
throws IOException {
     assertTrue(new File(FILE_NAME).exists());
   }
 
+  private void testWriteLargeArrowDataThruBuilderPattern(boolean streamMode) 
throws IOException {

Review Comment:
   I don't think we need to duplicate every test using the builder pattern, 
especially if the builder is implemented without having to add new constructors.



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriterBuilder.java:
##########
@@ -0,0 +1,472 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static 
org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriterBuilder {
+
+  private BufferAllocator allocator;
+
+  private VarCharVector dictionaryVector1;
+  private StructVector dictionaryVector4;
+
+  private Dictionary dictionary1;
+  private Dictionary dictionary4;
+
+  @Before

Review Comment:
   For new tests, can we try to use JUnit 5?



##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java:
##########
@@ -207,4 +217,34 @@ public void close() {
       throw new RuntimeException(e);
     }
   }
+
+  /**
+   * Abstract class to configure builder pattern for implementing Arrow 
writers for IPC over a WriteChannel.
+   * @param <T> Recursive type parameter that allow method chaining tp work 
properly in subclasses
+   */
+  abstract static class Builder<T extends Builder<T>> {
+    private final VectorSchemaRoot root;
+    private final WriteChannel out;
+    private CompressionCodec codec = NoCompressionCodec.INSTANCE;
+
+    abstract ArrowWriter build();
+
+    protected abstract T self();
+
+    public Builder(VectorSchemaRoot root, WritableByteChannel out) {
+      this.root = root;
+      this.out = new WriteChannel(out);
+    }
+
+    public T setCodec(CompressionCodec codec) {
+      this.codec = codec;
+      return self();
+    }
+  }
+
+  protected ArrowWriter(Builder<?> builder) {

Review Comment:
   Ditto here: I'd rather remove this. `build` is abstract so IMO there is no 
benefit to be had with a common constructor here.



##########
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java:
##########
@@ -116,4 +118,63 @@ public List<ArrowBlock> getRecordBlocks() {
   public List<ArrowBlock> getDictionaryBlocks() {
     return dictionaryBlocks;
   }
+
+  /**
+   * Class to configure builder pattern for ArrowWriter that writes out a 
Arrow files.
+   */
+  public static class Builder extends ArrowWriter.Builder<Builder> {
+    private DictionaryProvider provider = null;
+    private IpcOption option = IpcOption.DEFAULT;
+    private Map<String, String> metaData = Collections.emptyMap();
+    private int level = -1;
+
+    public Builder(VectorSchemaRoot root, WritableByteChannel out) {
+      super(root, out);
+    }
+
+    @Override
+    public ArrowFileWriter build() {
+      ArrowFileWriter writer = new ArrowFileWriter(this);
+      writer.initialize();
+      return writer;
+    }
+
+    @Override
+    protected Builder self() {
+      return this;
+    }
+
+    public Builder setProvider(DictionaryProvider provider) {
+      this.provider = provider;
+      return this;
+    }
+
+    public Builder setOption(IpcOption option) {
+      this.option = option;
+      return this;
+    }
+
+    public Builder setMetaData(Map<String, String> metaData) {
+      this.metaData = metaData;
+      return this;
+    }
+
+    public Builder setLevel(int level) {
+      this.level = level;

Review Comment:
   What is 'level' for? Compression? In that case, make it compressionLevel.



##########
java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriterBuilder.java:
##########
@@ -0,0 +1,472 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static 
org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriterBuilder {
+
+  private BufferAllocator allocator;
+
+  private VarCharVector dictionaryVector1;
+  private StructVector dictionaryVector4;
+
+  private Dictionary dictionary1;
+  private Dictionary dictionary4;
+
+  @Before
+  public void init() {
+    allocator = new RootAllocator(Long.MAX_VALUE);
+
+    dictionaryVector1 = newVarCharVector("D1", allocator);
+    setVector(dictionaryVector1,
+        "foo".getBytes(StandardCharsets.UTF_8),
+        "bar".getBytes(StandardCharsets.UTF_8),
+        "baz".getBytes(StandardCharsets.UTF_8));
+
+    dictionaryVector4 = newVector(StructVector.class, "D4", MinorType.STRUCT, 
allocator);
+    final Map<String, List<Integer>> dictionaryValues4 = new HashMap<>();
+    dictionaryValues4.put("a", Arrays.asList(1, 2, 3));
+    dictionaryValues4.put("b", Arrays.asList(4, 5, 6));
+    setVector(dictionaryVector4, dictionaryValues4);
+
+    dictionary1 = new Dictionary(dictionaryVector1,
+        new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, 
/*indexType=*/null));
+    dictionary4 = new Dictionary(dictionaryVector4,
+        new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, 
/*indexType=*/null));
+  }
+
+  @After
+  public void terminate() throws Exception {
+    dictionaryVector1.close();
+    dictionaryVector4.close();
+    allocator.close();
+  }
+
+  ArrowBuf buf(byte[] bytes) {
+    ArrowBuf buffer = allocator.buffer(bytes.length);
+    buffer.writeBytes(bytes);
+    return buffer;
+  }
+
+  byte[] array(ArrowBuf buf) {
+    byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+    buf.readBytes(bytes);
+    return bytes;
+  }
+
+  @Test
+  public void test() throws IOException {
+    Schema schema = new Schema(asList(new Field("testField", 
FieldType.nullable(new ArrowType.Int(8, true)),
+        Collections.<Field>emptyList())));
+    ArrowType type = schema.getFields().get(0).getType();
+    FieldVector vector = TestUtils.newVector(FieldVector.class, "testField", 
type, allocator);
+    
vector.initializeChildrenFromFields(schema.getFields().get(0).getChildren());
+
+    byte[] validity = new byte[] {(byte) 255, 0};
+    // second half is "undefined"
+    byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16};
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), 
asList(vector), 16);
+         ArrowFileWriter writer = new ArrowFileWriter.Builder(root, 
newChannel(out)).build()) {
+      ArrowBuf validityb = buf(validity);

Review Comment:
   Can't we just use the normal Vector interfaces?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to