lidavidm commented on a change in pull request #136:
URL: https://github.com/apache/arrow-cookbook/pull/136#discussion_r801970972
##########
File path: java/source/io.rst
##########
@@ -298,14 +303,56 @@ Read - From File
ageVector.set(1, 20);
ageVector.set(2, 30);
vectorSchemaRoot.setRowCount(3);
+ Field name2 = new Field("name2", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age2 = new Field("age2", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson2 = new Schema(asList(name2, age2));
+ VectorSchemaRoot vectorSchemaRoot2 =
VectorSchemaRoot.create(schemaPerson2, rootAllocator);
+ VarCharVector nameVector2 = (VarCharVector)
vectorSchemaRoot2.getVector("name2");
+ nameVector2.allocateNew(3);
+ nameVector2.set(0, "Nidia".getBytes());
+ nameVector2.set(1, "Alexa".getBytes());
+ nameVector2.set(2, "Mara".getBytes());
+ IntVector ageVector2 = (IntVector) vectorSchemaRoot2.getVector("age2");
+ ageVector2.allocateNew(3);
+ ageVector2.set(0, 15);
+ ageVector2.set(1, 20);
+ ageVector2.set(2, 15);
+ vectorSchemaRoot2.setRowCount(3);
+ Field name3 = new Field("name3", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age3 = new Field("age3", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson3 = new Schema(asList(name3, age3));
+ VectorSchemaRoot vectorSchemaRoot3 =
VectorSchemaRoot.create(schemaPerson3, rootAllocator);
+ VarCharVector nameVector3 = (VarCharVector)
vectorSchemaRoot3.getVector("name3");
+ nameVector3.allocateNew(3);
+ nameVector3.set(0, "Raul".getBytes());
+ nameVector3.set(1, "Jhon".getBytes());
+ nameVector3.set(2, "Thomy".getBytes());
+ IntVector ageVector3 = (IntVector) vectorSchemaRoot3.getVector("age3");
+ ageVector3.allocateNew(3);
+ ageVector3.set(0, 34);
+ ageVector3.set(1, 29);
+ ageVector3.set(2, 33);
+ vectorSchemaRoot3.setRowCount(3);
File file = new File("randon_access_to_file.arrow");
try (FileOutputStream fileOutputStream = new FileOutputStream(file);
ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
/*WritableByteChannel out*/fileOutputStream.getChannel())
){
// write
writer.start();
- for (int i=0; i<10; i++){
+ for (int i=0; i<3; i++){
// Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ if (i==1){
+ VectorUnloader vectorUnloader2 = new
VectorUnloader(vectorSchemaRoot2);
+ ArrowRecordBatch arrowRecordBatch2 =
vectorUnloader2.getRecordBatch();
+ VectorLoader vectorLoader2 = new
VectorLoader(vectorSchemaRoot);
+ vectorLoader2.load(arrowRecordBatch2);
+ }
+ if (i==2){
+ VectorUnloader vectorUnloader3 = new
VectorUnloader(vectorSchemaRoot3);
+ ArrowRecordBatch arrowRecordBatch3 =
vectorUnloader3.getRecordBatch();
+ VectorLoader vectorLoader3 = new
VectorLoader(vectorSchemaRoot);
+ vectorLoader3.load(arrowRecordBatch3);
+ }
Review comment:
At this point, just don't use the loop.
##########
File path: java/source/io.rst
##########
@@ -0,0 +1,610 @@
+.. _arrow-io:
+
+========================
+Reading and writing data
+========================
+
+The `Arrow IPC format <https://arrow.apache.org/docs/java/ipc.html>`_ defines
two types of binary formats
+for serializing Arrow data: the streaming format and the file format (or
random access format). Such files can
+be directly memory-mapped when read.
+
+.. contents::
+
+Writing
+=======
+
+Writing the file format and writing the streaming format both use the same API.
+
+Writing Random Access Files
+***************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VarCharVector;
+ import org.apache.arrow.vector.IntVector;
+ import org.apache.arrow.vector.types.pojo.Field;
+ import org.apache.arrow.vector.types.pojo.FieldType;
+ import org.apache.arrow.vector.types.pojo.ArrowType;
+ import org.apache.arrow.vector.types.pojo.Schema;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import static java.util.Arrays.asList;
+ import org.apache.arrow.vector.ipc.ArrowFileWriter;
+ import java.io.File;
+ import java.io.FileNotFoundException;
+ import java.io.FileOutputStream;
+ import java.io.IOException;
+
+ // Create and populate data:
+ Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson = new Schema(asList(name, age));
+ RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schemaPerson,
rootAllocator);
+ VarCharVector nameVector = (VarCharVector)
vectorSchemaRoot.getVector("name");
+ nameVector.allocateNew(3);
+ nameVector.set(0, "David".getBytes());
+ nameVector.set(1, "Gladis".getBytes());
+ nameVector.set(2, "Juan".getBytes());
+ IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+ ageVector.allocateNew(3);
+ ageVector.set(0, 10);
+ ageVector.set(1, 20);
+ ageVector.set(2, 30);
+ vectorSchemaRoot.setRowCount(3);
+ File file = new File("randon_access_to_file.arrow");
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
fileOutputStream.getChannel())
+ ){
+ writer.start();
+ for (int i=0; i<10; i++){
+ // Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ writer.writeBatch();
+ }
+ System.out.println("Record batches written: " +
writer.getRecordBlocks().size());
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+.. testoutput::
+
+ Record batches written: 10
+
+Write - Out to Buffer
+---------------------
+
+.. testcode::
+
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VarCharVector;
+ import org.apache.arrow.vector.IntVector;
+ import org.apache.arrow.vector.types.pojo.Field;
+ import org.apache.arrow.vector.types.pojo.FieldType;
+ import org.apache.arrow.vector.types.pojo.ArrowType;
+ import org.apache.arrow.vector.types.pojo.Schema;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import static java.util.Arrays.asList;
+ import org.apache.arrow.vector.ipc.ArrowFileWriter;
+
+ import java.io.ByteArrayOutputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.nio.channels.Channels;
+
+ // Create and populate data:
+ Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson = new Schema(asList(name, age));
+ RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schemaPerson,
rootAllocator);
+ VarCharVector nameVector = (VarCharVector)
vectorSchemaRoot.getVector("name");
+ nameVector.allocateNew(3);
+ nameVector.set(0, "David".getBytes());
+ nameVector.set(1, "Gladis".getBytes());
+ nameVector.set(2, "Juan".getBytes());
+ IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+ ageVector.allocateNew(3);
+ ageVector.set(0, 10);
+ ageVector.set(1, 20);
+ ageVector.set(2, 30);
+ vectorSchemaRoot.setRowCount(3);
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
/*WritableByteChannel out*/Channels.newChannel(out)))
+ {
+ writer.start();
+ for (int i=0; i<10; i++){
+ // Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ writer.writeBatch();
+ }
+ System.out.println(writer.getRecordBlocks().size());
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+.. testoutput::
+
+ 10
+
+Writing Streaming Format
+************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VarCharVector;
+ import org.apache.arrow.vector.IntVector;
+ import org.apache.arrow.vector.types.pojo.Field;
+ import org.apache.arrow.vector.types.pojo.FieldType;
+ import org.apache.arrow.vector.types.pojo.ArrowType;
+ import org.apache.arrow.vector.types.pojo.Schema;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import static java.util.Arrays.asList;
+ import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+ import java.io.File;
+ import java.io.FileNotFoundException;
+ import java.io.FileOutputStream;
+ import java.io.IOException;
+
+ // Create and populate data:
+ Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson = new Schema(asList(name, age));
+ RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schemaPerson,
rootAllocator);
+ VarCharVector nameVector = (VarCharVector)
vectorSchemaRoot.getVector("name");
+ nameVector.allocateNew(3);
+ nameVector.set(0, "David".getBytes());
+ nameVector.set(1, "Gladis".getBytes());
+ nameVector.set(2, "Juan".getBytes());
+ IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+ ageVector.allocateNew(3);
+ ageVector.set(0, 10);
+ ageVector.set(1, 20);
+ ageVector.set(2, 30);
+ vectorSchemaRoot.setRowCount(3);
+ File file = new File("streaming_to_file.arrow");
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowStreamWriter writer = new ArrowStreamWriter(vectorSchemaRoot,
null, /*WritableByteChannel out*/fileOutputStream.getChannel())
+ ){
+ writer.start();
+ for (int i=0; i<10; i++){
+ // Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ writer.writeBatch();
+ }
+ System.out.println(writer.bytesWritten());
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+.. testoutput::
+
+ 2928
+
+Write - Out to Buffer
+---------------------
+
+.. testcode::
+
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VarCharVector;
+ import org.apache.arrow.vector.IntVector;
+ import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+ import org.apache.arrow.vector.types.pojo.Field;
+ import org.apache.arrow.vector.types.pojo.FieldType;
+ import org.apache.arrow.vector.types.pojo.ArrowType;
+ import org.apache.arrow.vector.types.pojo.Schema;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import static java.util.Arrays.asList;
+
+ import java.io.ByteArrayOutputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.nio.channels.Channels;
+
+ // Create and populate data:
+ Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson = new Schema(asList(name, age));
+ RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schemaPerson,
rootAllocator);
+ VarCharVector nameVector = (VarCharVector)
vectorSchemaRoot.getVector("name");
+ nameVector.allocateNew(3);
+ nameVector.set(0, "David".getBytes());
+ nameVector.set(1, "Gladis".getBytes());
+ nameVector.set(2, "Juan".getBytes());
+ IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+ ageVector.allocateNew(3);
+ ageVector.set(0, 10);
+ ageVector.set(1, 20);
+ ageVector.set(2, 30);
+ vectorSchemaRoot.setRowCount(3);
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowStreamWriter writer = new ArrowStreamWriter(vectorSchemaRoot,
null, /*WritableByteChannel out*/Channels.newChannel(out))
+ ){
+ writer.start();
+ for (int i=0; i<10; i++){
+ // Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ writer.writeBatch();
+ }
+ System.out.println(writer.bytesWritten());
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+.. testoutput::
+
+ 2928
+
+Reading
+=======
+
+Readers for the random access format and the streaming format offer the same API,
+with the difference that random access files also offer access to any record
batch by index.
+
+Reading Random Access Files
+***************************
+
+Read - From File
+----------------
+
+.. testcode::
+
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VarCharVector;
+ import org.apache.arrow.vector.IntVector;
+ import org.apache.arrow.vector.ipc.message.ArrowBlock;
+ import org.apache.arrow.vector.types.pojo.Field;
+ import org.apache.arrow.vector.types.pojo.FieldType;
+ import org.apache.arrow.vector.types.pojo.ArrowType;
+ import org.apache.arrow.vector.types.pojo.Schema;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import static java.util.Arrays.asList;
+ import org.apache.arrow.vector.ipc.ArrowFileWriter;
+ import java.io.File;
+ import java.io.FileInputStream;
+ import java.io.FileNotFoundException;
+ import java.io.FileOutputStream;
+ import java.io.IOException;
+ import org.apache.arrow.vector.ipc.ArrowFileReader;
+
+ // Create and populate data
+ Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson = new Schema(asList(name, age));
+ RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schemaPerson,
rootAllocator);
+ VarCharVector nameVector = (VarCharVector)
vectorSchemaRoot.getVector("name");
+ nameVector.allocateNew(3);
+ nameVector.set(0, "David".getBytes());
+ nameVector.set(1, "Gladis".getBytes());
+ nameVector.set(2, "Juan".getBytes());
+ IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+ ageVector.allocateNew(3);
+ ageVector.set(0, 10);
+ ageVector.set(1, 20);
+ ageVector.set(2, 30);
+ vectorSchemaRoot.setRowCount(3);
+ File file = new File("randon_access_to_file.arrow");
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
Review comment:
I'm suggesting this since right now the recipes are combining read and
write. I know that recipes should be self-contained, but the recipe is much
less helpful if the reader has to scan over it carefully to figure out which
part of the recipe is relevant and which part is just noise.
##########
File path: java/source/io.rst
##########
@@ -298,14 +303,56 @@ Read - From File
ageVector.set(1, 20);
ageVector.set(2, 30);
vectorSchemaRoot.setRowCount(3);
+ Field name2 = new Field("name2", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age2 = new Field("age2", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson2 = new Schema(asList(name2, age2));
+ VectorSchemaRoot vectorSchemaRoot2 =
VectorSchemaRoot.create(schemaPerson2, rootAllocator);
+ VarCharVector nameVector2 = (VarCharVector)
vectorSchemaRoot2.getVector("name2");
+ nameVector2.allocateNew(3);
+ nameVector2.set(0, "Nidia".getBytes());
+ nameVector2.set(1, "Alexa".getBytes());
+ nameVector2.set(2, "Mara".getBytes());
+ IntVector ageVector2 = (IntVector) vectorSchemaRoot2.getVector("age2");
+ ageVector2.allocateNew(3);
+ ageVector2.set(0, 15);
+ ageVector2.set(1, 20);
+ ageVector2.set(2, 15);
+ vectorSchemaRoot2.setRowCount(3);
+ Field name3 = new Field("name3", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age3 = new Field("age3", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson3 = new Schema(asList(name3, age3));
+ VectorSchemaRoot vectorSchemaRoot3 =
VectorSchemaRoot.create(schemaPerson3, rootAllocator);
+ VarCharVector nameVector3 = (VarCharVector)
vectorSchemaRoot3.getVector("name3");
+ nameVector3.allocateNew(3);
+ nameVector3.set(0, "Raul".getBytes());
+ nameVector3.set(1, "Jhon".getBytes());
+ nameVector3.set(2, "Thomy".getBytes());
+ IntVector ageVector3 = (IntVector) vectorSchemaRoot3.getVector("age3");
+ ageVector3.allocateNew(3);
+ ageVector3.set(0, 34);
+ ageVector3.set(1, 29);
+ ageVector3.set(2, 33);
+ vectorSchemaRoot3.setRowCount(3);
File file = new File("randon_access_to_file.arrow");
try (FileOutputStream fileOutputStream = new FileOutputStream(file);
ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
/*WritableByteChannel out*/fileOutputStream.getChannel())
){
// write
writer.start();
- for (int i=0; i<10; i++){
+ for (int i=0; i<3; i++){
// Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ if (i==1){
+ VectorUnloader vectorUnloader2 = new
VectorUnloader(vectorSchemaRoot2);
+ ArrowRecordBatch arrowRecordBatch2 =
vectorUnloader2.getRecordBatch();
+ VectorLoader vectorLoader2 = new
VectorLoader(vectorSchemaRoot);
Review comment:
FWIW, why not clean things up and unload the batch when we first create
the other roots?
##########
File path: java/source/io.rst
##########
@@ -457,18 +504,19 @@ Read - From File
){
// write
writer.start();
- for (int i=0; i<10; i++){
+ for (int i=0; i<2; i++){
// Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
writer.writeBatch();
}
// read
try (FileInputStream fileInputStreamForStream = new
FileInputStream(file);
ArrowStreamReader reader = new
ArrowStreamReader(fileInputStreamForStream, rootAllocator)){
- // read the batch
- reader.loadNextBatch();
- VectorSchemaRoot vectorSchemaRootRecover =
reader.getVectorSchemaRoot();
- System.out.print(vectorSchemaRootRecover.contentToTSVString());
+ while(reader.loadNextBatch()){
+ // read the batch (on the next example you could see how to
use a VectorLoader to get fresh data)
Review comment:
I don't think this is relevant to this recipe. It really sounds like we
should have more recipes for VectorLoader/VectorUnloader.
##########
File path: java/source/io.rst
##########
@@ -298,14 +303,56 @@ Read - From File
ageVector.set(1, 20);
ageVector.set(2, 30);
vectorSchemaRoot.setRowCount(3);
+ Field name2 = new Field("name2", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age2 = new Field("age2", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson2 = new Schema(asList(name2, age2));
+ VectorSchemaRoot vectorSchemaRoot2 =
VectorSchemaRoot.create(schemaPerson2, rootAllocator);
+ VarCharVector nameVector2 = (VarCharVector)
vectorSchemaRoot2.getVector("name2");
+ nameVector2.allocateNew(3);
+ nameVector2.set(0, "Nidia".getBytes());
+ nameVector2.set(1, "Alexa".getBytes());
+ nameVector2.set(2, "Mara".getBytes());
+ IntVector ageVector2 = (IntVector) vectorSchemaRoot2.getVector("age2");
+ ageVector2.allocateNew(3);
+ ageVector2.set(0, 15);
+ ageVector2.set(1, 20);
+ ageVector2.set(2, 15);
+ vectorSchemaRoot2.setRowCount(3);
+ Field name3 = new Field("name3", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age3 = new Field("age3", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson3 = new Schema(asList(name3, age3));
+ VectorSchemaRoot vectorSchemaRoot3 =
VectorSchemaRoot.create(schemaPerson3, rootAllocator);
+ VarCharVector nameVector3 = (VarCharVector)
vectorSchemaRoot3.getVector("name3");
+ nameVector3.allocateNew(3);
+ nameVector3.set(0, "Raul".getBytes());
+ nameVector3.set(1, "Jhon".getBytes());
+ nameVector3.set(2, "Thomy".getBytes());
+ IntVector ageVector3 = (IntVector) vectorSchemaRoot3.getVector("age3");
+ ageVector3.allocateNew(3);
+ ageVector3.set(0, 34);
+ ageVector3.set(1, 29);
+ ageVector3.set(2, 33);
+ vectorSchemaRoot3.setRowCount(3);
File file = new File("randon_access_to_file.arrow");
try (FileOutputStream fileOutputStream = new FileOutputStream(file);
ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
/*WritableByteChannel out*/fileOutputStream.getChannel())
){
// write
writer.start();
- for (int i=0; i<10; i++){
+ for (int i=0; i<3; i++){
// Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ if (i==1){
+ VectorUnloader vectorUnloader2 = new
VectorUnloader(vectorSchemaRoot2);
+ ArrowRecordBatch arrowRecordBatch2 =
vectorUnloader2.getRecordBatch();
+ VectorLoader vectorLoader2 = new
VectorLoader(vectorSchemaRoot);
Review comment:
Though, a suggestion: why not just call `ageVector.set(0, i * 10);
ageVector.set(1, i * 10 + 1);` and so on inside the loop, to demonstrate
modifying the root?
##########
File path: java/source/io.rst
##########
@@ -298,14 +303,56 @@ Read - From File
ageVector.set(1, 20);
ageVector.set(2, 30);
vectorSchemaRoot.setRowCount(3);
+ Field name2 = new Field("name2", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age2 = new Field("age2", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson2 = new Schema(asList(name2, age2));
+ VectorSchemaRoot vectorSchemaRoot2 =
VectorSchemaRoot.create(schemaPerson2, rootAllocator);
+ VarCharVector nameVector2 = (VarCharVector)
vectorSchemaRoot2.getVector("name2");
+ nameVector2.allocateNew(3);
+ nameVector2.set(0, "Nidia".getBytes());
+ nameVector2.set(1, "Alexa".getBytes());
+ nameVector2.set(2, "Mara".getBytes());
+ IntVector ageVector2 = (IntVector) vectorSchemaRoot2.getVector("age2");
+ ageVector2.allocateNew(3);
+ ageVector2.set(0, 15);
+ ageVector2.set(1, 20);
+ ageVector2.set(2, 15);
+ vectorSchemaRoot2.setRowCount(3);
+ Field name3 = new Field("name3", FieldType.nullable(new ArrowType.Utf8()),
null);
+ Field age3 = new Field("age3", FieldType.nullable(new ArrowType.Int(32,
true)), null);
+ Schema schemaPerson3 = new Schema(asList(name3, age3));
+ VectorSchemaRoot vectorSchemaRoot3 =
VectorSchemaRoot.create(schemaPerson3, rootAllocator);
+ VarCharVector nameVector3 = (VarCharVector)
vectorSchemaRoot3.getVector("name3");
+ nameVector3.allocateNew(3);
+ nameVector3.set(0, "Raul".getBytes());
+ nameVector3.set(1, "Jhon".getBytes());
+ nameVector3.set(2, "Thomy".getBytes());
+ IntVector ageVector3 = (IntVector) vectorSchemaRoot3.getVector("age3");
+ ageVector3.allocateNew(3);
+ ageVector3.set(0, 34);
+ ageVector3.set(1, 29);
+ ageVector3.set(2, 33);
+ vectorSchemaRoot3.setRowCount(3);
File file = new File("randon_access_to_file.arrow");
try (FileOutputStream fileOutputStream = new FileOutputStream(file);
ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, null,
/*WritableByteChannel out*/fileOutputStream.getChannel())
){
// write
writer.start();
- for (int i=0; i<10; i++){
+ for (int i=0; i<3; i++){
// Generate data or modify the root or use a VectorLoader to get
fresh data from somewhere else
+ if (i==1){
+ VectorUnloader vectorUnloader2 = new
VectorUnloader(vectorSchemaRoot2);
+ ArrowRecordBatch arrowRecordBatch2 =
vectorUnloader2.getRecordBatch();
+ VectorLoader vectorLoader2 = new
VectorLoader(vectorSchemaRoot);
Review comment:
We could have a separate recipe to demonstrate
VectorLoader/VectorUnloader.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]