davisusanibar commented on a change in pull request #136:
URL: https://github.com/apache/arrow-cookbook/pull/136#discussion_r809977242



##########
File path: java/source/io.rst
##########
@@ -0,0 +1,542 @@
+.. _arrow-io:
+
+========================
+Reading and writing data
+========================
+
+The `Arrow IPC format <https://arrow.apache.org/docs/java/ipc.html>`_ defines 
two types of binary formats
+for serializing Arrow data: the streaming format and the file format (or 
random access format). Such files can
+be directly memory-mapped when read.
+
+.. contents::
+
+Writing
+=======
+
+Writing to the file format and to the streaming format uses the same API.
+
+Writing Random Access Files
+***************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowFileWriter;
+
+    import java.io.File;
+    import java.io.FileOutputStream;
+    import java.io.IOException;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            nameVector.set(0, "David".getBytes());
+            nameVector.set(1, "Gladis".getBytes());
+            nameVector.set(2, "Juan".getBytes());
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            ageVector.set(0, 10);
+            ageVector.set(1, 20);
+            ageVector.set(2, 30);
+            vectorSchemaRoot.setRowCount(3);
+            File file = new File("randon_access_to_file.arrow");
+            try (FileOutputStream fileOutputStream = new 
FileOutputStream(file);
+                 ArrowFileWriter writer = new 
ArrowFileWriter(vectorSchemaRoot, null, fileOutputStream.getChannel())
+            ) {
+                writer.start();
+                for (int i = 0; i < 10; i++) {
+                    // All the data to be written comes from the root; since we are not changing the root data, the same batch is written 10 times.
+                    writer.writeBatch();
+                }
+                writer.end();
+                System.out.println("Record batches written: " + 
writer.getRecordBlocks().size());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+.. testoutput::
+
+    Record batches written: 10
+
+Write - Out to Buffer
+---------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowFileWriter;
+
+    import java.io.ByteArrayOutputStream;
+    import java.io.IOException;
+    import java.nio.channels.Channels;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            vectorSchemaRoot.setRowCount(3);
+            try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+                 ArrowFileWriter writer = new 
ArrowFileWriter(vectorSchemaRoot, null, Channels.newChannel(out)))
+            {
+                writer.start();
+                for (int i=0; i<10; i++){
+                    nameVector.set(0, ("New-"+(i+1)).getBytes());
+                    nameVector.set(1, ("New-"+(i+1)).getBytes());
+                    nameVector.set(2, ("New-"+(i+1)).getBytes());
+                    ageVector.set(0, (i+2)*10);
+                    ageVector.set(1, (i+2)*20);
+                    ageVector.set(2, (i+2)*30);
+                    writer.writeBatch();
+                }
+                System.out.println("Record batches written: " + 
writer.getRecordBlocks().size());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+.. testoutput::
+
+    Record batches written: 10
+
+Writing Streaming Format
+************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+    import java.io.File;
+    import java.io.FileOutputStream;
+    import java.io.IOException;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        // Create and populate data:
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            nameVector.set(0, "David".getBytes());
+            nameVector.set(1, "Gladis".getBytes());
+            nameVector.set(2, "Juan".getBytes());
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            ageVector.set(0, 10);
+            ageVector.set(1, 20);
+            ageVector.set(2, 30);
+            vectorSchemaRoot.setRowCount(3);
+            File file = new File("streaming_to_file.arrow");
+            try (FileOutputStream fileOutputStream = new 
FileOutputStream(file);
+                 ArrowStreamWriter writer = new 
ArrowStreamWriter(vectorSchemaRoot, null, fileOutputStream.getChannel())
+            ){
+                writer.start();
+                for (int i=0; i<10; i++){

Review comment:
       Please confirm https://github.com/apache/arrow-cookbook/pull/136#discussion_r809969275 so that I can delete four lines of code.

##########
File path: java/source/io.rst
##########
@@ -0,0 +1,542 @@
+.. _arrow-io:
+
+========================
+Reading and writing data
+========================
+
+The `Arrow IPC format <https://arrow.apache.org/docs/java/ipc.html>`_ defines 
two types of binary formats
+for serializing Arrow data: the streaming format and the file format (or 
random access format). Such files can
+be directly memory-mapped when read.
+
+.. contents::
+
+Writing
+=======
+
+Writing to the file format and to the streaming format uses the same API.
+
+Writing Random Access Files
+***************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowFileWriter;
+
+    import java.io.File;
+    import java.io.FileOutputStream;
+    import java.io.IOException;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            nameVector.set(0, "David".getBytes());
+            nameVector.set(1, "Gladis".getBytes());
+            nameVector.set(2, "Juan".getBytes());
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            ageVector.set(0, 10);
+            ageVector.set(1, 20);
+            ageVector.set(2, 30);
+            vectorSchemaRoot.setRowCount(3);
+            File file = new File("randon_access_to_file.arrow");
+            try (FileOutputStream fileOutputStream = new 
FileOutputStream(file);
+                 ArrowFileWriter writer = new 
ArrowFileWriter(vectorSchemaRoot, null, fileOutputStream.getChannel())
+            ) {
+                writer.start();
+                for (int i = 0; i < 10; i++) {
+                    // All the data to be written comes from the root; since we are not changing the root data, the same batch is written 10 times.
+                    writer.writeBatch();
+                }
+                writer.end();
+                System.out.println("Record batches written: " + 
writer.getRecordBlocks().size());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+.. testoutput::
+
+    Record batches written: 10
+
+Write - Out to Buffer
+---------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowFileWriter;
+
+    import java.io.ByteArrayOutputStream;
+    import java.io.IOException;
+    import java.nio.channels.Channels;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            vectorSchemaRoot.setRowCount(3);
+            try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+                 ArrowFileWriter writer = new 
ArrowFileWriter(vectorSchemaRoot, null, Channels.newChannel(out)))
+            {
+                writer.start();
+                for (int i=0; i<10; i++){
+                    nameVector.set(0, ("New-"+(i+1)).getBytes());
+                    nameVector.set(1, ("New-"+(i+1)).getBytes());
+                    nameVector.set(2, ("New-"+(i+1)).getBytes());
+                    ageVector.set(0, (i+2)*10);
+                    ageVector.set(1, (i+2)*20);
+                    ageVector.set(2, (i+2)*30);
+                    writer.writeBatch();
+                }
+                System.out.println("Record batches written: " + 
writer.getRecordBlocks().size());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+.. testoutput::
+
+    Record batches written: 10
+
+Writing Streaming Format
+************************
+
+Write - Out to File
+-------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+    import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+    import java.io.File;
+    import java.io.FileOutputStream;
+    import java.io.IOException;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        // Create and populate data:
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            nameVector.set(0, "David".getBytes());
+            nameVector.set(1, "Gladis".getBytes());
+            nameVector.set(2, "Juan".getBytes());
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            ageVector.set(0, 10);
+            ageVector.set(1, 20);
+            ageVector.set(2, 30);
+            vectorSchemaRoot.setRowCount(3);
+            File file = new File("streaming_to_file.arrow");
+            try (FileOutputStream fileOutputStream = new 
FileOutputStream(file);
+                 ArrowStreamWriter writer = new 
ArrowStreamWriter(vectorSchemaRoot, null, fileOutputStream.getChannel())
+            ){
+                writer.start();
+                for (int i=0; i<10; i++){
+                    // All the data to be written comes from the root; since we are not changing the root data, the same batch is written 10 times.
+                    writer.writeBatch();
+                }
+                System.out.println(writer.bytesWritten());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+.. testoutput::
+
+    2928
+
+Write - Out to Buffer
+---------------------
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import static java.util.Arrays.asList;
+
+    import java.io.ByteArrayOutputStream;
+    import java.io.IOException;
+    import java.nio.channels.Channels;
+
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE)) {
+        // Create and populate data:
+        Field name = new Field("name", FieldType.nullable(new 
ArrowType.Utf8()), null);
+        Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, 
true)), null);
+        Schema schemaPerson = new Schema(asList(name, age));
+        try(VectorSchemaRoot vectorSchemaRoot = 
VectorSchemaRoot.create(schemaPerson, rootAllocator)){
+            VarCharVector nameVector = (VarCharVector) 
vectorSchemaRoot.getVector("name");
+            nameVector.allocateNew(3);
+            IntVector ageVector = (IntVector) 
vectorSchemaRoot.getVector("age");
+            ageVector.allocateNew(3);
+            vectorSchemaRoot.setRowCount(3);
+            try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+                 ArrowStreamWriter writer = new 
ArrowStreamWriter(vectorSchemaRoot, null, Channels.newChannel(out))
+            ){
+                writer.start();
+                for (int i=0; i<10; i++){

Review comment:
       Please confirm https://github.com/apache/arrow-cookbook/pull/136#discussion_r809969275 so that I can delete four lines of code.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to