This is an automated email from the ASF dual-hosted git repository.

amolina pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-cookbook.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ab9880  [Java]: Java cookbook for create arrow schema (#134)
4ab9880 is described below

commit 4ab9880b8a6b33350cff2f1d0c733466d864b0f5
Author: david dali susanibar arce <[email protected]>
AuthorDate: Thu Jan 27 05:19:04 2022 -0500

    [Java]: Java cookbook for create arrow schema (#134)
    
    * Adding java cookbook for creating arrow schema
    
    * minor change to trigger ci
    
    Co-authored-by: Alessandro Molina <[email protected]>
---
 java/source/index.rst  |   1 +
 java/source/schema.rst | 215 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 216 insertions(+)

diff --git a/java/source/index.rst b/java/source/index.rst
index d6c1ccd..ccf21d5 100644
--- a/java/source/index.rst
+++ b/java/source/index.rst
@@ -11,6 +11,7 @@ Welcome to java arrow's documentation!
    :caption: Contents:
 
    create
+   schema
 
 Indices and tables
 ==================
diff --git a/java/source/schema.rst b/java/source/schema.rst
new file mode 100644
index 0000000..5cc5cd8
--- /dev/null
+++ b/java/source/schema.rst
@@ -0,0 +1,215 @@
+===================
+Working with Schema
+===================
+
+A table is commonly defined by a schema. Java Arrow is column-oriented and it also has a schema representation.
+Each name in the schema maps to a column of a predefined data type.
+
+
+.. contents::
+
+Define Data Type
+================
+
+Define fields for a string column (name), an integer column (age) and a list column (points):
+
+.. testcode::
+
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+
+    // Nullable UTF-8 string field called "name"; it has no child fields.
+    Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), /*children=*/null);
+    System.out.print(name);
+
+.. testoutput::
+
+    name: Utf8
+
+.. testcode::
+
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+
+    // Nullable signed 32-bit integer field called "age"; it has no child fields.
+    Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, true)), /*children=*/null);
+    System.out.print(age);
+
+.. testoutput::
+
+    age: Int(32, true)
+
+.. testcode::
+
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+
+    import java.util.ArrayList;
+    import java.util.List;
+
+    // A list field is a List-typed parent whose child field describes the element type.
+    FieldType intType = new FieldType(true, new ArrowType.Int(32, true), /*dictionary=*/null);
+    FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null);
+    Field childField = new Field("intCol", intType, /*children=*/null);
+    List<Field> childFields = new ArrayList<>();
+    childFields.add(childField);
+    Field points = new Field("points", listType, childFields);
+
+    System.out.print(points);
+
+.. testoutput::
+
+    points: List<intCol: Int(32, true)>
+
+Define Metadata for Field
+=========================
+
+To attach metadata to a field definition, pass a metadata map when building its FieldType:
+
+.. testcode::
+
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+
+    import java.util.HashMap;
+    import java.util.Map;
+
+    // create a column data type + metadata
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("A", "Id card");
+    metadata.put("B", "Passport");
+    metadata.put("C", "Visa");
+    // The metadata map is passed as the fourth FieldType argument and read back with getMetadata().
+    Field document = new Field("document", new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null, metadata), /*children=*/null);
+
+    System.out.print(document.getMetadata());
+
+.. testoutput::
+
+    {A=Id card, B=Passport, C=Visa}
+
+Create the Schema
+=================
+
+A schema is a list of Fields, where each Field is defined by name and type.
+
+.. testcode::
+
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+    import org.apache.arrow.vector.types.pojo.Schema;
+
+    import java.util.ArrayList;
+    import java.util.HashMap;
+    import java.util.List;
+    import java.util.Map;
+
+    import static java.util.Arrays.asList;
+
+    // String field without metadata.
+    Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), /*children=*/null);
+
+    // String field carrying key/value metadata.
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("A", "Id card");
+    metadata.put("B", "Passport");
+    metadata.put("C", "Visa");
+    Field document = new Field("document", new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null, metadata), /*children=*/null);
+
+    // Signed 32-bit integer field.
+    Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, true)), /*children=*/null);
+
+    // List field whose elements are described by the "intCol" child field.
+    FieldType intType = new FieldType(true, new ArrowType.Int(32, true), /*dictionary=*/null);
+    FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null);
+    Field childField = new Field("intCol", intType, /*children=*/null);
+    List<Field> childFields = new ArrayList<>();
+    childFields.add(childField);
+    Field points = new Field("points", listType, childFields);
+
+    // create a definition
+    Schema schemaPerson = new Schema(asList(name, document, age, points));
+
+    System.out.print(schemaPerson);
+
+.. testoutput::
+
+    Schema<name: Utf8, document: Utf8, age: Int(32, true), points: 
List<intCol: Int(32, true)>>
+
+Populate Data
+=============
+
+.. testcode::
+
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.BitVectorHelper;
+    import org.apache.arrow.vector.IntVector;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+    import org.apache.arrow.vector.complex.ListVector;
+    import org.apache.arrow.vector.types.Types.MinorType;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import org.apache.arrow.vector.types.pojo.Schema;
+    import org.apache.arrow.vector.types.pojo.ArrowType;
+    import org.apache.arrow.vector.types.pojo.Field;
+    import org.apache.arrow.vector.types.pojo.FieldType;
+
+    import java.nio.charset.StandardCharsets;
+    import java.util.ArrayList;
+    import java.util.HashMap;
+    import java.util.List;
+    import java.util.Map;
+
+    import static java.util.Arrays.asList;
+
+    // Field definitions: two strings (one with metadata), one integer and one list of integers.
+    Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), /*children=*/null);
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("A", "Id card");
+    metadata.put("B", "Passport");
+    metadata.put("C", "Visa");
+    Field document = new Field("document", new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null, metadata), /*children=*/null);
+    Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, true)), /*children=*/null);
+    FieldType intType = new FieldType(true, new ArrowType.Int(32, true), /*dictionary=*/null);
+    FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null);
+    Field childField = new Field("intCol", intType, /*children=*/null);
+    List<Field> childFields = new ArrayList<>();
+    childFields.add(childField);
+    Field points = new Field("points", listType, childFields);
+
+    Schema schema = new Schema(asList(name, document, age, points));
+
+    // The allocator and the root hold Arrow buffers; try-with-resources closes the root first, then the allocator.
+    try (RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
+         VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, rootAllocator)) {
+
+        // "name" column: three UTF-8 strings.
+        VarCharVector nameVector = (VarCharVector) vectorSchemaRoot.getVector("name");
+        nameVector.allocateNew(3);
+        nameVector.set(0, "David".getBytes(StandardCharsets.UTF_8));
+        nameVector.set(1, "Gladis".getBytes(StandardCharsets.UTF_8));
+        nameVector.set(2, "Juan".getBytes(StandardCharsets.UTF_8));
+        nameVector.setValueCount(3);
+
+        // "document" column: must be looked up by its own field name, otherwise the
+        // values below would overwrite the "name" column and "document" would stay empty.
+        VarCharVector documentVector = (VarCharVector) vectorSchemaRoot.getVector("document");
+        documentVector.allocateNew(3);
+        documentVector.set(0, "A".getBytes(StandardCharsets.UTF_8));
+        documentVector.set(1, "B".getBytes(StandardCharsets.UTF_8));
+        documentVector.set(2, "C".getBytes(StandardCharsets.UTF_8));
+        documentVector.setValueCount(3);
+
+        // "age" column: three integers.
+        IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
+        ageVector.allocateNew(3);
+        ageVector.set(0, 10);
+        ageVector.set(1, 20);
+        ageVector.set(2, 30);
+        ageVector.setValueCount(3);
+
+        // "points" column: all elements live in one child data vector; the offset buffer
+        // written below marks where each row's list starts and ends inside it.
+        ListVector listVector = (ListVector) vectorSchemaRoot.getVector("points");
+        listVector.allocateNew();
+        MinorType type = MinorType.INT;
+        listVector.addOrGetVector(FieldType.nullable(type.getType()));
+        IntVector dataVector = (IntVector) listVector.getDataVector();
+        dataVector.allocateNew();
+        listVector.getOffsetBuffer().setInt(0, 0);
+        // row 0 -> elements [0, 3)
+        BitVectorHelper.setBit(listVector.getValidityBuffer(), 0);
+        dataVector.set(0, 1);
+        dataVector.set(1, 2);
+        dataVector.set(2, 3);
+        listVector.getOffsetBuffer().setInt(1 * BaseRepeatedValueVector.OFFSET_WIDTH, 3);
+        // row 1 -> elements [3, 5)
+        BitVectorHelper.setBit(listVector.getValidityBuffer(), 1);
+        dataVector.set(3, 9);
+        dataVector.set(4, 8);
+        listVector.getOffsetBuffer().setInt(2 * BaseRepeatedValueVector.OFFSET_WIDTH, 5);
+        // row 2 -> elements [5, 8)
+        BitVectorHelper.setBit(listVector.getValidityBuffer(), 2);
+        dataVector.set(5, 10);
+        dataVector.set(6, 20);
+        dataVector.set(7, 30);
+        listVector.getOffsetBuffer().setInt(3 * BaseRepeatedValueVector.OFFSET_WIDTH, 8);
+        listVector.setLastSet(2);
+        listVector.setValueCount(3);
+
+        vectorSchemaRoot.setRowCount(3);
+
+        System.out.print(vectorSchemaRoot.contentToTSVString());
+    }
+
+.. testoutput::
+
+    name    document    age    points
+    David    A    10    [1,2,3]
+    Gladis    B    20    [9,8]
+    Juan    C    30    [10,20,30]

Reply via email to