davisusanibar commented on a change in pull request #113:
URL: https://github.com/apache/arrow-cookbook/pull/113#discussion_r782129089



##########
File path: java/source/schema.rst
##########
@@ -0,0 +1,330 @@
+===================
+Working with schema
+===================
+
+A table is commonly defined by a schema. Arrow Java is column-oriented and it also has a schema representation.
+Each name in the schema maps to a column of a predefined data type.
+
+
+.. contents::
+
+We are going to use these utility methods to create Arrow objects:
+
+.. code-block:: java
+
+   import org.apache.arrow.memory.RootAllocator;
+   import org.apache.arrow.vector.BitVectorHelper;
+   import org.apache.arrow.vector.IntVector;
+   import org.apache.arrow.vector.VarCharVector;
+   import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+   import org.apache.arrow.vector.complex.ListVector;
+   import org.apache.arrow.vector.types.Types;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   import java.util.List;
+
+
+   /**
+    * Fills an IntVector from the given boxed values, one slot per argument.
+    * Slots whose argument is null are never written.
+    */
+   void setVector(IntVector vector, Integer... values) {
+       final int count = values.length;
+       vector.allocateNew(count);
+       int slot = 0;
+       for (Integer item : values) {
+           if (item != null) {
+               vector.set(slot, item);
+           }
+           slot++;
+       }
+       vector.setValueCount(count);
+   }
+
+   /**
+    * Fills a VarCharVector from the given byte arrays, one slot per argument.
+    * Slots whose argument is null are never written.
+    */
+   void setVector(VarCharVector vector, byte[]... values) {
+       final int length = values.length;
+       vector.allocateNewSafe();
+       for (int i = 0; i < length; i++) {
+           if (values[i] != null) {
+               // allocateNewSafe() only reserves the default capacity, so use
+               // setSafe to grow the buffers when the input needs more room.
+               vector.setSafe(i, values[i]);
+           }
+       }
+       vector.setValueCount(length);
+   }
+
+   void setVector(ListVector vector, List<Integer>... values) {
+       vector.allocateNewSafe();
+       Types.MinorType type = Types.MinorType.INT;
+       vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+       IntVector dataVector = (IntVector) vector.getDataVector();
+       dataVector.allocateNew();
+
+       // set underlying vectors
+       int curPos = 0;
+       vector.getOffsetBuffer().setInt(0, curPos);
+       for (int i = 0; i < values.length; i++) {
+           if (values[i] == null) {
+               BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+           } else {
+               BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+               for (int value : values[i]) {
+                   dataVector.setSafe(curPos, value);
+                   curPos += 1;
+               }
+           }
+           vector.getOffsetBuffer().setInt((i + 1) * 
BaseRepeatedValueVector.OFFSET_WIDTH, curPos);
+       }
+       dataVector.setValueCount(curPos);
+       vector.setLastSet(values.length - 1);
+       vector.setValueCount(values.length);
+   }
+
+   RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE); // deal 
with byte buffer allocation
+
+Define data type
+================
+
+Definition of columnar fields for string (name), integer (age) and array 
(points):
+
+.. code-block:: java
+   :emphasize-lines: 6,8,12,15
+
+   import org.apache.arrow.vector.types.pojo.ArrowType;
+   import org.apache.arrow.vector.types.pojo.Field;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   // Column definitions: each Field pairs a column name with its data type.
+   Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), /*children=*/null);
+
+   Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, true)), /*children=*/null);
+
+   FieldType intType = FieldType.nullable(new ArrowType.Int(32, true));
+   FieldType listType = FieldType.nullable(new ArrowType.List());
+   Field childField = new Field("intCol", intType, /*children=*/null);
+   List<Field> childFields = new ArrayList<>();
+   childFields.add(childField);
+   Field points = new Field("points", listType, childFields);
+
+.. code-block:: java
+   :emphasize-lines: 1-5
+
+   jshell> name; age; points;
+
+   name ==> name: Utf8
+   age ==> age: Int(32, true)
+   points ==> points: List<intCol: Int(32, true)>
+
+Define metadata
+===============
+
+In case we need to add metadata to our definition we could use:
+
+.. code-block:: java
+   :emphasize-lines: 10
+
+   import org.apache.arrow.vector.types.pojo.ArrowType;
+   import org.apache.arrow.vector.types.pojo.Field;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   // Column data type carrying custom key/value metadata.
+   Map<String, String> metadata = new HashMap<>();
+   metadata.put("A", "Id card");
+   metadata.put("B", "Passport");
+   metadata.put("C", "Visa");
+   Field document = new Field("document", new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null, metadata), /*children=*/null);
+
+.. code-block:: java
+   :emphasize-lines: 1-3
+
+   jshell> document
+
+   document ==> document: Utf8
+
+Create the schema
+=================
+
+Tables contain multiple columns, each with its own name

Review comment:
       Changed

##########
File path: java/source/schema.rst
##########
@@ -0,0 +1,330 @@
+===================
+Working with schema
+===================
+
+A table is commonly defined by a schema. Arrow Java is column-oriented and it also has a schema representation.
+Each name in the schema maps to a column of a predefined data type.
+
+
+.. contents::
+
+We are going to use these utility methods to create Arrow objects:
+
+.. code-block:: java
+
+   import org.apache.arrow.memory.RootAllocator;
+   import org.apache.arrow.vector.BitVectorHelper;
+   import org.apache.arrow.vector.IntVector;
+   import org.apache.arrow.vector.VarCharVector;
+   import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+   import org.apache.arrow.vector.complex.ListVector;
+   import org.apache.arrow.vector.types.Types;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   import java.util.List;
+
+
+   /**
+    * Fills an IntVector from the given boxed values, one slot per argument.
+    * Slots whose argument is null are never written.
+    */
+   void setVector(IntVector vector, Integer... values) {
+       final int count = values.length;
+       vector.allocateNew(count);
+       int slot = 0;
+       for (Integer item : values) {
+           if (item != null) {
+               vector.set(slot, item);
+           }
+           slot++;
+       }
+       vector.setValueCount(count);
+   }
+
+   /**
+    * Fills a VarCharVector from the given byte arrays, one slot per argument.
+    * Slots whose argument is null are never written.
+    */
+   void setVector(VarCharVector vector, byte[]... values) {
+       final int length = values.length;
+       vector.allocateNewSafe();
+       for (int i = 0; i < length; i++) {
+           if (values[i] != null) {
+               // allocateNewSafe() only reserves the default capacity, so use
+               // setSafe to grow the buffers when the input needs more room.
+               vector.setSafe(i, values[i]);
+           }
+       }
+       vector.setValueCount(length);
+   }
+
+   void setVector(ListVector vector, List<Integer>... values) {
+       vector.allocateNewSafe();
+       Types.MinorType type = Types.MinorType.INT;
+       vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+       IntVector dataVector = (IntVector) vector.getDataVector();
+       dataVector.allocateNew();
+
+       // set underlying vectors
+       int curPos = 0;
+       vector.getOffsetBuffer().setInt(0, curPos);
+       for (int i = 0; i < values.length; i++) {
+           if (values[i] == null) {
+               BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+           } else {
+               BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+               for (int value : values[i]) {
+                   dataVector.setSafe(curPos, value);
+                   curPos += 1;
+               }
+           }
+           vector.getOffsetBuffer().setInt((i + 1) * 
BaseRepeatedValueVector.OFFSET_WIDTH, curPos);
+       }
+       dataVector.setValueCount(curPos);
+       vector.setLastSet(values.length - 1);
+       vector.setValueCount(values.length);
+   }
+
+   RootAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE); // deal 
with byte buffer allocation
+
+Define data type
+================
+
+Definition of columnar fields for string (name), integer (age) and array 
(points):
+
+.. code-block:: java
+   :emphasize-lines: 6,8,12,15
+
+   import org.apache.arrow.vector.types.pojo.ArrowType;
+   import org.apache.arrow.vector.types.pojo.Field;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   // Column definitions: each Field pairs a column name with its data type.
+   Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), /*children=*/null);
+
+   Field age = new Field("age", FieldType.nullable(new ArrowType.Int(32, true)), /*children=*/null);
+
+   FieldType intType = FieldType.nullable(new ArrowType.Int(32, true));
+   FieldType listType = FieldType.nullable(new ArrowType.List());
+   Field childField = new Field("intCol", intType, /*children=*/null);
+   List<Field> childFields = new ArrayList<>();
+   childFields.add(childField);
+   Field points = new Field("points", listType, childFields);
+
+.. code-block:: java
+   :emphasize-lines: 1-5
+
+   jshell> name; age; points;
+
+   name ==> name: Utf8
+   age ==> age: Int(32, true)
+   points ==> points: List<intCol: Int(32, true)>
+
+Define metadata
+===============
+
+In case we need to add metadata to our definition we could use:
+
+.. code-block:: java
+   :emphasize-lines: 10
+
+   import org.apache.arrow.vector.types.pojo.ArrowType;
+   import org.apache.arrow.vector.types.pojo.Field;
+   import org.apache.arrow.vector.types.pojo.FieldType;
+
+   // Column data type carrying custom key/value metadata.
+   Map<String, String> metadata = new HashMap<>();
+   metadata.put("A", "Id card");
+   metadata.put("B", "Passport");
+   metadata.put("C", "Visa");
+   Field document = new Field("document", new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null, metadata), /*children=*/null);
+
+.. code-block:: java
+   :emphasize-lines: 1-3
+
+   jshell> document
+
+   document ==> document: Utf8
+
+Create the schema
+=================
+
+Tables contain multiple columns, each with its own name
+and type. The combination of names and types is what defines a schema.

Review comment:
       Thanks, changed




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to