Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/10820#discussion_r151419068
--- Diff:
sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
---
@@ -166,12 +727,63 @@ public void reset() {
protected boolean anyNullsSet;
/**
- * Data type for this column.
+ * Default size of each array length value. This grows as necessary.
*/
- protected final DataType type;
+ protected static final int DEFAULT_ARRAY_LENGTH = 4;
+
+ /**
+ * Current write cursor (row index) when appending data.
+ */
+ protected int elementsAppended;
- protected ColumnVector(int capacity, DataType type) {
+ /**
+ * If this is a nested type (array or struct), the column for the child
data.
+ */
+ protected final ColumnVector[] childColumns;
+
+ /**
+ * Reusable Array holder for getArray().
+ */
+ protected final Array resultArray;
+
+ /**
+ * Reusable Struct holder for getStruct().
+ */
+ protected final Struct resultStruct;
+
+ /**
+ * Sets up the common state and also handles creating the child columns
if this is a nested
+ * type.
+ */
+ protected ColumnVector(int capacity, DataType type, MemoryMode memMode) {
this.capacity = capacity;
this.type = type;
+
+ if (type instanceof ArrayType || type instanceof BinaryType || type
instanceof StringType) {
+ DataType childType;
+ int childCapacity = capacity;
+ if (type instanceof ArrayType) {
+ childType = ((ArrayType)type).elementType();
+ } else {
+ childType = DataTypes.ByteType;
+ childCapacity *= DEFAULT_ARRAY_LENGTH;
--- End diff --
Why only grow the capacity for non-array types?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]