This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 861e8b4 [SPARK-38628][SQL] Complete the copy method in subclasses of
InternalRow, ArrayData, and MapData to safely copy their instances
861e8b4 is described below
commit 861e8b4a8ba784da1a69bd6522a0a7fdac5d1091
Author: Takuya UESHIN <[email protected]>
AuthorDate: Wed Mar 23 19:43:44 2022 +0900
[SPARK-38628][SQL] Complete the copy method in subclasses of InternalRow,
ArrayData, and MapData to safely copy their instances
### What changes were proposed in this pull request?
Completes the `copy` method in subclasses of `InternalRow`, `ArrayData`,
and `MapData` to safely copy their instances.
### Why are the changes needed?
Some subclasses of `InternalRow`, `ArrayData`, and `MapData` are missing
support for `StructType`, `ArrayType`, and `MapType` in their `copy` method.
We should complete them to safely copy their instances and prevent
potential issues.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
Closes #35942 from ueshin/issues/SPARK-38628/copy.
Authored-by: Takuya UESHIN <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../main/java/org/apache/spark/sql/vectorized/ColumnarArray.java | 2 +-
.../main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java | 6 ++++++
.../src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java | 6 ++++++
.../apache/spark/sql/execution/vectorized/MutableColumnarRow.java | 6 ++++++
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
index 2fb6b3f..bd7c3d7 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
@@ -68,7 +68,7 @@ public final class ColumnarArray extends ArrayData {
} else if (dt instanceof DoubleType) {
return UnsafeArrayData.fromPrimitiveArray(toDoubleArray());
} else {
- return new GenericArrayData(toObjectArray(dt));
+ return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the
elements are copied.
}
}
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
index 8c32d5c..7f84126 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
@@ -71,6 +71,12 @@ public final class ColumnarBatchRow extends InternalRow {
row.setInt(i, getInt(i));
} else if (dt instanceof TimestampType) {
row.setLong(i, getLong(i));
+ } else if (dt instanceof StructType) {
+ row.update(i, getStruct(i, ((StructType)
dt).fields().length).copy());
+ } else if (dt instanceof ArrayType) {
+ row.update(i, getArray(i).copy());
+ } else if (dt instanceof MapType) {
+ row.update(i, getMap(i).copy());
} else {
throw new RuntimeException("Not implemented. " + dt);
}
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
index da4b242..fd4e8ff 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
@@ -80,6 +80,12 @@ public final class ColumnarRow extends InternalRow {
row.setInt(i, getInt(i));
} else if (dt instanceof TimestampType) {
row.setLong(i, getLong(i));
+ } else if (dt instanceof StructType) {
+ row.update(i, getStruct(i, ((StructType)
dt).fields().length).copy());
+ } else if (dt instanceof ArrayType) {
+ row.update(i, getArray(i).copy());
+ } else if (dt instanceof MapType) {
+ row.update(i, getMap(i).copy());
} else {
throw new RuntimeException("Not implemented. " + dt);
}
diff --git
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
index f4fdf50..64568f1 100644
---
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
+++
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
@@ -80,6 +80,12 @@ public final class MutableColumnarRow extends InternalRow {
row.setInt(i, getInt(i));
} else if (dt instanceof TimestampType) {
row.setLong(i, getLong(i));
+ } else if (dt instanceof StructType) {
+ row.update(i, getStruct(i, ((StructType)
dt).fields().length).copy());
+ } else if (dt instanceof ArrayType) {
+ row.update(i, getArray(i).copy());
+ } else if (dt instanceof MapType) {
+ row.update(i, getMap(i).copy());
} else {
throw new RuntimeException("Not implemented. " + dt);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]