This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1a05b03aee [CORE] Refactor `ArrowColumnarArray` to reduce code duplication(#11525)
1a05b03aee is described below
commit 1a05b03aee73335d41953b3472d79094dc844f69
Author: PHILO-HE <[email protected]>
AuthorDate: Mon Feb 2 23:01:42 2026 +0800
[CORE] Refactor `ArrowColumnarArray` to reduce code duplication(#11525)
---
.../vectorized/ArrowWritableColumnVector.java | 2 +
.../sql}/vectorized/ArrowColumnarMap.java | 3 +-
.../sql/vectorized/AbstractColumnarArray.java} | 26 ++-
.../execution/vectorized/ColumnarArrayShim.java | 234 -------------------
.../spark/sql}/vectorized/ArrowColumnarArray.java | 16 +-
.../execution/vectorized/ColumnarArrayShim.java | 234 -------------------
.../spark/sql}/vectorized/ArrowColumnarArray.java | 16 +-
.../execution/vectorized/ColumnarArrayShim.java | 234 -------------------
.../spark/sql}/vectorized/ArrowColumnarArray.java | 16 +-
.../execution/vectorized/ColumnarArrayShim.java | 241 --------------------
.../spark/sql}/vectorized/ArrowColumnarArray.java | 23 +-
.../execution/vectorized/ColumnarArrayShim.java | 253 ---------------------
.../spark/sql}/vectorized/ArrowColumnarArray.java | 35 +--
13 files changed, 55 insertions(+), 1278 deletions(-)
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
index 1c28e6d577..934e2c23bd 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
@@ -38,6 +38,8 @@ import org.apache.spark.sql.execution.vectorized.WritableColumnVectorShim;
import org.apache.spark.sql.types.*;
import org.apache.spark.sql.utils.SparkArrowUtil;
import org.apache.spark.sql.utils.SparkSchemaUtil;
+import org.apache.spark.sql.vectorized.ArrowColumnarArray;
+import org.apache.spark.sql.vectorized.ArrowColumnarMap;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.types.UTF8String;
import org.slf4j.Logger;
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarMap.java b/gluten-arrow/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarMap.java
similarity index 94%
rename from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarMap.java
rename to gluten-arrow/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarMap.java
index b6bfacb835..77f98897c3 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarMap.java
+++ b/gluten-arrow/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarMap.java
@@ -14,12 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.MapData;
-import org.apache.spark.sql.vectorized.ColumnVector;
/** See [[ArrowColumnarArray]]. */
public class ArrowColumnarMap extends MapData {
diff --git a/shims/spark33/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/common/src/main/java/org/apache/spark/sql/vectorized/AbstractColumnarArray.java
similarity index 86%
rename from shims/spark33/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
rename to shims/common/src/main/java/org/apache/spark/sql/vectorized/AbstractColumnarArray.java
index 21594a155a..dd40a902f9 100644
--- a/shims/spark33/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ b/shims/common/src/main/java/org/apache/spark/sql/vectorized/AbstractColumnarArray.java
@@ -14,28 +14,34 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.sql.execution.vectorized;
+package org.apache.spark.sql.vectorized;
import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.GenericArrayData;
import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;
-public class ColumnarArrayShim extends ArrayData {
+/**
+ * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
+ * vectors will throw exception when we try to access null value, so we define the following class
+ * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
+ * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
+ * when trying to access a value of the array, we will check whether the value to get is null first.
+ *
+ * <p>There are some differences between the supported Spark versions, which are reflected in the
+ * implementations of respective child classes in the shim modules.
+ */
+abstract class AbstractColumnarArray extends ArrayData {
// The data for this array. This array contains elements from
// data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
+ protected final ColumnVector data;
+ protected final int offset;
+ protected final int length;
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
+ AbstractColumnarArray(ColumnVector data, int offset, int length) {
this.data = data;
this.offset = offset;
this.length = length;
diff --git a/shims/spark32/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/spark32/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
deleted file mode 100644
index 21594a155a..0000000000
--- a/shims/spark32/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.vectorized;
-
-import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
-import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
-import org.apache.spark.sql.catalyst.util.ArrayData;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
-import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
-import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.UTF8String;
-
-public class ColumnarArrayShim extends ArrayData {
- // The data for this array. This array contains elements from
- // data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
-
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- @Override
- public int numElements() {
- return length;
- }
-
- /**
- * Sets all the appropriate null bits in the input UnsafeArrayData.
- *
- * @param arrayData The UnsafeArrayData to set the null bits for
- * @return The UnsafeArrayData with the null bits set
- */
- private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) {
- if (data.hasNull()) {
- for (int i = 0; i < length; i++) {
- if (data.isNullAt(offset + i)) {
- arrayData.setNullAt(i);
- }
- }
- }
- return arrayData;
- }
-
- @Override
- public ArrayData copy() {
- DataType dt = data.dataType();
-
- if (dt instanceof BooleanType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray()));
- } else if (dt instanceof ByteType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray()));
- } else if (dt instanceof ShortType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray()));
- } else if (dt instanceof IntegerType
- || dt instanceof DateType
- || dt instanceof YearMonthIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray()));
- } else if (dt instanceof LongType
- || dt instanceof TimestampType
- || dt instanceof DayTimeIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray()));
- } else if (dt instanceof FloatType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray()));
- } else if (dt instanceof DoubleType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray()));
- } else {
- return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied.
- }
- }
-
- @Override
- public boolean[] toBooleanArray() {
- return data.getBooleans(offset, length);
- }
-
- @Override
- public byte[] toByteArray() {
- return data.getBytes(offset, length);
- }
-
- @Override
- public short[] toShortArray() {
- return data.getShorts(offset, length);
- }
-
- @Override
- public int[] toIntArray() {
- return data.getInts(offset, length);
- }
-
- @Override
- public long[] toLongArray() {
- return data.getLongs(offset, length);
- }
-
- @Override
- public float[] toFloatArray() {
- return data.getFloats(offset, length);
- }
-
- @Override
- public double[] toDoubleArray() {
- return data.getDoubles(offset, length);
- }
-
- // TODO: this is extremely expensive.
- @Override
- public Object[] array() {
- DataType dt = data.dataType();
- Object[] list = new Object[length];
- try {
- for (int i = 0; i < length; i++) {
- if (!data.isNullAt(offset + i)) {
- list[i] = get(i, dt);
- }
- }
- return list;
- } catch (Exception e) {
- throw new RuntimeException("Could not get the array", e);
- }
- }
-
- @Override
- public boolean isNullAt(int ordinal) {
- return data.isNullAt(offset + ordinal);
- }
-
- @Override
- public boolean getBoolean(int ordinal) {
- return data.getBoolean(offset + ordinal);
- }
-
- @Override
- public byte getByte(int ordinal) {
- return data.getByte(offset + ordinal);
- }
-
- @Override
- public short getShort(int ordinal) {
- return data.getShort(offset + ordinal);
- }
-
- @Override
- public int getInt(int ordinal) {
- return data.getInt(offset + ordinal);
- }
-
- @Override
- public long getLong(int ordinal) {
- return data.getLong(offset + ordinal);
- }
-
- @Override
- public float getFloat(int ordinal) {
- return data.getFloat(offset + ordinal);
- }
-
- @Override
- public double getDouble(int ordinal) {
- return data.getDouble(offset + ordinal);
- }
-
- @Override
- public Decimal getDecimal(int ordinal, int precision, int scale) {
- return data.getDecimal(offset + ordinal, precision, scale);
- }
-
- @Override
- public UTF8String getUTF8String(int ordinal) {
- return data.getUTF8String(offset + ordinal);
- }
-
- @Override
- public byte[] getBinary(int ordinal) {
- return data.getBinary(offset + ordinal);
- }
-
- @Override
- public CalendarInterval getInterval(int ordinal) {
- return data.getInterval(offset + ordinal);
- }
-
- @Override
- public ColumnarRow getStruct(int ordinal, int numFields) {
- return data.getStruct(offset + ordinal);
- }
-
- @Override
- public ColumnarArray getArray(int ordinal) {
- return data.getArray(offset + ordinal);
- }
-
- @Override
- public ColumnarMap getMap(int ordinal) {
- return data.getMap(offset + ordinal);
- }
-
- @Override
- public Object get(int ordinal, DataType dataType) {
- return SpecializedGettersReader.read(this, ordinal, dataType, true, false);
- }
-
- @Override
- public void update(int ordinal, Object value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setNullAt(int ordinal) {
- throw new UnsupportedOperationException();
- }
-}
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java b/shims/spark33/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
similarity index 52%
copy from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
copy to shims/spark33/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
index 3ea0444ee0..cb0c11e5e4 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
+++ b/shims/spark33/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
@@ -14,22 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
-import org.apache.spark.sql.execution.vectorized.ColumnarArrayShim;
-import org.apache.spark.sql.vectorized.ColumnVector;
+public final class ArrowColumnarArray extends AbstractColumnarArray {
-/**
- * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
- * vectors will throw exception when we try to access null value, so we define the following class
- * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
- * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
- * when trying to access a value of the array, we will check whether the value to get is null first.
- *
- * <p>The actual implementation is put in [[ColumnarArrayShim]] because Variant data type is
- * introduced in Spark-4.0.
- */
-public class ArrowColumnarArray extends ColumnarArrayShim {
public ArrowColumnarArray(ColumnVector data, int offset, int length) {
super(data, offset, length);
}
diff --git a/shims/spark34/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/spark34/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
deleted file mode 100644
index 21594a155a..0000000000
--- a/shims/spark34/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.vectorized;
-
-import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
-import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
-import org.apache.spark.sql.catalyst.util.ArrayData;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
-import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
-import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.UTF8String;
-
-public class ColumnarArrayShim extends ArrayData {
- // The data for this array. This array contains elements from
- // data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
-
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- @Override
- public int numElements() {
- return length;
- }
-
- /**
- * Sets all the appropriate null bits in the input UnsafeArrayData.
- *
- * @param arrayData The UnsafeArrayData to set the null bits for
- * @return The UnsafeArrayData with the null bits set
- */
- private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) {
- if (data.hasNull()) {
- for (int i = 0; i < length; i++) {
- if (data.isNullAt(offset + i)) {
- arrayData.setNullAt(i);
- }
- }
- }
- return arrayData;
- }
-
- @Override
- public ArrayData copy() {
- DataType dt = data.dataType();
-
- if (dt instanceof BooleanType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray()));
- } else if (dt instanceof ByteType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray()));
- } else if (dt instanceof ShortType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray()));
- } else if (dt instanceof IntegerType
- || dt instanceof DateType
- || dt instanceof YearMonthIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray()));
- } else if (dt instanceof LongType
- || dt instanceof TimestampType
- || dt instanceof DayTimeIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray()));
- } else if (dt instanceof FloatType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray()));
- } else if (dt instanceof DoubleType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray()));
- } else {
- return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied.
- }
- }
-
- @Override
- public boolean[] toBooleanArray() {
- return data.getBooleans(offset, length);
- }
-
- @Override
- public byte[] toByteArray() {
- return data.getBytes(offset, length);
- }
-
- @Override
- public short[] toShortArray() {
- return data.getShorts(offset, length);
- }
-
- @Override
- public int[] toIntArray() {
- return data.getInts(offset, length);
- }
-
- @Override
- public long[] toLongArray() {
- return data.getLongs(offset, length);
- }
-
- @Override
- public float[] toFloatArray() {
- return data.getFloats(offset, length);
- }
-
- @Override
- public double[] toDoubleArray() {
- return data.getDoubles(offset, length);
- }
-
- // TODO: this is extremely expensive.
- @Override
- public Object[] array() {
- DataType dt = data.dataType();
- Object[] list = new Object[length];
- try {
- for (int i = 0; i < length; i++) {
- if (!data.isNullAt(offset + i)) {
- list[i] = get(i, dt);
- }
- }
- return list;
- } catch (Exception e) {
- throw new RuntimeException("Could not get the array", e);
- }
- }
-
- @Override
- public boolean isNullAt(int ordinal) {
- return data.isNullAt(offset + ordinal);
- }
-
- @Override
- public boolean getBoolean(int ordinal) {
- return data.getBoolean(offset + ordinal);
- }
-
- @Override
- public byte getByte(int ordinal) {
- return data.getByte(offset + ordinal);
- }
-
- @Override
- public short getShort(int ordinal) {
- return data.getShort(offset + ordinal);
- }
-
- @Override
- public int getInt(int ordinal) {
- return data.getInt(offset + ordinal);
- }
-
- @Override
- public long getLong(int ordinal) {
- return data.getLong(offset + ordinal);
- }
-
- @Override
- public float getFloat(int ordinal) {
- return data.getFloat(offset + ordinal);
- }
-
- @Override
- public double getDouble(int ordinal) {
- return data.getDouble(offset + ordinal);
- }
-
- @Override
- public Decimal getDecimal(int ordinal, int precision, int scale) {
- return data.getDecimal(offset + ordinal, precision, scale);
- }
-
- @Override
- public UTF8String getUTF8String(int ordinal) {
- return data.getUTF8String(offset + ordinal);
- }
-
- @Override
- public byte[] getBinary(int ordinal) {
- return data.getBinary(offset + ordinal);
- }
-
- @Override
- public CalendarInterval getInterval(int ordinal) {
- return data.getInterval(offset + ordinal);
- }
-
- @Override
- public ColumnarRow getStruct(int ordinal, int numFields) {
- return data.getStruct(offset + ordinal);
- }
-
- @Override
- public ColumnarArray getArray(int ordinal) {
- return data.getArray(offset + ordinal);
- }
-
- @Override
- public ColumnarMap getMap(int ordinal) {
- return data.getMap(offset + ordinal);
- }
-
- @Override
- public Object get(int ordinal, DataType dataType) {
- return SpecializedGettersReader.read(this, ordinal, dataType, true, false);
- }
-
- @Override
- public void update(int ordinal, Object value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setNullAt(int ordinal) {
- throw new UnsupportedOperationException();
- }
-}
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java b/shims/spark34/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
similarity index 52%
copy from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
copy to shims/spark34/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
index 3ea0444ee0..cb0c11e5e4 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
+++ b/shims/spark34/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
@@ -14,22 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
-import org.apache.spark.sql.execution.vectorized.ColumnarArrayShim;
-import org.apache.spark.sql.vectorized.ColumnVector;
+public final class ArrowColumnarArray extends AbstractColumnarArray {
-/**
- * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
- * vectors will throw exception when we try to access null value, so we define the following class
- * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
- * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
- * when trying to access a value of the array, we will check whether the value to get is null first.
- *
- * <p>The actual implementation is put in [[ColumnarArrayShim]] because Variant data type is
- * introduced in Spark-4.0.
- */
-public class ArrowColumnarArray extends ColumnarArrayShim {
public ArrowColumnarArray(ColumnVector data, int offset, int length) {
super(data, offset, length);
}
diff --git a/shims/spark35/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/spark35/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
deleted file mode 100644
index 21594a155a..0000000000
--- a/shims/spark35/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.vectorized;
-
-import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
-import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
-import org.apache.spark.sql.catalyst.util.ArrayData;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
-import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
-import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.UTF8String;
-
-public class ColumnarArrayShim extends ArrayData {
- // The data for this array. This array contains elements from
- // data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
-
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- @Override
- public int numElements() {
- return length;
- }
-
- /**
- * Sets all the appropriate null bits in the input UnsafeArrayData.
- *
- * @param arrayData The UnsafeArrayData to set the null bits for
- * @return The UnsafeArrayData with the null bits set
- */
- private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) {
- if (data.hasNull()) {
- for (int i = 0; i < length; i++) {
- if (data.isNullAt(offset + i)) {
- arrayData.setNullAt(i);
- }
- }
- }
- return arrayData;
- }
-
- @Override
- public ArrayData copy() {
- DataType dt = data.dataType();
-
- if (dt instanceof BooleanType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray()));
- } else if (dt instanceof ByteType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray()));
- } else if (dt instanceof ShortType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray()));
- } else if (dt instanceof IntegerType
- || dt instanceof DateType
- || dt instanceof YearMonthIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray()));
- } else if (dt instanceof LongType
- || dt instanceof TimestampType
- || dt instanceof DayTimeIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray()));
- } else if (dt instanceof FloatType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray()));
- } else if (dt instanceof DoubleType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray()));
- } else {
- return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied.
- }
- }
-
- @Override
- public boolean[] toBooleanArray() {
- return data.getBooleans(offset, length);
- }
-
- @Override
- public byte[] toByteArray() {
- return data.getBytes(offset, length);
- }
-
- @Override
- public short[] toShortArray() {
- return data.getShorts(offset, length);
- }
-
- @Override
- public int[] toIntArray() {
- return data.getInts(offset, length);
- }
-
- @Override
- public long[] toLongArray() {
- return data.getLongs(offset, length);
- }
-
- @Override
- public float[] toFloatArray() {
- return data.getFloats(offset, length);
- }
-
- @Override
- public double[] toDoubleArray() {
- return data.getDoubles(offset, length);
- }
-
- // TODO: this is extremely expensive.
- @Override
- public Object[] array() {
- DataType dt = data.dataType();
- Object[] list = new Object[length];
- try {
- for (int i = 0; i < length; i++) {
- if (!data.isNullAt(offset + i)) {
- list[i] = get(i, dt);
- }
- }
- return list;
- } catch (Exception e) {
- throw new RuntimeException("Could not get the array", e);
- }
- }
-
- @Override
- public boolean isNullAt(int ordinal) {
- return data.isNullAt(offset + ordinal);
- }
-
- @Override
- public boolean getBoolean(int ordinal) {
- return data.getBoolean(offset + ordinal);
- }
-
- @Override
- public byte getByte(int ordinal) {
- return data.getByte(offset + ordinal);
- }
-
- @Override
- public short getShort(int ordinal) {
- return data.getShort(offset + ordinal);
- }
-
- @Override
- public int getInt(int ordinal) {
- return data.getInt(offset + ordinal);
- }
-
- @Override
- public long getLong(int ordinal) {
- return data.getLong(offset + ordinal);
- }
-
- @Override
- public float getFloat(int ordinal) {
- return data.getFloat(offset + ordinal);
- }
-
- @Override
- public double getDouble(int ordinal) {
- return data.getDouble(offset + ordinal);
- }
-
- @Override
- public Decimal getDecimal(int ordinal, int precision, int scale) {
- return data.getDecimal(offset + ordinal, precision, scale);
- }
-
- @Override
- public UTF8String getUTF8String(int ordinal) {
- return data.getUTF8String(offset + ordinal);
- }
-
- @Override
- public byte[] getBinary(int ordinal) {
- return data.getBinary(offset + ordinal);
- }
-
- @Override
- public CalendarInterval getInterval(int ordinal) {
- return data.getInterval(offset + ordinal);
- }
-
- @Override
- public ColumnarRow getStruct(int ordinal, int numFields) {
- return data.getStruct(offset + ordinal);
- }
-
- @Override
- public ColumnarArray getArray(int ordinal) {
- return data.getArray(offset + ordinal);
- }
-
- @Override
- public ColumnarMap getMap(int ordinal) {
- return data.getMap(offset + ordinal);
- }
-
- @Override
- public Object get(int ordinal, DataType dataType) {
- return SpecializedGettersReader.read(this, ordinal, dataType, true, false);
- }
-
- @Override
- public void update(int ordinal, Object value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setNullAt(int ordinal) {
- throw new UnsupportedOperationException();
- }
-}
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java b/shims/spark35/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
similarity index 52%
copy from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
copy to shims/spark35/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
index 3ea0444ee0..cb0c11e5e4 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
+++ b/shims/spark35/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
@@ -14,22 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
-import org.apache.spark.sql.execution.vectorized.ColumnarArrayShim;
-import org.apache.spark.sql.vectorized.ColumnVector;
+public final class ArrowColumnarArray extends AbstractColumnarArray {
-/**
- * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
- * vectors will throw exception when we try to access null value, so we define the following class
- * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
- * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
- * when trying to access a value of the array, we will check whether the value to get is null first.
- *
- * <p>The actual implementation is put in [[ColumnarArrayShim]] because Variant data type is
- * introduced in Spark-4.0.
- */
-public class ArrowColumnarArray extends ColumnarArrayShim {
public ArrowColumnarArray(ColumnVector data, int offset, int length) {
super(data, offset, length);
}
diff --git a/shims/spark40/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/spark40/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
deleted file mode 100644
index 25adf5d233..0000000000
--- a/shims/spark40/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.vectorized;
-
-import org.apache.spark.SparkUnsupportedOperationException;
-import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
-import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
-import org.apache.spark.sql.catalyst.util.ArrayData;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
-import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
-import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.UTF8String;
-import org.apache.spark.unsafe.types.VariantVal;
-
-public class ColumnarArrayShim extends ArrayData {
- // The data for this array. This array contains elements from
- // data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
-
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- @Override
- public int numElements() {
- return length;
- }
-
- /**
- * Sets all the appropriate null bits in the input UnsafeArrayData.
- *
- * @param arrayData The UnsafeArrayData to set the null bits for
- * @return The UnsafeArrayData with the null bits set
- */
- private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) {
- if (data.hasNull()) {
- for (int i = 0; i < length; i++) {
- if (data.isNullAt(offset + i)) {
- arrayData.setNullAt(i);
- }
- }
- }
- return arrayData;
- }
-
- @Override
- public ArrayData copy() {
- DataType dt = data.dataType();
-
- if (dt instanceof BooleanType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray()));
- } else if (dt instanceof ByteType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray()));
- } else if (dt instanceof ShortType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray()));
- } else if (dt instanceof IntegerType
- || dt instanceof DateType
- || dt instanceof YearMonthIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray()));
- } else if (dt instanceof LongType
- || dt instanceof TimestampType
- || dt instanceof DayTimeIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray()));
- } else if (dt instanceof FloatType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray()));
- } else if (dt instanceof DoubleType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray()));
- } else {
- return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied.
- }
- }
-
- @Override
- public boolean[] toBooleanArray() {
- return data.getBooleans(offset, length);
- }
-
- @Override
- public byte[] toByteArray() {
- return data.getBytes(offset, length);
- }
-
- @Override
- public short[] toShortArray() {
- return data.getShorts(offset, length);
- }
-
- @Override
- public int[] toIntArray() {
- return data.getInts(offset, length);
- }
-
- @Override
- public long[] toLongArray() {
- return data.getLongs(offset, length);
- }
-
- @Override
- public float[] toFloatArray() {
- return data.getFloats(offset, length);
- }
-
- @Override
- public double[] toDoubleArray() {
- return data.getDoubles(offset, length);
- }
-
- // TODO: this is extremely expensive.
- @Override
- public Object[] array() {
- DataType dt = data.dataType();
- Object[] list = new Object[length];
- try {
- for (int i = 0; i < length; i++) {
- if (!data.isNullAt(offset + i)) {
- list[i] = get(i, dt);
- }
- }
- return list;
- } catch (Exception e) {
- throw new RuntimeException("Could not get the array", e);
- }
- }
-
- @Override
- public boolean isNullAt(int ordinal) {
- return data.isNullAt(offset + ordinal);
- }
-
- @Override
- public boolean getBoolean(int ordinal) {
- return data.getBoolean(offset + ordinal);
- }
-
- @Override
- public byte getByte(int ordinal) {
- return data.getByte(offset + ordinal);
- }
-
- @Override
- public short getShort(int ordinal) {
- return data.getShort(offset + ordinal);
- }
-
- @Override
- public int getInt(int ordinal) {
- return data.getInt(offset + ordinal);
- }
-
- @Override
- public long getLong(int ordinal) {
- return data.getLong(offset + ordinal);
- }
-
- @Override
- public float getFloat(int ordinal) {
- return data.getFloat(offset + ordinal);
- }
-
- @Override
- public double getDouble(int ordinal) {
- return data.getDouble(offset + ordinal);
- }
-
- @Override
- public Decimal getDecimal(int ordinal, int precision, int scale) {
- return data.getDecimal(offset + ordinal, precision, scale);
- }
-
- @Override
- public UTF8String getUTF8String(int ordinal) {
- return data.getUTF8String(offset + ordinal);
- }
-
- @Override
- public byte[] getBinary(int ordinal) {
- return data.getBinary(offset + ordinal);
- }
-
- @Override
- public CalendarInterval getInterval(int ordinal) {
- return data.getInterval(offset + ordinal);
- }
-
- @Override
- public VariantVal getVariant(int ordinal) {
- return data.getVariant(offset + ordinal);
- }
-
- @Override
- public ColumnarRow getStruct(int ordinal, int numFields) {
- return data.getStruct(offset + ordinal);
- }
-
- @Override
- public ColumnarArray getArray(int ordinal) {
- return data.getArray(offset + ordinal);
- }
-
- @Override
- public ColumnarMap getMap(int ordinal) {
- return data.getMap(offset + ordinal);
- }
-
- @Override
- public Object get(int ordinal, DataType dataType) {
- return SpecializedGettersReader.read(this, ordinal, dataType, true, false);
- }
-
- @Override
- public void update(int ordinal, Object value) {
- throw SparkUnsupportedOperationException.apply();
- }
-
- @Override
- public void setNullAt(int ordinal) {
- throw SparkUnsupportedOperationException.apply();
- }
-}
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java b/shims/spark40/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
similarity index 52%
copy from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
copy to shims/spark40/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
index 3ea0444ee0..0e000a8b40 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
+++ b/shims/spark40/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
@@ -14,23 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
-import org.apache.spark.sql.execution.vectorized.ColumnarArrayShim;
-import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.unsafe.types.VariantVal;
+
+public final class ArrowColumnarArray extends AbstractColumnarArray {
-/**
- * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
- * vectors will throw exception when we try to access null value, so we define the following class
- * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
- * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
- * when trying to access a value of the array, we will check whether the value to get is null first.
- *
- * <p>The actual implementation is put in [[ColumnarArrayShim]] because Variant data type is
- * introduced in Spark-4.0.
- */
-public class ArrowColumnarArray extends ColumnarArrayShim {
public ArrowColumnarArray(ColumnVector data, int offset, int length) {
super(data, offset, length);
}
+
+ @Override
+ public VariantVal getVariant(int ordinal) {
+ return data.getVariant(offset + ordinal);
+ }
}
diff --git a/shims/spark41/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java b/shims/spark41/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
deleted file mode 100644
index 7d1347345a..0000000000
--- a/shims/spark41/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArrayShim.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.vectorized;
-
-import org.apache.spark.SparkUnsupportedOperationException;
-import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader;
-import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
-import org.apache.spark.sql.catalyst.util.ArrayData;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
-import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
-import org.apache.spark.sql.vectorized.ColumnarRow;
-import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
-import org.apache.spark.unsafe.types.UTF8String;
-import org.apache.spark.unsafe.types.VariantVal;
-
-public class ColumnarArrayShim extends ArrayData {
- // The data for this array. This array contains elements from
- // data[offset] to data[offset + length).
- private final ColumnVector data;
- private final int offset;
- private final int length;
-
- public ColumnarArrayShim(ColumnVector data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- @Override
- public int numElements() {
- return length;
- }
-
- /**
- * Sets all the appropriate null bits in the input UnsafeArrayData.
- *
- * @param arrayData The UnsafeArrayData to set the null bits for
- * @return The UnsafeArrayData with the null bits set
- */
- private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) {
- if (data.hasNull()) {
- for (int i = 0; i < length; i++) {
- if (data.isNullAt(offset + i)) {
- arrayData.setNullAt(i);
- }
- }
- }
- return arrayData;
- }
-
- @Override
- public ArrayData copy() {
- DataType dt = data.dataType();
-
- if (dt instanceof BooleanType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray()));
- } else if (dt instanceof ByteType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray()));
- } else if (dt instanceof ShortType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray()));
- } else if (dt instanceof IntegerType
- || dt instanceof DateType
- || dt instanceof YearMonthIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray()));
- } else if (dt instanceof LongType
- || dt instanceof TimestampType
- || dt instanceof DayTimeIntervalType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray()));
- } else if (dt instanceof FloatType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray()));
- } else if (dt instanceof DoubleType) {
- return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray()));
- } else {
- return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied.
- }
- }
-
- @Override
- public boolean[] toBooleanArray() {
- return data.getBooleans(offset, length);
- }
-
- @Override
- public byte[] toByteArray() {
- return data.getBytes(offset, length);
- }
-
- @Override
- public short[] toShortArray() {
- return data.getShorts(offset, length);
- }
-
- @Override
- public int[] toIntArray() {
- return data.getInts(offset, length);
- }
-
- @Override
- public long[] toLongArray() {
- return data.getLongs(offset, length);
- }
-
- @Override
- public float[] toFloatArray() {
- return data.getFloats(offset, length);
- }
-
- @Override
- public double[] toDoubleArray() {
- return data.getDoubles(offset, length);
- }
-
- // TODO: this is extremely expensive.
- @Override
- public Object[] array() {
- DataType dt = data.dataType();
- Object[] list = new Object[length];
- try {
- for (int i = 0; i < length; i++) {
- if (!data.isNullAt(offset + i)) {
- list[i] = get(i, dt);
- }
- }
- return list;
- } catch (Exception e) {
- throw new RuntimeException("Could not get the array", e);
- }
- }
-
- @Override
- public boolean isNullAt(int ordinal) {
- return data.isNullAt(offset + ordinal);
- }
-
- @Override
- public boolean getBoolean(int ordinal) {
- return data.getBoolean(offset + ordinal);
- }
-
- @Override
- public byte getByte(int ordinal) {
- return data.getByte(offset + ordinal);
- }
-
- @Override
- public short getShort(int ordinal) {
- return data.getShort(offset + ordinal);
- }
-
- @Override
- public int getInt(int ordinal) {
- return data.getInt(offset + ordinal);
- }
-
- @Override
- public long getLong(int ordinal) {
- return data.getLong(offset + ordinal);
- }
-
- @Override
- public float getFloat(int ordinal) {
- return data.getFloat(offset + ordinal);
- }
-
- @Override
- public double getDouble(int ordinal) {
- return data.getDouble(offset + ordinal);
- }
-
- @Override
- public Decimal getDecimal(int ordinal, int precision, int scale) {
- return data.getDecimal(offset + ordinal, precision, scale);
- }
-
- @Override
- public UTF8String getUTF8String(int ordinal) {
- return data.getUTF8String(offset + ordinal);
- }
-
- @Override
- public byte[] getBinary(int ordinal) {
- return data.getBinary(offset + ordinal);
- }
-
- @Override
- public GeographyVal getGeography(int ordinal) {
- return data.getGeography(offset + ordinal);
- }
-
- @Override
- public GeometryVal getGeometry(int ordinal) {
- return data.getGeometry(offset + ordinal);
- }
-
- @Override
- public CalendarInterval getInterval(int ordinal) {
- return data.getInterval(offset + ordinal);
- }
-
- @Override
- public VariantVal getVariant(int ordinal) {
- return data.getVariant(offset + ordinal);
- }
-
- @Override
- public ColumnarRow getStruct(int ordinal, int numFields) {
- return data.getStruct(offset + ordinal);
- }
-
- @Override
- public ColumnarArray getArray(int ordinal) {
- return data.getArray(offset + ordinal);
- }
-
- @Override
- public ColumnarMap getMap(int ordinal) {
- return data.getMap(offset + ordinal);
- }
-
- @Override
- public Object get(int ordinal, DataType dataType) {
- return SpecializedGettersReader.read(this, ordinal, dataType, true, false);
- }
-
- @Override
- public void update(int ordinal, Object value) {
- throw SparkUnsupportedOperationException.apply();
- }
-
- @Override
- public void setNullAt(int ordinal) {
- throw SparkUnsupportedOperationException.apply();
- }
-}
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java b/shims/spark41/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
similarity index 52%
rename from gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
rename to shims/spark41/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
index 3ea0444ee0..bfcc561e39 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ArrowColumnarArray.java
+++ b/shims/spark41/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnarArray.java
@@ -14,23 +14,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.vectorized;
+package org.apache.spark.sql.vectorized;
-import org.apache.spark.sql.execution.vectorized.ColumnarArrayShim;
-import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.VariantVal;
+
+public final class ArrowColumnarArray extends AbstractColumnarArray {
-/**
- * Because `get` method in `ColumnarArray` don't check whether the data to get is null and arrow
- * vectors will throw exception when we try to access null value, so we define the following class
- * as a workaround. Its implementation is copied from Spark-4.0, except that the `handleNull`
- * parameter is set to true when we call `SpecializedGettersReader.read` in `get`, which means that
- * when trying to access a value of the array, we will check whether the value to get is null first.
- *
- * <p>The actual implementation is put in [[ColumnarArrayShim]] because Variant data type is
- * introduced in Spark-4.0.
- */
-public class ArrowColumnarArray extends ColumnarArrayShim {
public ArrowColumnarArray(ColumnVector data, int offset, int length) {
super(data, offset, length);
}
+
+ @Override
+ public VariantVal getVariant(int ordinal) {
+ return data.getVariant(offset + ordinal);
+ }
+
+ @Override
+ public GeographyVal getGeography(int ordinal) {
+ return data.getGeography(offset + ordinal);
+ }
+
+ @Override
+ public GeometryVal getGeometry(int ordinal) {
+ return data.getGeometry(offset + ordinal);
+ }
}
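
Note for reviewers: the Javadoc on `AbstractColumnarArray` above describes the null-handling workaround this patch keeps in one place. As a minimal, hedged usage sketch (not part of the commit; the Arrow-backed `vector` with a null at slot 1 is an assumed setup), the behaviour it describes looks like this in Java:

    // Assumed: `vector` is an Arrow-backed ColumnVector whose element at index 1 is null.
    ArrowColumnarArray array = new ArrowColumnarArray(vector, /* offset */ 0, /* length */ 3);

    // AbstractColumnarArray#get routes the read through
    // SpecializedGettersReader.read(this, ordinal, dataType, /* handleNull */ true, false),
    // so isNullAt(ordinal) is consulted first and null is returned rather than the
    // underlying Arrow getter throwing on the null slot.
    Object value = array.get(1, org.apache.spark.sql.types.DataTypes.IntegerType);  // -> null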
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]