This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b568ba43f0d [SPARK-45157][SQL] Avoid repeated `if` checks in `[On|Off]HeapColumnVector` b568ba43f0d is described below commit b568ba43f0dd80130bca1bf86c48d0d359e57883 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Sep 13 08:36:05 2023 -0700 [SPARK-45157][SQL] Avoid repeated `if` checks in `[On|Off]HeapColumnVector` ### What changes were proposed in this pull request? This is a small followup of https://github.com/apache/spark/pull/42850. `getBytes` checks if the `dictionary` is null or not, then calls `getByte` which also checks if the `dictionary` is null or not. This PR avoids the repeated if checks by copying one line of code from `getByte` to `getBytes`. The same applies to other `getXXX` methods. ### Why are the changes needed? Make the perf-critical path more efficient. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #42903 from cloud-fan/vector. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../spark/sql/execution/vectorized/OffHeapColumnVector.java | 12 ++++++------ .../spark/sql/execution/vectorized/OnHeapColumnVector.java | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 9cb1b1f0b5e..2bb0b02d4c9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -218,7 +218,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId, array, Platform.BYTE_ARRAY_OFFSET, count); } else { for (int i = 0; i < count; i++) { - array[i] = getByte(rowId + i); + array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -279,7 +279,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 2L, array, Platform.SHORT_ARRAY_OFFSET, count * 2L); } else { for (int i = 0; i < count; i++) { - array[i] = getShort(rowId + i); + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -345,7 +345,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 4L, array, Platform.INT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = getInt(rowId + i); + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -423,7 +423,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 8L, array, Platform.LONG_ARRAY_OFFSET, count * 8L); } else { for (int i = 0; i < 
count; i++) { - array[i] = getLong(rowId + i); + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -487,7 +487,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 4L, array, Platform.FLOAT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = getFloat(rowId + i); + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -553,7 +553,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { count * 8L); } else { for (int i = 0; i < count; i++) { - array[i] = getDouble(rowId + i); + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); } } return array; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index be590bb9ac7..2bf2b8d08fc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -216,7 +216,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(byteData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getByte(rowId + i); + array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -276,7 +276,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(shortData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getShort(rowId + i); + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -337,7 +337,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(intData, rowId, array, 0, count); } else { for (int i = 0; i < count; 
i++) { - array[i] = getInt(rowId + i); + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -409,7 +409,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(longData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getLong(rowId + i); + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -466,7 +466,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(floatData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getFloat(rowId + i); + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -525,7 +525,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(doubleData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getDouble(rowId + i); + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); } } return array; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org