This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b568ba43f0d [SPARK-45157][SQL] Avoid repeated `if` checks in `[On|Off]HeapColumnVector` b568ba43f0d is described below commit b568ba43f0dd80130bca1bf86c48d0d359e57883 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Sep 13 08:36:05 2023 -0700 [SPARK-45157][SQL] Avoid repeated `if` checks in `[On|Off]HeapColumnVector` ### What changes were proposed in this pull request? This is a small followup of https://github.com/apache/spark/pull/42850. `getBytes` checks if the `dictionary` is null or not, then calls `getByte` which also checks if the `dictionary` is null or not. This PR avoids the repeated if checks by copying one line of code from `getByte` to `getBytes`. The same applies to other `getXXX` methods. ### Why are the changes needed? Make the perf-critical path more efficient. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #42903 from cloud-fan/vector. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../spark/sql/execution/vectorized/OffHeapColumnVector.java | 12 ++++++------ .../spark/sql/execution/vectorized/OnHeapColumnVector.java | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 9cb1b1f0b5e..2bb0b02d4c9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -218,7 +218,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId, array, Platform.BYTE_ARRAY_OFFSET, count); } else { for (int i = 0; i < count; i++) { - array[i] = getByte(rowId + i); + array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -279,7 +279,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 2L, array, Platform.SHORT_ARRAY_OFFSET, count * 2L); } else { for (int i = 0; i < count; i++) { - array[i] = getShort(rowId + i); + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -345,7 +345,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 4L, array, Platform.INT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = getInt(rowId + i); + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -423,7 +423,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 8L, array, Platform.LONG_ARRAY_OFFSET, count * 8L); } else { for (int i = 0; i < 
count; i++) { - array[i] = getLong(rowId + i); + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -487,7 +487,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { Platform.copyMemory(null, data + rowId * 4L, array, Platform.FLOAT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = getFloat(rowId + i); + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -553,7 +553,7 @@ public final class OffHeapColumnVector extends WritableColumnVector { count * 8L); } else { for (int i = 0; i < count; i++) { - array[i] = getDouble(rowId + i); + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); } } return array; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index be590bb9ac7..2bf2b8d08fc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -216,7 +216,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(byteData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getByte(rowId + i); + array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -276,7 +276,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(shortData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getShort(rowId + i); + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -337,7 +337,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(intData, rowId, array, 0, count); } else { for (int i = 0; i < count; 
i++) { - array[i] = getInt(rowId + i); + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -409,7 +409,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(longData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getLong(rowId + i); + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -466,7 +466,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(floatData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getFloat(rowId + i); + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); } } return array; @@ -525,7 +525,7 @@ public final class OnHeapColumnVector extends WritableColumnVector { System.arraycopy(doubleData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = getDouble(rowId + i); + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); } } return array; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org