This is an automated email from the ASF dual-hosted git repository.
markd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 769f0e3 [SYSTEMDS-2888] Fix incomplete cbind support in codegen row templates (CUDA)
769f0e3 is described below
commit 769f0e3db1646acdb7212bbe0c2275d69013a2c2
Author: Mark Dokter <[email protected]>
AuthorDate: Mon Apr 12 23:06:32 2021 +0200
[SYSTEMDS-2888] Fix incomplete cbind support in codegen row templates (CUDA)
This patch is the CUDA version of the original bugfix (commit
1ec292a932c6e732bbac835a81cdb59371002114)
---
src/main/cuda/headers/spoof_utils.cuh | 28 ++++++++++++------
.../sysds/hops/codegen/cplan/cuda/Binary.java | 33 ++++++++++------------
2 files changed, 35 insertions(+), 26 deletions(-)
diff --git a/src/main/cuda/headers/spoof_utils.cuh b/src/main/cuda/headers/spoof_utils.cuh
index 271f0c2..9bcaef5 100644
--- a/src/main/cuda/headers/spoof_utils.cuh
+++ b/src/main/cuda/headers/spoof_utils.cuh
@@ -337,18 +337,30 @@ __device__ void vectDivAdd(T* a, T b, T* c, int ai, int ci, int len) {
}
template<typename T>
+// Column-binds a[ai .. ai+alen) and b[bi .. bi+blen) into one temporary vector of alen+blen entries.
+__device__ Vector<T>& vectCbindWrite(T* a, T* b, uint32_t ai, uint32_t bi, uint32_t alen, uint32_t blen, TempStorage<T>* fop) {
+ Vector<T>& c = fop->getTempStorage(alen+blen);
+ // Each thread starts at threadIdx.x and advances by blockDim.x (threads per block, not gridDim.x).
+ for(auto i = threadIdx.x; i < alen; i += blockDim.x)
+   c[i] = a[ai + i];
+ // Restart at threadIdx.x for b; its values are stored behind the alen entries taken from a.
+ for(auto i = threadIdx.x; i < blen; i += blockDim.x)
+   c[alen + i] = b[bi + i];
+ // NOTE(review): no barrier is issued here; callers reading entries written by other threads must sync - confirm.
+ return c;
+}
+
+template<typename T>
__device__ Vector<T>& vectCbindWrite(T* a, T b, uint32_t ai, uint32_t len,
TempStorage<T>* fop) {
Vector<T>& c = fop->getTempStorage(len+1);
-
- if(threadIdx.x < len) {
-// if(blockIdx.x==1 && threadIdx.x ==0)
-// printf("vecCbindWrite: bid=%d, tid=%d, ai=%d, len=%d,
a[%d]=%f\n", blockIdx.x, threadIdx.x, ai, len, ai * len + threadIdx.x, a[ai *
len + threadIdx.x]);
- c[threadIdx.x] = a[ai + threadIdx.x];
+ auto i = threadIdx.x;
+ while(i < len) { // copy a[ai .. ai+len) into c[0 .. len), one element per thread per pass
+ c[i] = a[ai + i];
+ i += blockDim.x; // advance by threads per block; gridDim.x (block count) is the wrong stride here
}
- if(threadIdx.x == len) {
-// printf("---> block %d thread %d, b=%f,, len=%d,
a[%d]=%f\n",blockIdx.x, threadIdx.x, b, len, ai, a[ai]);
- c[threadIdx.x] = b;
+ if(i == len) { // in a 1-D block only the thread with threadIdx.x == len % blockDim.x ends at i == len
+ c[i] = b; // that thread appends the scalar b as entry len of the result
}
return c;
}
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/cuda/Binary.java b/src/main/java/org/apache/sysds/hops/codegen/cplan/cuda/Binary.java
index 7d9655f..6d826b1 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/cuda/Binary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/cuda/Binary.java
@@ -30,6 +30,19 @@ public class Binary extends CodeTemplate
public String getTemplate(CNodeBinary.BinType type, boolean sparseLhs,
boolean sparseRhs,
boolean scalarVector, boolean scalarInput, boolean vectorVector)
{
+ if(type == CNodeBinary.BinType.VECT_CBIND) {
+ if(scalarInput)
+ return "\t\tVector<T>& %TMP% =
vectCbindWrite(%IN1%, %IN2%, this);\n";
+ else if (!vectorVector)
+ return sparseLhs ?
+ "\t\tVector<T>& %TMP% =
vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, %LEN%, this);\n" :
+ "\t\tVector<T>& %TMP% =
vectCbindWrite(%IN1%, %IN2%, %POS1%, %LEN%, this);\n";
+ else //vect/vect
+ return sparseLhs ?
+ "\t\tVector<T>& %TMP% =
vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, alen, %LEN1%, %LEN2%,
this);\n" :
+ "\t\tVector<T>& %TMP% =
vectCbindWrite(%IN1%, %IN2%, %POS1%, %POS2%, %LEN1%, %LEN2%, this);\n";
+ }
+
if(isSinglePrecision()) {
switch(type) {
case DOT_PRODUCT:
@@ -84,15 +97,7 @@ public class Binary extends CodeTemplate
else
return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vect" + vectName + "Write(%IN1v%, %IN2%, %IN1i%,
%POS1%, alen, %LEN%);\n" : " T[] %TMP% = LibSpoofPrimitives.vect" + vectName
+ "Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
}
-
- case VECT_CBIND:
- if(scalarInput)
- return "
Vector<T>& %TMP% = vectCbindWrite(%IN1%, %IN2%, this);\n";
- else
-// return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen,
%LEN%);\n" : " T[] %TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1%,
%IN2%, %POS1%, %LEN%);\n";
- return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen,
%LEN%);\n" : " Vector<T>& %TMP% = vectCbindWrite(%IN1%, %IN2%,
%POS1%, %LEN%, this);\n";
-
- //vector-vector operations
+ //vector-vector operations
case VECT_MULT:
case VECT_DIV:
case VECT_MINUS:
@@ -222,16 +227,8 @@ public class Binary extends CodeTemplate
// return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vect" + vectName + "Write(%IN1v%, %IN2%, %IN1i%,
%POS1%, alen, %LEN%);\n" : " T[] %TMP% = LibSpoofPrimitives.vect" + vectName
+ "Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
return sparseLhs ? "
Vector<T>& %TMP% = vect" + vectName + "Write(%IN1v%, %IN2%, %IN1i%, %POS1%,
alen, %LEN%, this);\n" : " Vector<T>& %TMP% = vect" + vectName +
"Write(%IN1%, %IN2%, static_cast<uint32_t>(%POS1%), %LEN%, this);\n";
}
-
- case VECT_CBIND:
- if(scalarInput)
- return "
Vector<T>& %TMP% = vectCbindWrite(%IN1%, %IN2%, this);\n";
- else
-// return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen,
%LEN%);\n" : " T[] %TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1%,
%IN2%, %POS1%, %LEN%);\n";
-// return sparseLhs ? " T[]
%TMP% = vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, %LEN%);\n" : "
T* %TMP% = vectCbindWrite(%IN1%, %IN2%, %POS1%, %LEN%);\n";
- return sparseLhs ? " T[]
%TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen,
%LEN%);\n" : " Vector<T>& %TMP% = vectCbindWrite(%IN1%, %IN2%,
%POS1%, %LEN%, this);\n";
- //vector-vector operations
+ //vector-vector operations
case VECT_MULT:
case VECT_DIV:
case VECT_MINUS: