sc/source/core/opencl/formulagroupcl.cxx | 99 +++++++++++++++---------------- 1 file changed, 48 insertions(+), 51 deletions(-)
New commits: commit 8441dd5c9aa58e00fd1d70f68df2d624120007a9 Author: I-Jui (Ray) Sung <[email protected]> Date: Tue Nov 19 22:21:24 2013 -0600 GPU Calc: Fix COUNT() regression Change-Id: I1b313d7f5f144f1884abe60c816db1fd6a643489 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 0a33eb7..dbefbf8 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -24,8 +24,11 @@ #include "op_logical.hxx" #include "op_statistical.hxx" #include "op_array.hxx" +/// CONFIGURATIONS // Comment out this to turn off FMIN and FMAX intrinsics #define USE_FMIN_FMAX 1 +#define REDUCE_THRESHOLD 4 // set to 4 for correctness testing. priority 1 +#define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce) #include "formulagroupcl_public.hxx" #include <list> @@ -41,7 +44,6 @@ #include <boost/scoped_ptr.hpp> -#define UNROLLING using namespace formula; @@ -414,6 +416,7 @@ class OpSum; // Forward Declaration class OpAverage; // Forward Declaration class OpMin; // Forward Declaration class OpMax; // Forward Declaration +class OpCount; // Forward Declaration template<class Base> class DynamicKernelSlidingArgument: public Base { @@ -450,7 +453,7 @@ public: assert(mpDVR); size_t nCurWindowSize = mpDVR->GetRefRowSize(); // original for loop -#ifndef UNROLLING +#ifndef UNROLLING_FACTOR needBody = true; // No need to generate a for-loop for degenerated cases if (nCurWindowSize == 1) @@ -497,7 +500,7 @@ public: return nCurWindowSize; #endif -#ifdef UNROLLING +#ifdef UNROLLING_FACTOR { if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) { ss << "for (int i = "; @@ -515,7 +518,7 @@ return nCurWindowSize; ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t"; ss << "{int i;\n\t"; std::stringstream temp1,temp2; - int outLoopSize = 16; + int outLoopSize = UNROLLING_FACTOR; if ( nCurWindowSize/outLoopSize != 0){ ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t"; for(int count=0; count < outLoopSize; count++){ @@ -523,18 +526,10 @@ return nCurWindowSize; if(count==0){ temp1 << "if(i + gid0 < " <<mpDVR->GetArrayLength(); temp1 << "){\n\t\t"; - temp1 << "if (isNan("; - temp1 << GenSlidingWindowDeclRef(); - temp1 << ")){\n\t\t\t"; - temp1 << "tmp = "; - temp1 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t"; - temp1 << "}else{\n\t\t\t"; - temp1 << "tmp = "; temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp"); temp1 << ";\n\t\t\t"; temp1 << "nCount += 1;\n\t\t"; temp1 << "}\n\t"; - temp1 << "}\n\t"; } ss << temp1.str(); } @@ -546,18 +541,11 @@ return nCurWindowSize; if(count==nCurWindowSize/outLoopSize*outLoopSize){ temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength(); temp2 << "){\n\t\t"; - temp2 << "if (isNan("; - temp2 << GenSlidingWindowDeclRef(); - temp2 << ")){\n\t\t\t"; - temp2 << "tmp = "; - temp2 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t"; - temp2 << "}else{\n\t\t\t"; temp2 << "tmp = "; temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp"); temp2 << ";\n\t\t\t"; temp2 << "nCount += 1;\n\t\t"; temp2 << "}\n\t"; - temp2 << "}\n\t"; } ss << temp2.str(); } @@ -571,23 +559,16 @@ return nCurWindowSize; ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t"; ss << "{int i;\n\t"; std::stringstream temp1,temp2; - int outLoopSize = 16; + int outLoopSize = UNROLLING_FACTOR; if (nCurWindowSize/outLoopSize != 0){ ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t"; for(int count=0; count < outLoopSize; count++){ ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t"; if(count==0){ - temp1 << "if (isNan("; - temp1 << GenSlidingWindowDeclRef(); - temp1 << ")){\n\t\t\t"; - temp1 << "tmp = "; - temp1 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t"; - temp1 << "}else{\n\t\t\t"; temp1 << "tmp = "; temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp"); temp1 << ";\n\t\t\t"; temp1 << "nCount += 1;\n\t\t"; - temp1 << "}\n\t"; } ss << temp1.str(); } @@ -597,17 +578,10 @@ return nCurWindowSize; for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){ ss << "i = "<<count<<";\n\t"; if(count==nCurWindowSize/outLoopSize*outLoopSize){ - temp2 << "if (isNan("; - temp2 << GenSlidingWindowDeclRef(); - temp2 << ")){\n\t\t\t"; - temp2 << "tmp = "; - temp2 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t"; - temp2 << "}else{\n\t\t\t"; temp2 << "tmp = "; temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp"); temp2 << ";\n\t\t\t"; temp2 << "nCount += 1;\n\t\t"; - temp2 << "}\n\t"; } ss << temp2.str(); } @@ -694,12 +668,12 @@ public: ss << " for (int l=0; l<loop; l++){\n"; ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n"; ss << " int loopOffset = l*512;\n"; - ss << " if((loopOffset + lidx + offset + 256) < end)\n"; + ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset]", "tmp") <<";\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset + 256]", "tmp") << ";\n"; - ss << " else if ((loopOffset + lidx + offset) < end)\n"; + ss << " } else if ((loopOffset + lidx + offset) < end)\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset]", "tmp") <<";\n"; ss << " shm_buf[lidx] = tmp;\n"; @@ -707,15 +681,21 @@ public: ss << " for (int i = 128; i >0; i/=2) {\n"; ss << " if (lidx < i)\n"; ss << " shm_buf[lidx] = "; - ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]"); - ss << ";"; + // Special case count + if (dynamic_cast<OpCount*>(mpCodeGen.get())) + ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; + else + ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n"; ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " }\n"; - ss << " if (lidx == 0)\n"; - ss << " current_result ="; - ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); + ss << " if (lidx == 0)\n"; + ss << " current_result ="; + if (dynamic_cast<OpCount*>(mpCodeGen.get())) + ss << "shm_buf[0]"; + else + ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); ss << ";\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " }\n"; ss << " if (lidx == 0)\n"; ss << " result[writePos] = current_result;\n"; @@ -740,7 +720,11 @@ public: size_t nCurWindowSize = mpDVR->GetRefRowSize(); std::string temp = Base::GetName() + "[gid0]"; ss << "tmp = "; - ss << mpCodeGen->Gen2(temp, "tmp"); + // Special case count + if (dynamic_cast<OpCount*>(mpCodeGen.get())) + ss << temp << "+ tmp"; + else + ss << mpCodeGen->Gen2(temp, "tmp"); ss << ";\n\t"; needBody = false; return nCurWindowSize; @@ -1030,7 +1014,7 @@ public: ss << ") {\n"; ss << " double tmp = 0.0;\n"; ss << " int gid0 = get_global_id(0);\n"; -#ifndef UNROLLING +#ifndef UNROLLING_FACTOR ss << " int i ;\n"; ss << " for (i = 0; i < "<< nCurWindowSize <<"; i++)\n"; ss << " {\n"; @@ -1100,11 +1084,11 @@ public: ss << "}"; #endif -#ifdef UNROLLING +#ifdef UNROLLING_FACTOR ss << "\tint i;\n\t"; ss << "int currentCount0, currentCount1;\n\t"; std::stringstream temp3,temp4; - int outLoopSize = 16; + int outLoopSize = UNROLLING_FACTOR; if (nCurWindowSize/outLoopSize != 0){ ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t"; @@ -1554,6 +1538,11 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s, { return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); } + // MUL is not supported yet + else if (dynamic_cast<OpMul*>(pCodeGen.get())) + { + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + } // Sub is not a reduction per se else if (dynamic_cast<OpSub*>(pCodeGen.get())) { @@ -1569,14 +1558,11 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s, dynamic_cast< const formula::DoubleVectorRefToken* >( ft->GetFormulaToken()); // Window being too small to justify a parallel reduction - if (pDVR->GetRefRowSize() < 4) + if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD) return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) || (!pDVR->IsStartFixed() && !pDVR->IsEndFixed())) return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index); - if ((pDVR->IsStartFixed() && !pDVR->IsEndFixed()) || - (!pDVR->IsStartFixed() && !pDVR->IsEndFixed())) - return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index); else // Other cases are not supported as well return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); } commit c0548c4f6363042432b2052362fd12d2a81532d8 Author: I-Jui (Ray) Sung <[email protected]> Date: Tue Nov 19 20:32:48 2013 -0600 GPU Calc: support reductions without uniform window sizes Change-Id: Iddd7a1bbc51f02b6b950c34afd9cbe95ec09bbf9 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index d283174..0a33eb7 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -675,23 +675,31 @@ public: ss << " int writePos = get_group_id(1);\n"; ss << " int lidx = get_local_id(0);\n"; ss << " __local double shm_buf[256];\n"; - if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) + if (mpDVR->IsStartFixed()) ss << " int offset = 0;\n"; - else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) + else // if (!mpDVR->IsStartFixed()) ss << " int offset = get_group_id(1);\n"; - else - throw Unhandled(); + if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) + ss << " int end = windowSize;\n"; + else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) + ss << " int end = offset + windowSize;\n"; + else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) + ss << " int end = windowSize + get_group_id(1);\n"; + else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) + ss << " int end = windowSize;\n"; + ss << " end = min(end, arrayLength);\n"; + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " int loop = arrayLength/512 + 1;\n"; ss << " for (int l=0; l<loop; l++){\n"; ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n"; ss << " int loopOffset = l*512;\n"; - ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n"; + ss << " if((loopOffset + lidx + offset + 256) < end)\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset]", "tmp") <<";\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset + 256]", "tmp") << ";\n"; - ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n"; + ss << " else if ((loopOffset + lidx + offset) < end)\n"; ss << " tmp = " << mpCodeGen->Gen2( "A[loopOffset + lidx + offset]", "tmp") <<";\n"; ss << " shm_buf[lidx] = tmp;\n"; @@ -1561,11 +1569,14 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s, dynamic_cast< const formula::DoubleVectorRefToken* >( ft->GetFormulaToken()); // Window being too small to justify a parallel reduction - if (pDVR->GetRefRowSize() < 100) + if (pDVR->GetRefRowSize() < 4) return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) || (!pDVR->IsStartFixed() && !pDVR->IsEndFixed())) return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index); + if ((pDVR->IsStartFixed() && !pDVR->IsEndFixed()) || + (!pDVR->IsStartFixed() && !pDVR->IsEndFixed())) + return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index); else // Other cases are not supported as well return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); } _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
