Changeset: 39c06064572b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=39c06064572b
Modified Files:
        monetdb5/extras/bwd/cl_program_utilities.c
        monetdb5/extras/bwd/operations.c
Branch: bwd
Log Message:

* Here is a first implementation of grouping in OpenCL; it has not been tested yet.


Unterschiede (94 Zeilen):

diff --git a/monetdb5/extras/bwd/cl_program_utilities.c 
b/monetdb5/extras/bwd/cl_program_utilities.c
--- a/monetdb5/extras/bwd/cl_program_utilities.c
+++ b/monetdb5/extras/bwd/cl_program_utilities.c
@@ -237,5 +237,34 @@ cl_program getUSelectProgram(int type, c
 }
 
 cl_program getGroupProgram(unsigned int approximationBits){
-       return NULL;
+       char options[256];
+       const char* sourceCode = ""
+               "#define accessType unsigned int\n"
+               "  __constant static const unsigned int approximationMask = 
((1<<approximationBits)-1);\n"
+               "  __constant static const unsigned int targetTypeBits = 
sizeof(accessType)*8;\n"
+               "__kernel void group (\n"
+               "__global unsigned int* groupIDs,\n"
+               "__global unsigned int* histogramOIDs,\n"
+               "__global unsigned int* histogramCounts,\n"
+               "__global const unsigned int* inputTail\n"
+               ") {\n"
+               "    const size_t index = get_global_id(0);\n"
+               " if(index < inputTail[0]){\n"
+               "  const size_t inputIndex = get_global_id(0);"
+               "  size_t slot = 
(inputIndex*approximationBits)/targetTypeBits;\n"
+               "  size_t offset = 
(inputIndex*approximationBits)%targetTypeBits;\n"
+               "  __global const unsigned int* vals = &(inputTail[2]);\n"
+
+               "    const unsigned int delta = (("
+               "      
(((offset+approximationBits)>targetTypeBits)?(((vals[slot]<<(approximationBits-targetTypeBits+offset))
 + 
(vals[slot+1]>>(targetTypeBits-(approximationBits-targetTypeBits+offset))))&approximationMask):0)\n"
+               "    + 
(((offset+approximationBits)<=targetTypeBits)*(vals[slot]>>(targetTypeBits-offset-approximationBits)))"
+               "      )&approximationMask);\n"
+               "  const uint oldValue = atomic_cmpxchg(histogramOIDs + delta, 
0, inputIndex);\n"
+               "  const uint groupID = oldValue?oldValue:inputIndex;\n"
+               "  groupIDs[inputIndex] = groupID;\n"
+               "  atomic_inc(histogramCounts+groupID);\n"
+               " }\n"
+               "}";
+       snprintf(options, 256, "-DapproximationBits=%d", approximationBits);
+       return compileProgram(sourceCode, options);
 }
diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -1081,24 +1081,42 @@ str BWDGroupApproximate(int *rethisto, i
 
                if((err = clEnqueueReadBuffer(getCommandQueue(), valueColumn, 
CL_TRUE, 0, sizeof(int), &newTailDefinition.count , 0, NULL, NULL)))
                        printf("#%s, clEnqueueReadBuffer (b): %s;\n", __func__, 
clError(err));
-               
                { // create output objects
-                       BAT* bn = BATnew(b->htype, TYPE_oid, BATcount(b));
+                       BAT* groupIDs = BATnew(b->htype, TYPE_oid, BATcount(b));
                        BAT* histo = BATnew(TYPE_oid,TYPE_wrd, 0);
-                       const unsigned int newIndex = 
getNextFreeDecomposedBATSlotIndex();
-                       DecomposedBATSlot* slot = 
getDecomposedBATSlotForIndex(newIndex);
-                       BATsetprop(bn, batRegistryIndex, TYPE_int, 
(int[]){newIndex});
+                       { // groupID object
+                               const unsigned int newIndex = 
getNextFreeDecomposedBATSlotIndex();
+                               DecomposedBATSlot* groupIDSlot = 
getDecomposedBATSlotForIndex(newIndex);
+                               BATsetprop(groupIDs, batRegistryIndex, 
TYPE_int, (int[]){newIndex});
 
-                       slot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
calculatedBufferSize(newTailDefinition.count,slot->approximationBits)+sizeof(clTail),
 NULL, &err);
-                       
+                               groupIDSlot->tailPositions = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
sizeof(int)*newTailDefinition.count+sizeof(clHead), NULL, &err);
+                       }
+                       { // histogram object
+                               const unsigned int newIndex = 
getNextFreeDecomposedBATSlotIndex();
+                               DecomposedBATSlot* histogramSlot = 
getDecomposedBATSlotForIndex(newIndex);
+                               BATsetprop(histo, batRegistryIndex, TYPE_int, 
(int[]){newIndex});
+
+                               histogramSlot->tailPositions = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
sizeof(int)*newTailDefinition.count+sizeof(clHead), NULL, &err);
+                               histogramSlot->headApproximation = 
histogramSlot->tailPositions;
+                               histogramSlot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
sizeof(int)*newTailDefinition.count+sizeof(clTail), NULL, &err);
+                       }
                        { // run kernel
                                cl_kernel groupKernel = 
clCreateKernel(getGroupProgram(batTailApproximationBits(b)), "group", &err);
+                               int i;
+                               for (i = 0; i < 4; ++i) {
+                                       if((err = clSetKernelArg(groupKernel, 
i, sizeof(cl_mem), &((cl_mem[]){
+                                                                               
batTailPositions(groupIDs),
+                                                                               
        batHeadApproximation(histo),
+                                                                               
        batTailApproximation(histo),
+                                                                               
        batTailApproximation(b)}[i]
+                                                                       )))) 
printf("#%s, clSetKernelArg(%d): %s;\n", __func__, i, clError(err));
+                               }
                                if((err = 
clEnqueueNDRangeKernel(getCommandQueue(), groupKernel, 1, (const size_t[]){0}, 
(const 
size_t[]){ceil(newTailDefinition.count/((float)WORK_GROUP_SIZE))*WORK_GROUP_SIZE},
 (const size_t[]){WORK_GROUP_SIZE}, 0, NULL, NULL)))
                                        printf("#%s, clEnqueueNDRangeKernel: 
%s;\n", __func__, clError(err));
-
+                               if (synchronousGPU) clFinish(getCommandQueue());
                        }
                        BBPkeepref(*rethisto = histo->batCacheid);
-                       BBPkeepref(*retbid = bn->batCacheid);
+                       BBPkeepref(*retbid = groupIDs->batCacheid);
                }
        }
        BBPreleaseref(b->batCacheid);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to