Changeset: c8b821920c0a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c8b821920c0a
Modified Files:
monetdb5/extras/bwd/operations.c
Branch: bwd
Log Message:
* increased the degree of parallelism on the GPU; it turns out NVIDIA GPUs like more
parallelism
Unterschiede (23 Zeilen):
diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -26,7 +26,9 @@ static const int eagerBufferLoading = 1;
#ifdef __APPLE__
static const int WORK_GROUP_SIZE=1;
#else
-static const int WORK_GROUP_SIZE=16;
+static const int WORK_GROUP_SIZE=128;
+const int VALUES_PER_WORK_ITEM = 1;
+extern const int VALUES_PER_WORK_ITEM;
#endif
#define MAX_INTERMEDIATE_RESULT_SIZE 16777216
@@ -430,7 +432,7 @@ static inline str uselect(bat *res, bat
if((err =
clSetKernelArg(selectKernel, bufferI+i, sizeof(int), &(parameters[i])))) //
type specific
printf("#%s,
clSetKernelArg(%d): %s;\n", __func__, bufferI+i, clError(err));
- err =
clEnqueueNDRangeKernel(getCommandQueue(), selectKernel, 1, (const size_t[]){0},
(const size_t[]){ceil(dataCount/((float)WORK_GROUP_SIZE))*WORK_GROUP_SIZE},
(const size_t[]){WORK_GROUP_SIZE}, 0, NULL, NULL);
+ err =
clEnqueueNDRangeKernel(getCommandQueue(), selectKernel, 1, (const size_t[]){0},
(const
size_t[]){ceil(dataCount/((float)WORK_GROUP_SIZE*VALUES_PER_WORK_ITEM))*(WORK_GROUP_SIZE*VALUES_PER_WORK_ITEM)},
(const size_t[]){WORK_GROUP_SIZE}, 0, NULL, NULL);
if(err)
printf("#%s,
clEnqueueNDRangeKernel: %s;\n", __func__, clError(err));
if (synchronousGPU)
clFinish(getCommandQueue());
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list