Changeset: 7f091426cd97 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7f091426cd97
Modified Files:
monetdb5/extras/bwd/cl_program_utilities.c
Branch: bwd
Log Message:
* Trying to make the kernels run on ATI OpenCL (which doesn't support struct
flexible array members)
Differences (180 lines):
diff --git a/monetdb5/extras/bwd/cl_program_utilities.c
b/monetdb5/extras/bwd/cl_program_utilities.c
--- a/monetdb5/extras/bwd/cl_program_utilities.c
+++ b/monetdb5/extras/bwd/cl_program_utilities.c
@@ -82,27 +82,30 @@ cl_program getProjectionLeftjoinProgram(
" __constant static const size_t targetTypeBits =
(sizeof(accessType)*8);\n"
" __constant static const unsigned int
approximationMask = ((1<<approximationBits)-1);\n"
"__kernel void project (\n"
- "__global struct{int count; int base; unsigned int
values[];}* outputTail,\n"
- "__global struct{int count; int padding; unsigned int
positions[];}* inputTail,\n"
- "__global const struct{int count; int base; unsigned
int values[];}* approximationTail\n) {\n"
+ /* "__global struct{int count; int base; unsigned int
values[];}* outputTail,\n" */
+ /* "__global struct{int count; int padding; unsigned
int positions[];}* inputTail,\n" */
+ /* "__global const struct{int count; int base; unsigned
int values[];}* approximationTail\n) {\n" */
+ "__global unsigned int* outputTail,\n"
+ "__global unsigned int* inputTail,\n"
+ "__global const unsigned int* approximationTail\n) {\n"
" const size_t index = get_global_id(0);\n"
- " if(index < inputTail->count){\n"
- " const size_t inputIndex =
inputTail->positions[get_global_id(0)];"
+ " if(index < inputTail[0]){\n"
+ " const size_t inputIndex =
inputTail[2+get_global_id(0)];"
" size_t slot =
(inputIndex*approximationBits)/targetTypeBits;\n"
" size_t offset =
(inputIndex*approximationBits)%targetTypeBits;\n"
- " __global const unsigned int* vals =
approximationTail->values;\n"
+ " __global const unsigned int* vals =
&(approximationTail[2]);\n"
- " const unsigned int delta = (("
- "
(((offset+approximationBits)>targetTypeBits)?(((vals[slot]<<(approximationBits-targetTypeBits+offset))
+
(vals[slot+1]>>(targetTypeBits-(approximationBits-targetTypeBits+offset))))&approximationMask):0)\n"
- " +
(((offset+approximationBits)<=targetTypeBits)*(vals[slot]>>(targetTypeBits-offset-approximationBits)))"
- " )&approximationMask);\n"
+ " const unsigned int delta = (("
+ "
(((offset+approximationBits)>targetTypeBits)?(((vals[slot]<<(approximationBits-targetTypeBits+offset))
+
(vals[slot+1]>>(targetTypeBits-(approximationBits-targetTypeBits+offset))))&approximationMask):0)\n"
+ " +
(((offset+approximationBits)<=targetTypeBits)*(vals[slot]>>(targetTypeBits-offset-approximationBits)))"
+ " )&approximationMask);\n"
" size_t outslot =
(index*approximationBits)/targetTypeBits;\n"
" size_t outoffset =
(index*approximationBits)%targetTypeBits;\n"
" if(outoffset+approximationBits > targetTypeBits){"
- " atomic_add(&(outputTail->values[outslot]),
(delta >> (outoffset+approximationBits-targetTypeBits)));"
- " atomic_add(&(outputTail->values[outslot+1]),
(delta << (targetTypeBits-(outoffset+approximationBits-targetTypeBits))));"
+ " atomic_add(&(outputTail[2+outslot]), (delta >>
(outoffset+approximationBits-targetTypeBits)));"
+ " atomic_add(&(outputTail[2+outslot+1]), (delta <<
(targetTypeBits-(outoffset+approximationBits-targetTypeBits))));"
" }else{"
- " atomic_add(&(outputTail->values[outslot]),
(delta << (targetTypeBits-outoffset-approximationBits)));"
+ " atomic_add(&(outputTail[2+outslot]), (delta <<
(targetTypeBits-outoffset-approximationBits)));"
" }"
" }\n"
"}";
@@ -114,22 +117,32 @@ cl_program getProjectionLeftjoinProgram(
}
cl_program getUSelectProgram(int type, char* predicateOperation, char*
predicateOperation2, unsigned int approximationBits, unsigned int offsetBits,
char inputIsVoidHeaded){
+
const char* sourceCodeTemplates[] = {
[0] = "#define accessType unsigned int\n"
" __constant static const size_t targetTypeBits =
(sizeof(targetType)*8);\n"
" __constant static const unsigned int approximationMask =
((1<<approximationBits)-1);\n"
"__kernel void uselect (\n" // non-void-headed case
- "__global struct{int count; int padding; int positions[];}*
outputHead,\n"
- "__global struct{int count; int base; unsigned char values[];}*
outputTail,\n"
- "__global const struct{int count; int base; unsigned char
values[];}* approximationTail,\n"
- "__global const struct{int count; int padding; int
positions[];}* approximationHead,\n"
+ /* " struct HeadColumn{int count; int padding; int
positions[];};\n" */
+ /* " struct TailColumn{int count; int base; unsigned char
values[];} ;\n" */
+ /* "__global struct HeadColumn* outputHead,\n" */
+ /* "__global struct TailColumn* outputTail,\n" */
+ /* "__global const struct TailColumn* approximationTail,\n" */
+ /* "__global const struct HeadColumn* approximationHead\n" */
+ "__global unsigned int* outputHead,\n"
+ "__global unsigned int* outputTail,\n"
+ "__global const unsigned int* approximationTail,\n"
+ "__global const unsigned int* approximationHead,\n"
+
+
"const targetType operand,\n"
"const targetType operand2\n"
") {\n"
- " if(get_global_id(0) < approximationTail->count){\n"
- " __global const unsigned int* vals =
approximationTail->values;\n"
- " __global const unsigned char* approximation =
approximationTail->values;"
- " targetType value = approximationTail->base;\n"
+ " if(get_global_id(0) < approximationTail[0]){\n"
+ " __global const unsigned int* vals =
&(approximationTail[2]);\n"
+ /* " __global const unsigned char* approximation =
approximationTail->values;" */
+ " targetType value = ((__global targetType
*)approximationTail)[1];\n"
+
" size_t slot =
(get_global_id(0)*approximationBits)/targetTypeBits;\n"
" size_t offset =
(get_global_id(0)*approximationBits)%targetTypeBits;\n"
" const unsigned int delta = (("
@@ -140,33 +153,49 @@ cl_program getUSelectProgram(int type, c
" if((value firstOperator operand)"
" && (secondOperatorUnset || value secondOperator operand2)"
" ){\n"
- " const int index = atomic_inc(&(outputHead->count));\n"
- " atomic_inc(&(outputTail->count));\n" // TODO: this could
probably be done more efficiently
- " outputHead->positions[index] =
approximationHead->positions[get_global_id(0)];\n"
+ " const int index = atomic_inc(outputHead);\n"
+ " atomic_inc(outputTail);\n" // TODO: this could probably be
done more efficiently
+ " outputHead[2+index] =
approximationHead[2+get_global_id(0)];\n"
" size_t outslot =
(index*approximationBits)/targetTypeBits;\n"
" size_t outoffset =
(index*approximationBits)%targetTypeBits;\n"
- " if(outoffset+approximationBits > 8*sizeof(accessType))"
- " outputTail->values[outslot] |= (delta <<
(approximationBits-(8*sizeof(accessType)-outoffset)));"
- " else"
- " outputTail->values[outslot] |= (delta <<
(8*sizeof(accessType)-outoffset-approximationBits));"
- " }\n"
+ " if(outoffset+approximationBits > 8*sizeof(accessType)){"
+ /* " outputTail[2+outslot] |= (delta <<
(approximationBits-(8*sizeof(accessType)-outoffset)));" */
+ " atomic_add(&(outputTail[2+outslot]), (delta >>
(8*sizeof(accessType)+approximationBits-targetTypeBits)));"
+ " atomic_add(&(outputTail[2+outslot+1]), (delta <<
(8*sizeof(accessType)-(outoffset+approximationBits-targetTypeBits))));"
+ " }else{"
+ " atomic_add(&(outputTail[2+outslot]), (delta <<
(8*sizeof(accessType)-outoffset-approximationBits)));"
+ /* " outputTail[2+outslot] |= (delta <<
(8*sizeof(accessType)-outoffset-approximationBits));" */
+ " }"
+ " }\n"
" } \n"
" }\n",
+
[1] = "#define accessType unsigned int\n"
" __constant static const size_t targetTypeBits =
(sizeof(targetType)*8);\n"
" __constant static const unsigned int approximationMask =
((1<<approximationBits)-1);\n"
+ /* " struct HeadColumn{int count; int padding; int
positions[];};\n" */
+ /* " struct TailColumn{int count; int base; unsigned int
values[];};\n" */
+
"__kernel void uselect (\n" // void-headed case
- "__global struct{int count; int padding; int positions[];}*
outputHead,\n"
- "__global struct{int count; int base; accessType values[];}*
outputTail,\n"
- "__global const struct{int count; int base; unsigned char
values[];}* approximationTail,\n"
+
+ /* "__global struct HeadColumn* outputHead,\n" */
+ /* "__global struct TailColumn* outputTail,\n" */
+ /* "__global const struct TailColumn* approximationTail,\n" */
+ "__global unsigned int* outputHead,\n"
+ "__global unsigned int* outputTail,\n"
+ "__global const unsigned int* approximationTail,\n"
+
"const targetType operand,\n"
"const targetType operand2\n"
") {\n"
- " if(get_global_id(0) < approximationTail->count){\n"
- " targetType value = approximationTail->base;\n"
+ /* " if(get_global_id(0) < approximationTail->count){\n" */
+ /* " targetType value = approximationTail->base;\n" */
+ " if(get_global_id(0) < approximationTail[0]){\n" // count
+ " targetType value = ((__global
targetType*)approximationTail)[1];\n"//base
" size_t slot =
(get_global_id(0)*approximationBits)/targetTypeBits;\n"
" size_t offset =
(get_global_id(0)*approximationBits)%targetTypeBits;\n"
- " __global const unsigned int* vals =
approximationTail->values;\n"
+ /* " __global const unsigned int* vals =
approximationTail->values;\n" */
+ " __global const unsigned int* vals =
&(approximationTail[2]);\n"
" const unsigned int delta = (("
"
(((offset+approximationBits)>targetTypeBits)?(((vals[slot]<<(approximationBits-targetTypeBits+offset))
+
(vals[slot+1]>>(targetTypeBits-(approximationBits-targetTypeBits+offset))))&approximationMask):0)\n"
" +
(((offset+approximationBits)<=targetTypeBits)*(vals[slot]>>(targetTypeBits-offset-approximationBits)))"
@@ -175,15 +204,20 @@ cl_program getUSelectProgram(int type, c
" if((value firstOperator operand)"
" && (secondOperatorUnset || value secondOperator operand2)"
" ){\n"
- " const int index = atomic_inc(&(outputHead->count));\n"
- " atomic_inc(&(outputTail->count));\n" // TODO: this could
probably be done more efficiently
+ " const int index = atomic_inc(outputHead);\n"
+ " atomic_inc(outputTail);\n" // TODO: this could probably be
done more efficiently
" size_t outslot =
(index*approximationBits)/targetTypeBits;\n"
" size_t outoffset =
(index*approximationBits)%targetTypeBits;\n"
- " outputHead->positions[index] = get_global_id(0);\n"
- " if(outoffset+approximationBits > 8*sizeof(accessType))"
- " outputTail->values[outslot] |= (delta <<
(approximationBits-(8*sizeof(accessType)-outoffset)));"
- " else"
- " outputTail->values[outslot] |= (delta <<
(8*sizeof(accessType)-outoffset-approximationBits));"
+ " outputHead[2+index] = get_global_id(0);\n"
+ " if(outoffset+approximationBits > 8*sizeof(accessType)){"
+ " atomic_add(&(outputTail[2+outslot]), (delta >>
(8*sizeof(accessType)+approximationBits-targetTypeBits)));"
+ " atomic_add(&(outputTail[2+outslot+1]), (delta <<
(8*sizeof(accessType)-(outoffset+approximationBits-targetTypeBits))));"
+
+ /* " outputTail[2+outslot] |= (delta <<
(approximationBits-(8*sizeof(accessType)-outoffset)));" */
+ " }else"
+ " atomic_add(&(outputTail[2+outslot]), (delta <<
(8*sizeof(accessType)-outoffset-approximationBits)));"
+
+ /* " outputTail[2+outslot] |= (delta <<
(8*sizeof(accessType)-outoffset-approximationBits));" */
" }\n"
" }\n"
"}"
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list