Changeset: 43c22dac96df for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=43c22dac96df
Modified Files:
monetdb5/extras/bwd/bwd.c
monetdb5/extras/bwd/utilities.c
monetdb5/extras/bwd/utilities.h
Branch: bwd
Log Message:
* started working on q1, which needs handling of varchar(1) attributes. Since
these are dictionary-compressed with 1-byte offsets into the dictionary, I
implemented a function to decompress and decompose them.
Differences (238 lines):
diff --git a/monetdb5/extras/bwd/bwd.c b/monetdb5/extras/bwd/bwd.c
--- a/monetdb5/extras/bwd/bwd.c
+++ b/monetdb5/extras/bwd/bwd.c
@@ -110,14 +110,16 @@ str bwdecompose(bat * res, bat * subject
if (VIEWtparent(subject))
subject = BATdescriptor(abs(VIEWtparent(subject)));
result = BATnew(TYPE_void, TYPE_str, 0);
- if(Tsize(subject) != 4){
- snprintf(buffer, 4096, "bat %d cannot be decomposed because it
is not a 32-bit type", subject->batCacheid);
- } else if(! batTailIsDecomposed(subject)){
- printf ("decomposing %s into %d and %d bits\n",
resolveBatToAttribute(subject->batCacheid, cntxt), approximationBits[0],
Tsize(subject)*8-*approximationBits);
+ if(batTailIsDecomposed(subject)) {
+ snprintf(buffer, 4096, "bat %d already decomposed",
subject->batCacheid);
+ } else if (Tsize(subject) == 4){
BATsetprop(subject, batRegistryIndex, TYPE_int,
(int[]){decomposeIntArray((int*)Tloc(subject, BUNfirst(subject)),
subject->batCount, *approximationBits)});
- snprintf(buffer, 4096, "successfully decomposed bat %d",
subject->batCacheid);
+ snprintf(buffer, 4096, "successfully decomposed integer bat
%d", subject->batCacheid);
+ } else if(Tsize(subject) == 1){
+ BATsetprop(subject, batRegistryIndex, TYPE_int,
(int[]){decomposeVarchar1Array(Tloc(subject, BUNfirst(subject)),
subject->batCount, Tsize(subject), Tbase(subject), *approximationBits)});
+ snprintf(buffer, 4096, "successfully decomposed char bat %d",
subject->batCacheid);
} else {
- snprintf(buffer, 4096, "bat %d already decomposed",
subject->batCacheid);
+ snprintf(buffer, 4096, "bat %d cannot be decomposed because it
is not a 32 or 8-bit type", subject->batCacheid);
}
BUNappend(result, buffer, 0);
BBPkeepref(*res = result->batCacheid);
diff --git a/monetdb5/extras/bwd/utilities.c b/monetdb5/extras/bwd/utilities.c
--- a/monetdb5/extras/bwd/utilities.c
+++ b/monetdb5/extras/bwd/utilities.c
@@ -332,6 +332,48 @@ char* humanreadablesize(unsigned int val
return buffer;
}
+void forceLoadOntoGPU(clTail* approximation, const size_t approximationSize,
DecomposedBATSlot* slot){
+ static cl_program program = NULL;
+ cl_int err;
+ cl_uint numberOfDevices;
+ cl_device_id devices[4] = {};
+ const cl_context context = getCLContext();
+ const char* sourceCode = "__kernel void dummy (__global const char*
inputTail1){}";
+ {
+ cl_platform_id platforms[4];
+ cl_uint foundPlatforms;
+ if(clGetPlatformIDs(4,platforms,&foundPlatforms) != CL_SUCCESS)
printf("problem when finding the platforms");
+ if (!foundPlatforms) printf("didn't find any OpenCL Platforms");
+
+
+ if ((err = clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 4,
devices, &numberOfDevices)) != CL_SUCCESS) printf("error getting GPU device
id: %s\n", clError(err));
+ }
+
+ program = clCreateProgramWithSource(context, 1, (const
char*[]){sourceCode}, (size_t[]){strlen(sourceCode)}, &err);
+ if(err) printf("#%s, clCreateProgramWithSource: %s;\n", __func__,
clError(err));
+ err = clBuildProgram(program, numberOfDevices, devices, "", NULL, NULL);
+ if(err) printf("#%s, clBuildProgram: %s;\n", __func__, clError(err));
+
+ {
+ uint device;
+ for (device = 0; device < numberOfDevices; ++device) {
+ cl_command_queue queue =
clCreateCommandQueue(context,devices[device],0,&err);
+ cl_kernel dummyKernel = clCreateKernel(program,
"dummy", &err);
+
+ slot->tailApproximations[device] =
bwdClCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
approximationSize, approximation, &err);
+ if(err) printf("#%s, clCreateBuffer: %s;\n", __func__,
clError(err));
+
+ if(err) printf("#%s, clCreateKernel: %s;\n", __func__,
clError(err));
+ err = clSetKernelArg(dummyKernel, 0, sizeof(cl_mem),
(cl_mem[]){slot->tailApproximations[device]});
+ if(err) printf("#%s, clSetKernelArg: %s;\n", __func__,
clError(err));
+ if((err = clEnqueueNDRangeKernel(queue, dummyKernel, 1,
(const size_t[]){0}, (const size_t[]){1}, (const size_t[]){1}, 0, NULL, NULL)))
+ printf("#%s, clEnqueueNDRangeKernel: %s;\n",
__func__, clError(err));
+ clFinish(queue);
+
+ }
+ }
+
+}
unsigned int decomposeIntArray(const int* subject, const size_t size, const
size_t baseBitsPlusApproximationBits){
const bounds subjectBounds = findBounds(subject,size);
@@ -344,19 +386,12 @@ unsigned int decomposeIntArray(const int
const size_t approximationSize = ceil(size*approximationBits/8.0) +
sizeof(long) + sizeof(clTail);
clTail* approximation = calloc(approximationSize, sizeof(char));
unsigned int i;
- static cl_program program = NULL;
- cl_int err;
-
-
-
slot->tailOffsetBits = 32-neededBits;
slot->approximationBits = approximationBits;
slot->isPersistentBAT = 1;
printf ("bounds: %d, %d, offset %zd bits\n",subjectBounds.min,
subjectBounds.max, slot->tailOffsetBits);
-
-
printf ("using %s for approximation and %s for residuals \n",
humanreadablesize(approximationSize, (char[64]){}, 64),
humanreadablesize(ceil(size*residualBits/8.0) + 8, (char[64]){}, 64));
slot->residuals = calloc(ceil(size*residualBits/8.0) + 8, sizeof(char));
approximation->base = subjectBounds.min;
@@ -386,48 +421,84 @@ unsigned int decomposeIntArray(const int
}
}
- {
- cl_uint numberOfDevices;
- cl_device_id devices[4] = {};
- const cl_context context = getCLContext();
- const char* sourceCode = "__kernel void dummy (__global const
struct{int count; int padding; char values[];}* inputTail1){}";
- {
- cl_platform_id platforms[4];
- cl_uint foundPlatforms;
- if(clGetPlatformIDs(4,platforms,&foundPlatforms) !=
CL_SUCCESS) printf("problem when finding the platforms");
- if (!foundPlatforms) printf("didn't find any OpenCL
Platforms");
-
-
- if ((err =
clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 4, devices, &numberOfDevices)) !=
CL_SUCCESS) printf("error getting GPU device id: %s\n", clError(err));
- }
-
- program = clCreateProgramWithSource(context, 1, (const
char*[]){sourceCode}, (size_t[]){strlen(sourceCode)}, &err);
- if(err) printf("#%s, clCreateProgramWithSource: %s;\n",
__func__, clError(err));
- err = clBuildProgram(program, numberOfDevices, devices, "",
NULL, NULL);
- if(err) printf("#%s, clBuildProgram: %s;\n", __func__,
clError(err));
-
-
-
- {
- uint device;
- for (device = 0; device < numberOfDevices; ++device) {
- cl_command_queue queue =
clCreateCommandQueue(context,devices[device],0,&err);
- cl_kernel dummyKernel = clCreateKernel(program,
"dummy", &err);
-
- slot->tailApproximations[device] =
bwdClCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
approximationSize, approximation, &err);
- if(err) printf("#%s, clCreateBuffer: %s;\n",
__func__, clError(err));
-
- if(err) printf("#%s, clCreateKernel: %s;\n",
__func__, clError(err));
- err = clSetKernelArg(dummyKernel, 0,
sizeof(cl_mem), (cl_mem[]){slot->tailApproximations[device]});
- if(err) printf("#%s, clSetKernelArg: %s;\n",
__func__, clError(err));
- if((err = clEnqueueNDRangeKernel(queue,
dummyKernel, 1, (const size_t[]){0}, (const size_t[]){1}, (const size_t[]){1},
0, NULL, NULL)))
- printf("#%s, clEnqueueNDRangeKernel:
%s;\n", __func__, clError(err));
- clFinish(queue);
-
- }
- }
-
- }
+ forceLoadOntoGPU(approximation, approximationSize, slot);
+
free(approximation);
return newIndex;
}
+
+
+static inline char* getVarcharValue(const char* subject, const char*
dictionary, const size_t i, const size_t width){
+ return (char*)dictionary+VarHeapVal(subject, i, width);
+}
+
+bounds findVarchar1Bounds(const char* subject, const size_t size, const size_t
width, const char* dictionary){
+ assert(size>0);
+ {
+ char value = *getVarcharValue(subject, dictionary, 0, width);
+ bounds result = {value, value};
+ unsigned int i;
+ for (i = 1; i < size; ++i) {
+ value = *getVarcharValue(subject, dictionary, i, width);
+ result.min = MIN(result.min, value);
+ result.max = MAX(result.max, value);
+ }
+ return result;
+ }
+}
+
+unsigned int decomposeVarchar1Array(const char* subject, const size_t size,
const size_t width, const char* dictionary, const size_t
baseBitsPlusApproximationBits){
+ const bounds subjectBounds = findVarchar1Bounds(subject, size, width,
dictionary);
+ const int neededBits = ceil(log2(subjectBounds.max -
subjectBounds.min));
+ const unsigned int newIndex = getNextFreeDecomposedBATSlotIndex();
+ DecomposedBATSlot* slot = getDecomposedBATSlotForIndex(newIndex);
+ const unsigned int residualBits = 8-baseBitsPlusApproximationBits;
+ const unsigned int residualMask = (1 << residualBits)-1;
+ const unsigned int approximationBits = neededBits-residualBits;
+ const size_t approximationSize = ceil(size*approximationBits/8.0) +
sizeof(long) + sizeof(clTail);
+ clTail* approximation = calloc(approximationSize, sizeof(char));
+ unsigned int i;
+
+ slot->tailOffsetBits = 8-neededBits;
+ slot->approximationBits = approximationBits;
+ slot->isPersistentBAT = 1;
+ printf ("bounds: %d, %d, offset %zd bits\n",subjectBounds.min,
subjectBounds.max, slot->tailOffsetBits);
+
+ printf ("using %s for approximation and %s for residuals \n",
humanreadablesize(approximationSize, (char[64]){}, 64),
humanreadablesize(ceil(size*residualBits/8.0) + 8, (char[64]){}, 64));
+ slot->residuals = calloc(ceil(size*residualBits/8.0) + 8, sizeof(char));
+ approximation->base = subjectBounds.min;
+ slot->tailOffsetValue = approximation->base;
+ approximation->count = size;
+
+ for (i = 0; i < size; ++i) {
+ const char value = *getVarcharValue(subject, dictionary, i,
width);
+ { // approximation
+ const size_t slotI = (i*approximationBits)/32;
+ const unsigned int offset = (i*approximationBits)%32;
+ if(offset+approximationBits > 32) {
+ ((unsigned int*)approximation->elements)[slotI]
|= ((((int)(value-approximation->base)) >>
residualBits)>>(approximationBits-(32-offset)));
+ ((unsigned
int*)approximation->elements)[slotI+1] |= ((((int)(value-approximation->base))
>> residualBits)<<(32-(approximationBits-(32-offset))));
+ } else {
+ ((unsigned int*)approximation->elements)[slotI]
|= ((((int)(value-approximation->base)) >>
residualBits)<<(32-offset-approximationBits));
+ }
+ }
+ { // residual
+ const size_t slotI = (i*residualBits)/32;
+ const unsigned int offset = (i*residualBits)%32;
+ if(offset+residualBits <= 32) {
+ ((unsigned int*)slot->residuals)[slotI] |=
((((int)(value-approximation->base))&residualMask)<<(32-offset-residualBits));
+ } else {
+ ((unsigned int*)slot->residuals)[slotI] |=
((((int)(value-approximation->base))&residualMask)>>(residualBits-(32-offset)));
+ ((unsigned int*)slot->residuals)[slotI+1] |=
((((int)(value-approximation->base))&residualMask)<<(32-(residualBits-(32-offset))));
+ }
+ }
+ }
+
+ forceLoadOntoGPU(approximation, approximationSize, slot);
+
+ free(approximation);
+ return newIndex;
+}
+
+
+
diff --git a/monetdb5/extras/bwd/utilities.h b/monetdb5/extras/bwd/utilities.h
--- a/monetdb5/extras/bwd/utilities.h
+++ b/monetdb5/extras/bwd/utilities.h
@@ -49,7 +49,8 @@ int isPersistentBAT(const BAT* subject)
int getGPUDeviceForThisThread();
- unsigned int decomposeIntArray(const int* subject, const size_t size, const
size_t approximationBits);
+unsigned int decomposeIntArray(const int* subject, const size_t size, const
size_t approximationBits);
+unsigned int decomposeVarchar1Array(const char* subject, const size_t size,
const size_t width, const char* dictionary, const size_t
baseBitsPlusApproximationBits);
const char* clError(int) __attribute__((pure));
cl_device_id getDeviceID();
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list