Changeset: 43c22dac96df for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=43c22dac96df
Modified Files:
        monetdb5/extras/bwd/bwd.c
        monetdb5/extras/bwd/utilities.c
        monetdb5/extras/bwd/utilities.h
Branch: bwd
Log Message:

* Started working on q1, which needs handling of varchar(1) attributes. Since
these are dictionary-compressed with 1-byte offsets into the dictionary, I
implemented a function to decompress and decompose them.


Unterschiede (238 Zeilen):

diff --git a/monetdb5/extras/bwd/bwd.c b/monetdb5/extras/bwd/bwd.c
--- a/monetdb5/extras/bwd/bwd.c
+++ b/monetdb5/extras/bwd/bwd.c
@@ -110,14 +110,16 @@ str bwdecompose(bat * res, bat * subject
        if (VIEWtparent(subject)) 
     subject = BATdescriptor(abs(VIEWtparent(subject)));
        result = BATnew(TYPE_void, TYPE_str, 0);
-       if(Tsize(subject) != 4){
-               snprintf(buffer, 4096, "bat %d cannot be decomposed because it 
is not a 32-bit type", subject->batCacheid);
-       } else if(! batTailIsDecomposed(subject)){
-               printf ("decomposing %s into %d and %d bits\n", 
resolveBatToAttribute(subject->batCacheid, cntxt), approximationBits[0], 
Tsize(subject)*8-*approximationBits);
+       if(batTailIsDecomposed(subject)) {
+               snprintf(buffer, 4096, "bat %d already decomposed", 
subject->batCacheid);
+       } else if (Tsize(subject) == 4){
                BATsetprop(subject, batRegistryIndex, TYPE_int, 
(int[]){decomposeIntArray((int*)Tloc(subject, BUNfirst(subject)), 
subject->batCount, *approximationBits)});
-               snprintf(buffer, 4096, "successfully decomposed bat %d", 
subject->batCacheid);
+               snprintf(buffer, 4096, "successfully decomposed integer bat 
%d", subject->batCacheid);
+       } else if(Tsize(subject) == 1){
+               BATsetprop(subject, batRegistryIndex, TYPE_int, 
(int[]){decomposeVarchar1Array(Tloc(subject, BUNfirst(subject)), 
subject->batCount, Tsize(subject), Tbase(subject), *approximationBits)});
+               snprintf(buffer, 4096, "successfully decomposed char bat %d", 
subject->batCacheid);
        } else {
-               snprintf(buffer, 4096, "bat %d already decomposed", 
subject->batCacheid);
+               snprintf(buffer, 4096, "bat %d cannot be decomposed because it 
is not a 32 or 8-bit type", subject->batCacheid);
        }
        BUNappend(result, buffer, 0);
        BBPkeepref(*res = result->batCacheid);
diff --git a/monetdb5/extras/bwd/utilities.c b/monetdb5/extras/bwd/utilities.c
--- a/monetdb5/extras/bwd/utilities.c
+++ b/monetdb5/extras/bwd/utilities.c
@@ -332,6 +332,48 @@ char* humanreadablesize(unsigned int val
        return buffer;
 }
 
+void forceLoadOntoGPU(clTail* approximation, const size_t approximationSize, 
DecomposedBATSlot* slot){ 
+       static cl_program program = NULL;
+       cl_int err;
+       cl_uint numberOfDevices;
+       cl_device_id devices[4] = {};
+       const cl_context context = getCLContext();
+       const char* sourceCode = "__kernel void dummy (__global const char* 
inputTail1){}";
+       {
+               cl_platform_id platforms[4];
+               cl_uint foundPlatforms;
+               if(clGetPlatformIDs(4,platforms,&foundPlatforms) != CL_SUCCESS) 
printf("problem when finding the platforms");
+               if (!foundPlatforms) printf("didn't find any OpenCL Platforms");
+               
+ 
+               if ((err = clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 4, 
devices, &numberOfDevices)) != CL_SUCCESS)  printf("error getting GPU device 
id: %s\n", clError(err));
+       }
+
+       program = clCreateProgramWithSource(context, 1, (const 
char*[]){sourceCode}, (size_t[]){strlen(sourceCode)}, &err);
+       if(err) printf("#%s, clCreateProgramWithSource: %s;\n", __func__, 
clError(err));
+       err = clBuildProgram(program, numberOfDevices, devices, "", NULL, NULL);
+       if(err) printf("#%s, clBuildProgram: %s;\n", __func__, clError(err));
+
+       {
+               uint device;
+               for (device = 0; device < numberOfDevices; ++device) {
+                       cl_command_queue queue = 
clCreateCommandQueue(context,devices[device],0,&err);
+                       cl_kernel dummyKernel = clCreateKernel(program, 
"dummy", &err);
+
+                       slot->tailApproximations[device] = 
bwdClCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
approximationSize, approximation, &err);
+                       if(err) printf("#%s, clCreateBuffer: %s;\n", __func__, 
clError(err));
+
+                       if(err) printf("#%s, clCreateKernel: %s;\n", __func__, 
clError(err));
+                       err = clSetKernelArg(dummyKernel, 0, sizeof(cl_mem), 
(cl_mem[]){slot->tailApproximations[device]});
+                       if(err) printf("#%s, clSetKernelArg: %s;\n", __func__, 
clError(err));
+                       if((err = clEnqueueNDRangeKernel(queue, dummyKernel, 1, 
(const size_t[]){0}, (const size_t[]){1}, (const size_t[]){1}, 0, NULL, NULL)))
+                               printf("#%s, clEnqueueNDRangeKernel: %s;\n", 
__func__, clError(err));
+                       clFinish(queue);
+                       
+               }
+       }
+                       
+}
 
 unsigned int decomposeIntArray(const int* subject, const size_t size, const 
size_t baseBitsPlusApproximationBits){
        const bounds subjectBounds = findBounds(subject,size);
@@ -344,19 +386,12 @@ unsigned int decomposeIntArray(const int
        const size_t approximationSize = ceil(size*approximationBits/8.0) + 
sizeof(long) + sizeof(clTail);
        clTail* approximation = calloc(approximationSize, sizeof(char));
        unsigned int i;
-       static cl_program program = NULL;
-       cl_int err;
-
-               
-       
 
        slot->tailOffsetBits = 32-neededBits;
        slot->approximationBits = approximationBits;
        slot->isPersistentBAT = 1;
        printf ("bounds: %d, %d, offset %zd bits\n",subjectBounds.min, 
subjectBounds.max, slot->tailOffsetBits);
                
-               
-       
        printf ("using %s for approximation and %s for residuals \n", 
humanreadablesize(approximationSize, (char[64]){}, 64), 
humanreadablesize(ceil(size*residualBits/8.0) + 8, (char[64]){}, 64));
        slot->residuals = calloc(ceil(size*residualBits/8.0) + 8, sizeof(char));
        approximation->base = subjectBounds.min;
@@ -386,48 +421,84 @@ unsigned int decomposeIntArray(const int
                }
        }
 
-       {               
-               cl_uint numberOfDevices;
-               cl_device_id devices[4] = {};
-               const cl_context context = getCLContext();
-               const char* sourceCode = "__kernel void dummy (__global const 
struct{int count; int padding; char values[];}* inputTail1){}";
-               {
-                       cl_platform_id platforms[4];
-                       cl_uint foundPlatforms;
-                       if(clGetPlatformIDs(4,platforms,&foundPlatforms) != 
CL_SUCCESS) printf("problem when finding the platforms");
-                       if (!foundPlatforms) printf("didn't find any OpenCL 
Platforms");
-               
- 
-                       if ((err = 
clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 4, devices, &numberOfDevices)) != 
CL_SUCCESS)  printf("error getting GPU device id: %s\n", clError(err));
-               }
-
-               program = clCreateProgramWithSource(context, 1, (const 
char*[]){sourceCode}, (size_t[]){strlen(sourceCode)}, &err);
-               if(err) printf("#%s, clCreateProgramWithSource: %s;\n", 
__func__, clError(err));
-               err = clBuildProgram(program, numberOfDevices, devices, "", 
NULL, NULL);
-               if(err) printf("#%s, clBuildProgram: %s;\n", __func__, 
clError(err));
-
-
-
-               {
-                       uint device;
-                       for (device = 0; device < numberOfDevices; ++device) {
-                               cl_command_queue queue = 
clCreateCommandQueue(context,devices[device],0,&err);
-                               cl_kernel dummyKernel = clCreateKernel(program, 
"dummy", &err);
-
-                               slot->tailApproximations[device] = 
bwdClCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
approximationSize, approximation, &err);
-                               if(err) printf("#%s, clCreateBuffer: %s;\n", 
__func__, clError(err));
-
-                               if(err) printf("#%s, clCreateKernel: %s;\n", 
__func__, clError(err));
-                               err = clSetKernelArg(dummyKernel, 0, 
sizeof(cl_mem), (cl_mem[]){slot->tailApproximations[device]});
-                               if(err) printf("#%s, clSetKernelArg: %s;\n", 
__func__, clError(err));
-                               if((err = clEnqueueNDRangeKernel(queue, 
dummyKernel, 1, (const size_t[]){0}, (const size_t[]){1}, (const size_t[]){1}, 
0, NULL, NULL)))
-                                       printf("#%s, clEnqueueNDRangeKernel: 
%s;\n", __func__, clError(err));
-                               clFinish(queue);
-                       
-                       }
-               }
-                       
-       }
+       forceLoadOntoGPU(approximation, approximationSize, slot);
+       
        free(approximation);
        return newIndex;
 }
+
+
+static inline char* getVarcharValue(const char* subject, const char* 
dictionary, const size_t i, const size_t width){
+       return (char*)dictionary+VarHeapVal(subject, i, width);
+}
+
+bounds findVarchar1Bounds(const char* subject, const size_t size, const size_t 
width, const char* dictionary){
+       assert(size>0);
+       {
+               char value = *getVarcharValue(subject, dictionary, 0, width);
+               bounds result = {value, value};
+               unsigned int i;
+               for (i = 1; i < size; ++i) {
+                       value = *getVarcharValue(subject, dictionary, i, width);
+                       result.min = MIN(result.min, value);
+                       result.max = MAX(result.max, value);
+               }
+               return result;
+       }
+}
+
+unsigned int decomposeVarchar1Array(const char* subject, const size_t size, 
const size_t width, const char* dictionary, const size_t 
baseBitsPlusApproximationBits){
+       const bounds subjectBounds = findVarchar1Bounds(subject, size, width, 
dictionary);
+       const int neededBits = ceil(log2(subjectBounds.max - 
subjectBounds.min));
+       const unsigned int newIndex = getNextFreeDecomposedBATSlotIndex();
+       DecomposedBATSlot* slot = getDecomposedBATSlotForIndex(newIndex);
+       const unsigned int residualBits = 8-baseBitsPlusApproximationBits;
+       const unsigned int residualMask = (1 << residualBits)-1;
+       const unsigned int approximationBits = neededBits-residualBits;
+       const size_t approximationSize = ceil(size*approximationBits/8.0) + 
sizeof(long) + sizeof(clTail);
+       clTail* approximation = calloc(approximationSize, sizeof(char));
+       unsigned int i;
+
+       slot->tailOffsetBits = 8-neededBits;
+       slot->approximationBits = approximationBits;
+       slot->isPersistentBAT = 1;
+       printf ("bounds: %d, %d, offset %zd bits\n",subjectBounds.min, 
subjectBounds.max, slot->tailOffsetBits);
+               
+       printf ("using %s for approximation and %s for residuals \n", 
humanreadablesize(approximationSize, (char[64]){}, 64), 
humanreadablesize(ceil(size*residualBits/8.0) + 8, (char[64]){}, 64));
+       slot->residuals = calloc(ceil(size*residualBits/8.0) + 8, sizeof(char));
+       approximation->base = subjectBounds.min;
+       slot->tailOffsetValue = approximation->base;
+       approximation->count = size;
+       
+       for (i = 0; i < size; ++i) {
+               const char value = *getVarcharValue(subject, dictionary, i, 
width);
+               { // approximation
+                       const size_t slotI = (i*approximationBits)/32;
+                       const unsigned int offset = (i*approximationBits)%32;
+                       if(offset+approximationBits > 32) {
+                               ((unsigned int*)approximation->elements)[slotI] 
|= ((((int)(value-approximation->base)) >> 
residualBits)>>(approximationBits-(32-offset)));
+                               ((unsigned 
int*)approximation->elements)[slotI+1] |= ((((int)(value-approximation->base)) 
>> residualBits)<<(32-(approximationBits-(32-offset))));
+                       } else {
+                               ((unsigned int*)approximation->elements)[slotI] 
|= ((((int)(value-approximation->base)) >> 
residualBits)<<(32-offset-approximationBits));
+                       }
+               }
+               { // residual
+                       const size_t slotI = (i*residualBits)/32;
+                       const unsigned int offset = (i*residualBits)%32;
+                       if(offset+residualBits <= 32) {
+                               ((unsigned int*)slot->residuals)[slotI] |= 
((((int)(value-approximation->base))&residualMask)<<(32-offset-residualBits));
+                       } else {
+                               ((unsigned int*)slot->residuals)[slotI] |= 
((((int)(value-approximation->base))&residualMask)>>(residualBits-(32-offset)));
+                               ((unsigned int*)slot->residuals)[slotI+1] |= 
((((int)(value-approximation->base))&residualMask)<<(32-(residualBits-(32-offset))));
+                       }
+               }
+       }
+
+       forceLoadOntoGPU(approximation, approximationSize, slot);
+       
+       free(approximation);
+       return newIndex;
+}
+
+
+
diff --git a/monetdb5/extras/bwd/utilities.h b/monetdb5/extras/bwd/utilities.h
--- a/monetdb5/extras/bwd/utilities.h
+++ b/monetdb5/extras/bwd/utilities.h
@@ -49,7 +49,8 @@ int isPersistentBAT(const BAT* subject) 
 int getGPUDeviceForThisThread();
 
 
- unsigned int decomposeIntArray(const int* subject, const size_t size, const 
size_t approximationBits);
+unsigned int decomposeIntArray(const int* subject, const size_t size, const 
size_t approximationBits);
+unsigned int decomposeVarchar1Array(const char* subject, const size_t size, 
const size_t width, const char* dictionary, const size_t 
baseBitsPlusApproximationBits);
 
 const char* clError(int) __attribute__((pure));
 cl_device_id getDeviceID();
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to