Changeset: 5d32722dce60 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d32722dce60
Modified Files:
        monetdb5/extras/bwd/operations.c
Branch: bwd
Log Message:

* using gdkmalloc and (slightly more) correct buffer sizes


diffs (249 lines):

diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -7,7 +7,7 @@
 #include "opt_statistics.h"
 #include "algebra.h"
 #include <math.h>
-
+#include "opt_groups.h"
 
 #include "bwd.h"
 
@@ -94,7 +94,7 @@ clTail* getApproximateValuesColumn(cl_me
        return buffer;
 }
 
-
+#ifndef CL_API_SUFFIX__VERSION_1_2
 cl_int clEnqueueFillBuffer(cl_command_queue    command_queue ,
                     cl_mem              buffer , 
                     const void *        pattern , 
@@ -106,15 +106,19 @@ cl_int clEnqueueFillBuffer(cl_command_qu
                     cl_event *          event ) {
        if(pattern_size == 4 && ((int*)pattern)[0] == 0){
                cl_int err = 0;
-               void* tmpbuffer = calloc(size, 1);
-               err = clEnqueueWriteBuffer(command_queue, buffer, CL_FALSE, 
offset, size, tmpbuffer, num_events_in_wait_list, event_wait_list, event); 
+               int* tmpbuffer = GDKzalloc(4);
+               err = clEnqueueWriteBuffer(command_queue, buffer, CL_TRUE, 
offset, size, tmpbuffer, num_events_in_wait_list, event_wait_list, event); 
                if(err) printf("#%s, clEnqueueWriteBuffer: %s;\n", __func__, 
clError(err));
-               free(tmpbuffer);
+               GDKfree(tmpBuffer);
                return err;
        }
        return -1;
 };
+#endif
 
+size_t calculatedBufferSize(size_t headCount, size_t approximationBits){
+       return (((int)ceil(headCount*approximationBits/8.0))/8)*8+16;
+}
 
 str BWDLeftJoinApproximate(bat * res, bat * l, bat * r){
        BAT* left;
@@ -135,7 +139,7 @@ str BWDLeftJoinApproximate(bat * res, ba
                const size_t offset = left->tseqbase;
                const size_t approximationBytes = 
batTailApproximationBits(right)/8;
                const unsigned int residualBits = 
32-batTailApproximationBits(right);
-               unsigned char* approximation = 
malloc(left->batCount*approximationBytes); 
+               unsigned char* approximation = 
GDKmalloc(left->batCount*approximationBytes); 
                BAT* result = BATnew(ATOMtype(left->htype), 
ATOMtype(right->ttype), left->batCount);
 
                BATsetcount(result, left->batCount);
@@ -150,7 +154,7 @@ str BWDLeftJoinApproximate(bat * res, ba
                for (i = 0; i < left->batCount; ++i)
                        outputRegion[i] = (*((unsigned int*)(approximation + 
(i+offset)*approximationBytes))) << residualBits;
 
-               free(approximation);
+               GDKfree(approximation);
                BBPkeepref((*res = result->batCacheid));
                BBPreleaseref(left->batCacheid);
                BBPreleaseref(right->batCacheid);
@@ -181,6 +185,7 @@ str BWDLeftJoinApproximate(bat * res, ba
                
                result = BATnew(TYPE_void, ATOMtype(right->ttype), 0);
                {
+                       int* zeroIntPattern = GDKzalloc(4);
                        const unsigned int newIndex = 
getNextFreeDecomposedBATSlotIndex();
                        DecomposedBATSlot* slot = 
getDecomposedBATSlotForIndex(newIndex);
                        BATsetprop(result, batRegistryIndex, TYPE_int, 
(int[]){newIndex});
@@ -192,22 +197,22 @@ str BWDLeftJoinApproximate(bat * res, ba
                        if(CL_REFCOUNT_DEBUG) printf ("retaining %p\n", 
leftColumn);
                        if((err=clRetainMemObject(leftColumn)))
                                printf("#%s, clRetainMemObject: %s;\n", 
__func__, clError(err));
-                       slot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
headCount*slot->approximationBits/8+8+sizeof(clTail), NULL, &err);
+                       slot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
calculatedBufferSize(headCount,slot->approximationBits)+sizeof(clTail), NULL, 
&err);
                        if(CL_REFCOUNT_DEBUG) printf ("%s, result (%d) tail 
approximation: %p\n", __func__, result->batCacheid , 
batTailApproximation(result));
                        assert(slot->tailPositions);
                        if(err) printf("#%s, bwdClCreateBuffer: %s;\n", 
__func__, clError(err));
 
-                       clEnqueueWriteBuffer(getCommandQueue(), 
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0, 
NULL, NULL); 
-                       if(err) printf("#%s, bwdClCreateBuffer: %s;\n", 
__func__, clError(err));
-                       clEnqueueFillBuffer(getCommandQueue(), 
slot->tailApproximation, (int[]){0}, sizeof(int), sizeof(clTail), 
headCount*slot->approximationBits/8+8, 0, NULL, NULL);
-
+                       err = clEnqueueWriteBuffer(getCommandQueue(), 
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0, 
NULL, NULL); 
+                       if(err) printf("#%s, clEnqueueWriteBuffer: %s;\n", 
__func__, clError(err));
+                       err = clEnqueueFillBuffer(getCommandQueue(), 
slot->tailApproximation, zeroIntPattern, sizeof(int), sizeof(clTail), 
calculatedBufferSize(headCount, slot->approximationBits), 0, NULL, NULL);
+                       if(err) printf("#%s, clEnqueueFillBuffer: %s;\n", 
__func__, clError(err));
+                       free(zeroIntPattern)
                }
                for (i = 0; i < 3; ++i) {
                        if((err = clSetKernelArg(projectKernel, i, 
sizeof(cl_mem), &((cl_mem[]){
                                                                
batTailApproximation(result), leftColumn, rightColumn}[i]
                                                        )))) printf("#%s, 
clSetKernelArg(%d): %s;\n", __func__, 0, clError(err));
                }
-
                if((err = clEnqueueNDRangeKernel(getCommandQueue(), 
projectKernel, 1, (const size_t[]){0}, (const 
size_t[]){ceil(headCount/((float)WORK_GROUP_SIZE))*WORK_GROUP_SIZE}, (const 
size_t[]){WORK_GROUP_SIZE}, 0, NULL, NULL)))
                        printf("#%s, clEnqueueNDRangeKernel: %s;\n", __func__, 
clError(err));
                if (synchronousGPU) clFinish(getCommandQueue());
@@ -306,11 +311,11 @@ str BWDLeftJoinRefine(bat * res, bat * l
                        clTail* supersetApproximateValuesColumn;
                        size_t bufferSize;
                        getPositionsColumn(batTailPositions(approximation), 
NULL, &bufferSize);
-                       supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize), 
&bufferSize);
+                       supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize), 
&bufferSize);
 
                        
getApproximateValuesColumn(batTailApproximation(approximation), NULL, 
&bufferSize);
 
-                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
malloc(bufferSize), &bufferSize);
+                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
GDKmalloc(bufferSize), &bufferSize);
 
 
                        refinement = BATnew(TYPE_void, ATOMtype(right->ttype), 
left->batCount);
@@ -332,8 +337,8 @@ str BWDLeftJoinRefine(bat * res, bat * l
                                BATseqbase(refinement, 0);
                        }               
                
-                       free(supersetPositionsColumn);
-                       free(supersetApproximateValuesColumn);
+                       GDKfree(supersetPositionsColumn);
+                       GDKfree(supersetApproximateValuesColumn);
                }
 
                BBPkeepref(*res = refinement->batCacheid);
@@ -441,7 +446,7 @@ static inline str uselect(bat *res, bat 
 
                                                
clGetMemObjectInfo(batTailApproximation(result), CL_MEM_SIZE, sizeof(size_t), 
&resultSize
                                                                                
                                         , NULL);
-                                               compressedTail = 
malloc(resultSize);
+                                               compressedTail = 
GDKmalloc(resultSize);
                                                err = 
clEnqueueReadBuffer(getCommandQueue(), batTailApproximation(result), CL_TRUE, 
0, resultSize, compressedTail , 0, NULL, NULL);     
                                                if(err) printf("#%s, 
clEnqueueReadBuffer: %s;\n", __func__, clError(err));
                                                (void) compressedTail;
@@ -617,7 +622,7 @@ str uselectrefine(bat *res, bat *bid, pt
                        {
                                size_t approximationSize;
                                clGetMemObjectInfo(headApproximation, 
CL_MEM_SIZE, sizeof(size_t), &approximationSize, NULL);
-                               compressedHead = malloc(approximationSize);
+                               compressedHead = GDKmalloc(approximationSize);
                                err = clEnqueueReadBuffer(getCommandQueue(), 
batHeadApproximation(approximation), CL_TRUE, 0, approximationSize, 
compressedHead , 0, NULL, NULL);       
                                if(err) printf("#%s, clEnqueueReadBuffer: 
%s;\n", __func__, clError(err));
                        }
@@ -651,7 +656,7 @@ str uselectrefine(bat *res, bat *bid, pt
 
                                                
clGetMemObjectInfo(batTailApproximation(approximation), CL_MEM_SIZE, 
sizeof(size_t), &approximationSize
                                                                                
                                         , NULL);
-                                               compressedTail = 
malloc(approximationSize);
+                                               compressedTail = 
GDKmalloc(approximationSize);
                                                err = 
clEnqueueReadBuffer(getCommandQueue(), batTailApproximation(approximation), 
CL_TRUE, 0, approximationSize, compressedTail , 0, NULL, NULL);       
                                                if(err) printf("#%s, 
clEnqueueReadBuffer: %s;\n", __func__, clError(err));
                                        }
@@ -667,8 +672,8 @@ str uselectrefine(bat *res, bat *bid, pt
                                                BATsetcount(result, j);
                                        }
                                        BATseqbase(BATmirror(result), oid_nil);
-                                       free(compressedHead);
-                                       free(compressedTail);
+                                       GDKfree(compressedHead);
+                                       GDKfree(compressedTail);
                                }
 
                        }
@@ -760,7 +765,8 @@ str BWDSemijoinApproximate(int *res, int
                                printf("#%s, clEnqueueReadBuffer: %s;\n", 
__func__, clError(err));
 
 
-                       {               
+                       {
+                               int* zeroIntPattern = GDKzalloc(4);
                                const unsigned int newIndex = 
getNextFreeDecomposedBATSlotIndex();
                                DecomposedBATSlot* slot = 
getDecomposedBATSlotForIndex(newIndex);
                                BATsetprop(result, batRegistryIndex, TYPE_int, 
(int[]){newIndex});
@@ -774,14 +780,15 @@ str BWDSemijoinApproximate(int *res, int
                                if((err=clRetainMemObject(positionColumn)))
                                        printf("#%s, clRetainMemObject: %s;\n", 
__func__, clError(err));
 
-                               slot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
headCount*slot->approximationBits/8+8+sizeof(clTail), NULL, &err);
+                               slot->tailApproximation = 
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
calculatedBufferSize(headCount, slot->approximationBits)+sizeof(clTail), NULL, 
&err);
                                assert(slot->tailPositions);
 
                                if(err) printf("#%s, bwdClCreateBuffer: %s;\n", 
__func__, clError(err));
 
                                clEnqueueWriteBuffer(getCommandQueue(), 
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0, 
NULL, NULL); 
                                if(err) printf("#%s, bwdClCreateBuffer: %s;\n", 
__func__, clError(err));
-                       clEnqueueFillBuffer(getCommandQueue(), 
slot->tailApproximation, (int[]){0}, sizeof(int), sizeof(clTail), 
headCount*slot->approximationBits/8+8, 0, NULL, NULL);
+                               clEnqueueFillBuffer(getCommandQueue(), 
slot->tailApproximation, zeroIntPattern, sizeof(int), sizeof(clTail), 
calculatedBufferSize(headCount, slot->approximationBits), 0, NULL, NULL);
+                               GDKfree(zeroIntPattern);
                        }
                        {
                                int i;
@@ -801,13 +808,13 @@ str BWDSemijoinApproximate(int *res, int
                                clHead* supersetPositionsColumn;
                                size_t bufferSize;
                                
getPositionsColumn(batTailPositions(approximation), NULL, &bufferSize);
-                               supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize), 
&bufferSize);
+                               supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize), 
&bufferSize);
                                {
                                        size_t bufferSize;
                                        clTail* 
supersetApproximateValuesColumn;                        
 
                                        
getApproximateValuesColumn(batTailApproximation(approximation), NULL, 
&bufferSize);
-                                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
malloc(bufferSize), &bufferSize);
+                                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
GDKmalloc(bufferSize), &bufferSize);
                                        if(1) printf ("first position: %d\n", 
supersetPositionsColumn->positions[0]);
                                        (void)supersetApproximateValuesColumn;
                                }
@@ -864,7 +871,7 @@ str BWDSemijoinRefine(int *res, int *lid
                        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
                
                        getPositionsColumn(batTailPositions(approximation), 
NULL, &bufferSize);
-                       supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize), 
&bufferSize);
+                       supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize), 
&bufferSize);
 
                        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
                        printf ("clHead* supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize), 
&bufferSize); took %ld 
nanoseconds\n",(after.tv_sec*1000000000+after.tv_nsec)-(before.tv_sec*1000000000+before.tv_nsec));
@@ -872,7 +879,7 @@ str BWDSemijoinRefine(int *res, int *lid
                
                        
getApproximateValuesColumn(batTailApproximation(approximation), NULL, 
&bufferSize);
                
-                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
malloc(bufferSize), &bufferSize);
+                       supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
GDKmalloc(bufferSize), &bufferSize);
 
                        {
                                if(supersetPositionsColumn){
@@ -886,7 +893,7 @@ str BWDSemijoinRefine(int *res, int *lid
                                                if(residualBits > 0){
                                                        unsigned int i,j;
                                                        size_t refinementCount 
= 0;
-                                                       filteredResiduals = 
calloc(ceil((supersetPositionsColumn->count)*residualBits/8) + sizeof(size_t), 
1);
+                                                       filteredResiduals = 
GDKzalloc(ceil((supersetPositionsColumn->count)*residualBits/8.0) + 
sizeof(size_t));
                                                        
                                                        if(1){
                                                                const unsigned 
int targetTypeBits = sizeof(int)*8;
@@ -955,7 +962,7 @@ str BWDSemijoinRefine(int *res, int *lid
                        getDecomposedBATSlot(refinement)->approximationBits = 
batTailApproximationBits(left);
                        getDecomposedBATSlot(refinement)->tailOffsetBits = 
batTailOffsetBits(left);
 
-                       free(supersetPositionsColumn);
+                       GDKfree(supersetPositionsColumn);
 
                
                } else {
@@ -1062,5 +1069,5 @@ str BWDjoinRefine(int *ret, int *lid, in
 
 
 str BWDMulticolumnGroupApproximate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci){
-               throw(MAL, __func__, "not implemented yet: %s", __func__);
+       return GRPmulticolumngroup(cntxt, mb, stk, pci);
 }
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to