Changeset: 5d32722dce60 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d32722dce60
Modified Files:
monetdb5/extras/bwd/operations.c
Branch: bwd
Log Message:
* using gdkmalloc and (slightly more) correct buffer sizes
Differences (249 lines):
diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -7,7 +7,7 @@
#include "opt_statistics.h"
#include "algebra.h"
#include <math.h>
-
+#include "opt_groups.h"
#include "bwd.h"
@@ -94,7 +94,7 @@ clTail* getApproximateValuesColumn(cl_me
return buffer;
}
-
+#ifndef CL_API_SUFFIX__VERSION_1_2
cl_int clEnqueueFillBuffer(cl_command_queue command_queue ,
cl_mem buffer ,
const void * pattern ,
@@ -106,15 +106,19 @@ cl_int clEnqueueFillBuffer(cl_command_qu
cl_event * event ) {
if(pattern_size == 4 && ((int*)pattern)[0] == 0){
cl_int err = 0;
- void* tmpbuffer = calloc(size, 1);
- err = clEnqueueWriteBuffer(command_queue, buffer, CL_FALSE,
offset, size, tmpbuffer, num_events_in_wait_list, event_wait_list, event);
+ void* tmpbuffer = GDKzalloc(size); /* zeroed staging buffer; clEnqueueWriteBuffer reads `size` bytes from it, so it must be `size` bytes long, not 4 */
+ err = tmpbuffer ? clEnqueueWriteBuffer(command_queue, buffer, CL_TRUE, offset, size, tmpbuffer, num_events_in_wait_list, event_wait_list, event) : CL_OUT_OF_HOST_MEMORY; /* blocking write (CL_TRUE): the staging buffer may be released as soon as the call returns; a failed allocation is reported as an error instead of being dereferenced */
if(err) printf("#%s, clEnqueueWriteBuffer: %s;\n", __func__,
clError(err));
- free(tmpbuffer);
+ if(tmpbuffer) GDKfree(tmpbuffer); /* same identifier as the declaration above (was misspelled tmpBuffer) */
return err;
}
return -1;
};
+#endif
+size_t calculatedBufferSize(size_t headCount, size_t approximationBits){ /* byte size for headCount values of approximationBits bits each: ceil(total bits / 8), rounded down to a multiple of 8, plus 16 */
+ return ((headCount*approximationBits + 7)/8/8)*8 + 16; /* pure size_t arithmetic: same result as the former ceil()/(int) form, without the int truncation once the byte count reaches 2 GiB */
+}
str BWDLeftJoinApproximate(bat * res, bat * l, bat * r){
BAT* left;
@@ -135,7 +139,7 @@ str BWDLeftJoinApproximate(bat * res, ba
const size_t offset = left->tseqbase;
const size_t approximationBytes =
batTailApproximationBits(right)/8;
const unsigned int residualBits =
32-batTailApproximationBits(right);
- unsigned char* approximation =
malloc(left->batCount*approximationBytes);
+ unsigned char* approximation =
GDKmalloc(left->batCount*approximationBytes);
BAT* result = BATnew(ATOMtype(left->htype),
ATOMtype(right->ttype), left->batCount);
BATsetcount(result, left->batCount);
@@ -150,7 +154,7 @@ str BWDLeftJoinApproximate(bat * res, ba
for (i = 0; i < left->batCount; ++i)
outputRegion[i] = (*((unsigned int*)(approximation +
(i+offset)*approximationBytes))) << residualBits;
- free(approximation);
+ GDKfree(approximation);
BBPkeepref((*res = result->batCacheid));
BBPreleaseref(left->batCacheid);
BBPreleaseref(right->batCacheid);
@@ -181,6 +185,7 @@ str BWDLeftJoinApproximate(bat * res, ba
result = BATnew(TYPE_void, ATOMtype(right->ttype), 0);
{
+ int* zeroIntPattern = GDKzalloc(4);
const unsigned int newIndex =
getNextFreeDecomposedBATSlotIndex();
DecomposedBATSlot* slot =
getDecomposedBATSlotForIndex(newIndex);
BATsetprop(result, batRegistryIndex, TYPE_int,
(int[]){newIndex});
@@ -192,22 +197,22 @@ str BWDLeftJoinApproximate(bat * res, ba
if(CL_REFCOUNT_DEBUG) printf ("retaining %p\n",
leftColumn);
if((err=clRetainMemObject(leftColumn)))
printf("#%s, clRetainMemObject: %s;\n",
__func__, clError(err));
- slot->tailApproximation =
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE,
headCount*slot->approximationBits/8+8+sizeof(clTail), NULL, &err);
+ slot->tailApproximation =
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE,
calculatedBufferSize(headCount,slot->approximationBits)+sizeof(clTail), NULL,
&err);
if(CL_REFCOUNT_DEBUG) printf ("%s, result (%d) tail
approximation: %p\n", __func__, result->batCacheid ,
batTailApproximation(result));
assert(slot->tailPositions);
if(err) printf("#%s, bwdClCreateBuffer: %s;\n",
__func__, clError(err));
- clEnqueueWriteBuffer(getCommandQueue(),
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0,
NULL, NULL);
- if(err) printf("#%s, bwdClCreateBuffer: %s;\n",
__func__, clError(err));
- clEnqueueFillBuffer(getCommandQueue(),
slot->tailApproximation, (int[]){0}, sizeof(int), sizeof(clTail),
headCount*slot->approximationBits/8+8, 0, NULL, NULL);
-
+ err = clEnqueueWriteBuffer(getCommandQueue(),
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0,
NULL, NULL);
+ if(err) printf("#%s, clEnqueueWriteBuffer: %s;\n",
__func__, clError(err));
+ err = clEnqueueFillBuffer(getCommandQueue(),
slot->tailApproximation, zeroIntPattern, sizeof(int), sizeof(clTail),
calculatedBufferSize(headCount, slot->approximationBits), 0, NULL, NULL);
+ if(err) printf("#%s, clEnqueueFillBuffer: %s;\n",
__func__, clError(err));
+ GDKfree(zeroIntPattern); /* zeroIntPattern comes from GDKzalloc, so it is released with GDKfree (was free() without a terminating semicolon) */
}
for (i = 0; i < 3; ++i) {
if((err = clSetKernelArg(projectKernel, i,
sizeof(cl_mem), &((cl_mem[]){
batTailApproximation(result), leftColumn, rightColumn}[i]
)))) printf("#%s,
clSetKernelArg(%d): %s;\n", __func__, 0, clError(err));
}
-
if((err = clEnqueueNDRangeKernel(getCommandQueue(),
projectKernel, 1, (const size_t[]){0}, (const
size_t[]){ceil(headCount/((float)WORK_GROUP_SIZE))*WORK_GROUP_SIZE}, (const
size_t[]){WORK_GROUP_SIZE}, 0, NULL, NULL)))
printf("#%s, clEnqueueNDRangeKernel: %s;\n", __func__,
clError(err));
if (synchronousGPU) clFinish(getCommandQueue());
@@ -306,11 +311,11 @@ str BWDLeftJoinRefine(bat * res, bat * l
clTail* supersetApproximateValuesColumn;
size_t bufferSize;
getPositionsColumn(batTailPositions(approximation),
NULL, &bufferSize);
- supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize),
&bufferSize);
+ supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize),
&bufferSize);
getApproximateValuesColumn(batTailApproximation(approximation), NULL,
&bufferSize);
- supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
malloc(bufferSize), &bufferSize);
+ supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
GDKmalloc(bufferSize), &bufferSize);
refinement = BATnew(TYPE_void, ATOMtype(right->ttype),
left->batCount);
@@ -332,8 +337,8 @@ str BWDLeftJoinRefine(bat * res, bat * l
BATseqbase(refinement, 0);
}
- free(supersetPositionsColumn);
- free(supersetApproximateValuesColumn);
+ GDKfree(supersetPositionsColumn);
+ GDKfree(supersetApproximateValuesColumn);
}
BBPkeepref(*res = refinement->batCacheid);
@@ -441,7 +446,7 @@ static inline str uselect(bat *res, bat
clGetMemObjectInfo(batTailApproximation(result), CL_MEM_SIZE, sizeof(size_t),
&resultSize
, NULL);
- compressedTail =
malloc(resultSize);
+ compressedTail =
GDKmalloc(resultSize);
err =
clEnqueueReadBuffer(getCommandQueue(), batTailApproximation(result), CL_TRUE,
0, resultSize, compressedTail , 0, NULL, NULL);
if(err) printf("#%s,
clEnqueueReadBuffer: %s;\n", __func__, clError(err));
(void) compressedTail;
@@ -617,7 +622,7 @@ str uselectrefine(bat *res, bat *bid, pt
{
size_t approximationSize;
clGetMemObjectInfo(headApproximation,
CL_MEM_SIZE, sizeof(size_t), &approximationSize, NULL);
- compressedHead = malloc(approximationSize);
+ compressedHead = GDKmalloc(approximationSize);
err = clEnqueueReadBuffer(getCommandQueue(),
batHeadApproximation(approximation), CL_TRUE, 0, approximationSize,
compressedHead , 0, NULL, NULL);
if(err) printf("#%s, clEnqueueReadBuffer:
%s;\n", __func__, clError(err));
}
@@ -651,7 +656,7 @@ str uselectrefine(bat *res, bat *bid, pt
clGetMemObjectInfo(batTailApproximation(approximation), CL_MEM_SIZE,
sizeof(size_t), &approximationSize
, NULL);
- compressedTail =
malloc(approximationSize);
+ compressedTail =
GDKmalloc(approximationSize);
err =
clEnqueueReadBuffer(getCommandQueue(), batTailApproximation(approximation),
CL_TRUE, 0, approximationSize, compressedTail , 0, NULL, NULL);
if(err) printf("#%s,
clEnqueueReadBuffer: %s;\n", __func__, clError(err));
}
@@ -667,8 +672,8 @@ str uselectrefine(bat *res, bat *bid, pt
BATsetcount(result, j);
}
BATseqbase(BATmirror(result), oid_nil);
- free(compressedHead);
- free(compressedTail);
+ GDKfree(compressedHead);
+ GDKfree(compressedTail);
}
}
@@ -760,7 +765,8 @@ str BWDSemijoinApproximate(int *res, int
printf("#%s, clEnqueueReadBuffer: %s;\n",
__func__, clError(err));
- {
+ {
+ int* zeroIntPattern = GDKzalloc(4);
const unsigned int newIndex =
getNextFreeDecomposedBATSlotIndex();
DecomposedBATSlot* slot =
getDecomposedBATSlotForIndex(newIndex);
BATsetprop(result, batRegistryIndex, TYPE_int,
(int[]){newIndex});
@@ -774,14 +780,15 @@ str BWDSemijoinApproximate(int *res, int
if((err=clRetainMemObject(positionColumn)))
printf("#%s, clRetainMemObject: %s;\n",
__func__, clError(err));
- slot->tailApproximation =
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE,
headCount*slot->approximationBits/8+8+sizeof(clTail), NULL, &err);
+ slot->tailApproximation =
bwdClCreateBuffer(getCLContext(), CL_MEM_READ_WRITE,
calculatedBufferSize(headCount, slot->approximationBits)+sizeof(clTail), NULL,
&err);
assert(slot->tailPositions);
if(err) printf("#%s, bwdClCreateBuffer: %s;\n",
__func__, clError(err));
clEnqueueWriteBuffer(getCommandQueue(),
slot->tailApproximation, CL_TRUE, 0, sizeof(clTail), &newTailDefinition, 0,
NULL, NULL);
if(err) printf("#%s, bwdClCreateBuffer: %s;\n",
__func__, clError(err));
- clEnqueueFillBuffer(getCommandQueue(),
slot->tailApproximation, (int[]){0}, sizeof(int), sizeof(clTail),
headCount*slot->approximationBits/8+8, 0, NULL, NULL);
+ clEnqueueFillBuffer(getCommandQueue(),
slot->tailApproximation, zeroIntPattern, sizeof(int), sizeof(clTail),
calculatedBufferSize(headCount, slot->approximationBits), 0, NULL, NULL);
+ GDKfree(zeroIntPattern);
}
{
int i;
@@ -801,13 +808,13 @@ str BWDSemijoinApproximate(int *res, int
clHead* supersetPositionsColumn;
size_t bufferSize;
getPositionsColumn(batTailPositions(approximation), NULL, &bufferSize);
- supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize),
&bufferSize);
+ supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize),
&bufferSize);
{
size_t bufferSize;
clTail*
supersetApproximateValuesColumn;
getApproximateValuesColumn(batTailApproximation(approximation), NULL,
&bufferSize);
- supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
malloc(bufferSize), &bufferSize);
+ supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
GDKmalloc(bufferSize), &bufferSize);
if(1) printf ("first position: %d\n",
supersetPositionsColumn->positions[0]);
(void)supersetApproximateValuesColumn;
}
@@ -864,7 +871,7 @@ str BWDSemijoinRefine(int *res, int *lid
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
getPositionsColumn(batTailPositions(approximation),
NULL, &bufferSize);
- supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize),
&bufferSize);
+ supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), GDKmalloc(bufferSize),
&bufferSize);
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
printf ("clHead* supersetPositionsColumn =
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize),
&bufferSize); took %ld
nanoseconds\n",(after.tv_sec*1000000000+after.tv_nsec)-(before.tv_sec*1000000000+before.tv_nsec));
@@ -872,7 +879,7 @@ str BWDSemijoinRefine(int *res, int *lid
getApproximateValuesColumn(batTailApproximation(approximation), NULL,
&bufferSize);
- supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
malloc(bufferSize), &bufferSize);
+ supersetApproximateValuesColumn =
getApproximateValuesColumn(batTailApproximation(approximation),
GDKmalloc(bufferSize), &bufferSize);
{
if(supersetPositionsColumn){
@@ -886,7 +893,7 @@ str BWDSemijoinRefine(int *res, int *lid
if(residualBits > 0){
unsigned int i,j;
size_t refinementCount
= 0;
- filteredResiduals =
calloc(ceil((supersetPositionsColumn->count)*residualBits/8) + sizeof(size_t),
1);
+ filteredResiduals =
GDKzalloc(ceil((supersetPositionsColumn->count)*residualBits/8.0) +
sizeof(size_t));
if(1){
const unsigned
int targetTypeBits = sizeof(int)*8;
@@ -955,7 +962,7 @@ str BWDSemijoinRefine(int *res, int *lid
getDecomposedBATSlot(refinement)->approximationBits =
batTailApproximationBits(left);
getDecomposedBATSlot(refinement)->tailOffsetBits =
batTailOffsetBits(left);
- free(supersetPositionsColumn);
+ GDKfree(supersetPositionsColumn);
} else {
@@ -1062,5 +1069,5 @@ str BWDjoinRefine(int *ret, int *lid, in
str BWDMulticolumnGroupApproximate(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci){
- throw(MAL, __func__, "not implemented yet: %s", __func__);
+ return GRPmulticolumngroup(cntxt, mb, stk, pci);
}
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list