On Donnerstag 15 Oktober 2009, Allan Peter Engsig-Karup wrote: > I can add here... > > This is my experience after a successful installation of pyCuda (both the > former and the latest based on distribute) on my new MacBook Pro doing a > quick run of examples... > > 14 examples, with 1 a dump and 1 requiring openGL which I have not > installed yet, the 12 remaining tests tell me that I didn't spend my > money on the right GPU... out of memory!
I don't quite understand--you are getting these errors at context creation, which, for me at least, has never failed for that reason. Are you running anything that you would suspect could take up all your GPU memory? Maybe something's wedged and you need to reboot? Can you run a C-CUDA example? (I've attached one--run it with "nvcc -run simple.cu". It should spit out a bunch of numbers.) HTH, Andreas
// trivia
#include <stdio.h>
#include <stdlib.h>
// CUDA_CHK(NAME, ARGS): call the CUDA runtime function NAME with the
// parenthesized argument list ARGS, e.g.
//     CUDA_CHK(cudaMalloc, ((void **) &ptr, bytes));
// If the call does not return cudaSuccess, report the function name, the
// numeric error code and the runtime's error string, then abort().
//
// The body is wrapped in do { ... } while (0) so the macro behaves as a
// single statement and can be used safely in unbraced if/else branches.
// The message is written to stderr because abort() is not guaranteed to
// flush buffered stdout output.
#define CUDA_CHK(NAME, ARGS) \
    do { \
        cudaError_t cuda_err_code = NAME ARGS; \
        if (cuda_err_code != cudaSuccess) { \
            fprintf(stderr, "%s failed with code %d (%s)\n", #NAME, \
                    (int) cuda_err_code, cudaGetErrorString(cuda_err_code)); \
            abort(); \
        } \
    } while (0)
// end
// kernel
// Element-wise product kernel: for every index idx < n, overwrites a[idx]
// with a[idx] * b[idx]. (Despite its name it multiplies a by b rather than
// squaring a.)
//
// Each thread handles at most one element. The flat index is built from
// blockIdx.x and the x/y components of the block, so it matches a launch
// with a one-dimensional grid of (up to) two-dimensional blocks.
//
//   a  in/out array, at least n floats
//   b  input array, at least n floats
//   n  number of elements to process
__global__ void square_array(float *a, float *b, int n)
{
    // Row of this thread counted across all blocks along x.
    const unsigned int global_row = blockIdx.x * blockDim.y + threadIdx.y;
    const int idx = global_row * blockDim.x + threadIdx.x;

    // Threads past the end of the data (grid tail) have nothing to do.
    if (idx >= n)
        return;

    a[idx] *= b[idx];
}
// end
// main1
// Host driver for the example: fills two host arrays with a[i] = i and
// b[i] = i + 1, copies them to the device, runs square_array over them,
// copies the result back and prints every element of a.
//
// Every CUDA runtime call is routed through CUDA_CHK so that a failure is
// reported at the call that actually caused it. Returns 0 on success and 1
// if the host buffers cannot be allocated.
int main()
{
    CUDA_CHK(cudaSetDevice, (0)); // EDIT ME
    const int n = 4096;
    float *a_host = (float *) malloc(n*sizeof(float));
    float *b_host = (float *) malloc(n*sizeof(float));
    if (a_host == NULL || b_host == NULL) {
        fprintf(stderr, "host allocation of %d floats failed\n", n);
        free(a_host); // free(NULL) is a no-op, so this is safe either way
        free(b_host);
        return 1;
    }
    float *a_device, *b_device;
    CUDA_CHK(cudaMalloc, ((void **) &a_device, n*sizeof(float)));
    CUDA_CHK(cudaMalloc, ((void **) &b_device, n*sizeof(float)));
    // end
    // main2
    for (int i = 0; i < n; i++) { a_host[i] = i; b_host[i] = i+1; }
    CUDA_CHK(cudaMemcpy, (a_device, a_host, n*sizeof(float),
                          cudaMemcpyHostToDevice));
    CUDA_CHK(cudaMemcpy, (b_device, b_host, n*sizeof(float),
                          cudaMemcpyHostToDevice));
    // 16x16 threads per block; round the block count up so all n elements
    // are covered (the kernel bounds-checks the tail).
    dim3 block_dim(16, 16);
    int block_size = block_dim.x*block_dim.y;
    int n_blocks = (n + block_size-1) / block_size;
    square_array<<<n_blocks, block_dim>>>(a_device, b_device, n);
    // A kernel launch returns no status itself: pick up launch-configuration
    // errors first, then wait for the kernel so execution errors are
    // attributed here rather than to the following cudaMemcpy.
    CUDA_CHK(cudaGetLastError, ());
    CUDA_CHK(cudaDeviceSynchronize, ());
    // end
    // main3
    CUDA_CHK(cudaMemcpy, (a_host, a_device, n*sizeof(float),
                          cudaMemcpyDeviceToHost));
    for (int i = 0; i < n; i++)
        printf("%.0f ", a_host[i]);
    puts("\n");
    // Release both host and both device buffers.
    free(a_host);
    free(b_host);
    CUDA_CHK(cudaFree, (a_device));
    CUDA_CHK(cudaFree, (b_device));
    return 0;
}
// end
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ PyCUDA mailing list [email protected] http://tiker.net/mailman/listinfo/pycuda_tiker.net
