On Thu, Jul 07, 2022 at 11:34:32AM +0100, Andrew Stubbs wrote: > libgomp/ChangeLog: > > * allocator.c (MEMSPACE_ALLOC): New macro. > (MEMSPACE_CALLOC): New macro. > (MEMSPACE_REALLOC): New macro. > (MEMSPACE_FREE): New macro. > (dynamic_smem_size): New constants. > (omp_alloc): Use MEMSPACE_ALLOC. > Implement fall-backs for predefined allocators. > (omp_free): Use MEMSPACE_FREE. > (omp_calloc): Use MEMSPACE_CALLOC. > Implement fall-backs for predefined allocators. > (omp_realloc): Use MEMSPACE_REALLOC and MEMSPACE_ALLOC.. > Implement fall-backs for predefined allocators. > * config/nvptx/team.c (__nvptx_lowlat_heap_root): New variable. > (__nvptx_lowlat_pool): New asm varaible. > (gomp_nvptx_main): Initialize the low-latency heap. > * plugin/plugin-nvptx.c (lowlat_pool_size): New variable. > (GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar. > (GOMP_OFFLOAD_run): Apply lowlat_pool_size. > * config/nvptx/allocator.c: New file. > * testsuite/libgomp.c/allocators-1.c: New test. > * testsuite/libgomp.c/allocators-2.c: New test. > * testsuite/libgomp.c/allocators-3.c: New test. > * testsuite/libgomp.c/allocators-4.c: New test. > * testsuite/libgomp.c/allocators-5.c: New test. > * testsuite/libgomp.c/allocators-6.c: New test. > > co-authored-by: Kwok Cheung Yeung <k...@codesourcery.com>
> +/* These macros may be overridden in config/<target>/allocator.c. */ > +#ifndef MEMSPACE_ALLOC > +#define MEMSPACE_ALLOC(MEMSPACE, SIZE) malloc (SIZE) > +#endif Rather than uglifying the sources with __attribute__((unused)) on the memspace variables, wouldn't it be better to always use MEMSPACE? So, #define MEMSPACE_ALLOC(MEMSPACE, SIZE) malloc (((MEMSPACE), (SIZE))) or so (similarly other macros)? > +#ifndef MEMSPACE_CALLOC > +#define MEMSPACE_CALLOC(MEMSPACE, SIZE) calloc (1, SIZE) > +#endif > +#ifndef MEMSPACE_REALLOC > +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) realloc (ADDR, SIZE) > +#endif > +#ifndef MEMSPACE_FREE > +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) free (ADDR) > +#endif > +/* Map the predefined allocators to the correct memory space. > + The index to this table is the omp_allocator_handle_t enum value. */ > +static const omp_memspace_handle_t predefined_alloc_mapping[] = { > + omp_default_mem_space, /* omp_null_allocator. */ > + omp_default_mem_space, /* omp_default_mem_alloc. */ > + omp_large_cap_mem_space, /* omp_large_cap_mem_alloc. */ > + omp_default_mem_space, /* omp_const_mem_alloc. */ Shouldn't this be omp_const_mem_space ? That is what the standard says and you need to handle it in MEMSPACE_ALLOC etc. anyway because omp_init_allocator could be done with that memspace. > + omp_high_bw_mem_space, /* omp_high_bw_mem_alloc. */ > + omp_low_lat_mem_space, /* omp_low_lat_mem_alloc. */ > + omp_low_lat_mem_space, /* omp_cgroup_mem_alloc. */ > + omp_low_lat_mem_space, /* omp_pteam_mem_alloc. */ > + omp_low_lat_mem_space, /* omp_thread_mem_alloc. */ The above 3 are implementation defined, so we can choose whatever we want. > @@ -496,35 +530,38 @@ retry: > return ret; > > fail: > - if (allocator_data) > + int fallback = (allocator_data > + ? allocator_data->fallback > + : allocator == omp_default_mem_alloc > + ? omp_atv_null_fb > + : omp_atv_default_mem_fb); A label can be only followed by variable declaration in C2X (and in C++), I think we should keep libgomp in C99 for the time being. So, it should be fail:; > + || (allocator_data > + && allocator_data->pool_size < ~(uintptr_t) 0) > + || !allocator_data) This would be better written as: || allocator_data == NULL || allocator_data->pool_size < ~(uintptr_t) 0) > @@ -766,35 +816,38 @@ retry: > return ret; > > fail: > - if (allocator_data) > + int fallback = (allocator_data > + ? allocator_data->fallback > + : allocator == omp_default_mem_alloc > + ? omp_atv_null_fb > + : omp_atv_default_mem_fb); See above. > + || (allocator_data > + && allocator_data->pool_size < ~(uintptr_t) 0) > + || !allocator_data) And again. > @@ -1073,35 +1139,38 @@ retry: > return ret; > > fail: > - if (allocator_data) > + int fallback = (allocator_data And again. > + || (allocator_data > + && allocator_data->pool_size < ~(uintptr_t) 0) > + || !allocator_data) And again. > --- /dev/null > +++ b/libgomp/config/nvptx/allocator.c > @@ -0,0 +1,370 @@ > +/* Copyright (C) 2021 Free Software Foundation, Inc. -2022 > +static void * > +nvptx_memspace_alloc (omp_memspace_handle_t memspace, size_t size) > +{ > + if (memspace == omp_low_lat_mem_space) > + { > + char *shared_pool; > + asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool)); Space between " and ( > + uint32_t *chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset); Space between ) and ( and before * > + chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset); Ditto. > + uint32_t *stillfreeptr = (uint32_t*)(shared_pool > + + stillfree.desc.offset); And again. > + for (unsigned i = 0; i < (unsigned)size/8; i++) Space in between ) and size and 2 spaces around / > + result[i] = 0; > + > + return result; > + } > + else > + return calloc (1, size); > +} > + > +static void > +nvptx_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size) > +{ > + if (memspace == omp_low_lat_mem_space) > + { > + char *shared_pool; > + asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool)); Formatting. > + uint32_t *chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset); Again. > + heapdesc onward_chain = {chunkptr[0]}; > + while (chunk.desc.size != 0 && addr > (void*)chunkptr) Again (won't enumerate anymore). > --- a/libgomp/plugin/plugin-nvptx.c > +++ b/libgomp/plugin/plugin-nvptx.c > @@ -334,6 +334,11 @@ struct ptx_device > > static struct ptx_device **ptx_devices; > > +/* OpenMP kernels reserve a small amount of ".shared" space for use by > + omp_alloc. The size is configured using GOMP_NVPTX_LOWLAT_POOL, but the > + default is set here. */ > +static unsigned lowlat_pool_size = 8*1024; Spaces around * > +void > +test (int n, omp_allocator_handle_t allocator) > +{ > + #pragma omp target map(to:n) map(to:allocator) > + { > + int *a; > + a = (int *) omp_alloc(n*sizeof(int), allocator); Space before ( (twice) and around *. > + > + omp_free(a, allocator); Space before ( > + a = (int **) omp_alloc(n*sizeof(int*), allocator); Again plus space before *) > + a[i] = omp_alloc(sizeof(int)*10, allocator); Again. > + omp_free(a[i], allocator); Again. > + > +return 0; 2 spaces before return 0; > +} > + Jakub