On Thu, Jul 07, 2022 at 11:34:32AM +0100, Andrew Stubbs wrote:
> libgomp/ChangeLog:
>
> * allocator.c (MEMSPACE_ALLOC): New macro.
> (MEMSPACE_CALLOC): New macro.
> (MEMSPACE_REALLOC): New macro.
> (MEMSPACE_FREE): New macro.
> (dynamic_smem_size): New constants.
> (omp_alloc): Use MEMSPACE_ALLOC.
> Implement fall-backs for predefined allocators.
> (omp_free): Use MEMSPACE_FREE.
> (omp_calloc): Use MEMSPACE_CALLOC.
> Implement fall-backs for predefined allocators.
> (omp_realloc): Use MEMSPACE_REALLOC and MEMSPACE_ALLOC..
> Implement fall-backs for predefined allocators.
> * config/nvptx/team.c (__nvptx_lowlat_heap_root): New variable.
> (__nvptx_lowlat_pool): New asm varaible.
> (gomp_nvptx_main): Initialize the low-latency heap.
> * plugin/plugin-nvptx.c (lowlat_pool_size): New variable.
> (GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar.
> (GOMP_OFFLOAD_run): Apply lowlat_pool_size.
> * config/nvptx/allocator.c: New file.
> * testsuite/libgomp.c/allocators-1.c: New test.
> * testsuite/libgomp.c/allocators-2.c: New test.
> * testsuite/libgomp.c/allocators-3.c: New test.
> * testsuite/libgomp.c/allocators-4.c: New test.
> * testsuite/libgomp.c/allocators-5.c: New test.
> * testsuite/libgomp.c/allocators-6.c: New test.
>
> co-authored-by: Kwok Cheung Yeung <[email protected]>
> +/* These macros may be overridden in config/<target>/allocator.c. */
> +#ifndef MEMSPACE_ALLOC
> +#define MEMSPACE_ALLOC(MEMSPACE, SIZE) malloc (SIZE)
> +#endif
Rather than uglifying the sources with __attribute__((unused)) on the
memspace variables, wouldn't it be better to always use MEMSPACE?
So,
#define MEMSPACE_ALLOC(MEMSPACE, SIZE) malloc (((MEMSPACE), (SIZE)))
or so (similarly other macros)?
> +#ifndef MEMSPACE_CALLOC
> +#define MEMSPACE_CALLOC(MEMSPACE, SIZE) calloc (1, SIZE)
> +#endif
> +#ifndef MEMSPACE_REALLOC
> +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) realloc (ADDR, SIZE)
> +#endif
> +#ifndef MEMSPACE_FREE
> +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) free (ADDR)
> +#endif
> +/* Map the predefined allocators to the correct memory space.
> + The index to this table is the omp_allocator_handle_t enum value. */
> +static const omp_memspace_handle_t predefined_alloc_mapping[] = {
> + omp_default_mem_space, /* omp_null_allocator. */
> + omp_default_mem_space, /* omp_default_mem_alloc. */
> + omp_large_cap_mem_space, /* omp_large_cap_mem_alloc. */
> + omp_default_mem_space, /* omp_const_mem_alloc. */
Shouldn't this be omp_const_mem_space ?
That is what the standard says and you need to handle it in MEMSPACE_ALLOC
etc. anyway because omp_init_allocator could be done with that memspace.
> + omp_high_bw_mem_space, /* omp_high_bw_mem_alloc. */
> + omp_low_lat_mem_space, /* omp_low_lat_mem_alloc. */
> + omp_low_lat_mem_space, /* omp_cgroup_mem_alloc. */
> + omp_low_lat_mem_space, /* omp_pteam_mem_alloc. */
> + omp_low_lat_mem_space, /* omp_thread_mem_alloc. */
The above 3 are implementation defined, so we can choose whatever we want.
> @@ -496,35 +530,38 @@ retry:
> return ret;
>
> fail:
> - if (allocator_data)
> + int fallback = (allocator_data
> + ? allocator_data->fallback
> + : allocator == omp_default_mem_alloc
> + ? omp_atv_null_fb
> + : omp_atv_default_mem_fb);
A label can be only followed by variable declaration in C2X (and in C++),
I think we should keep libgomp in C99 for the time being.
So, it should be
fail:;
> + || (allocator_data
> + && allocator_data->pool_size < ~(uintptr_t) 0)
> + || !allocator_data)
This would be better written as:
|| allocator_data == NULL
|| allocator_data->pool_size < ~(uintptr_t) 0)
> @@ -766,35 +816,38 @@ retry:
> return ret;
>
> fail:
> - if (allocator_data)
> + int fallback = (allocator_data
> + ? allocator_data->fallback
> + : allocator == omp_default_mem_alloc
> + ? omp_atv_null_fb
> + : omp_atv_default_mem_fb);
See above.
> + || (allocator_data
> + && allocator_data->pool_size < ~(uintptr_t) 0)
> + || !allocator_data)
And again.
> @@ -1073,35 +1139,38 @@ retry:
> return ret;
>
> fail:
> - if (allocator_data)
> + int fallback = (allocator_data
And again.
> + || (allocator_data
> + && allocator_data->pool_size < ~(uintptr_t) 0)
> + || !allocator_data)
And again.
> --- /dev/null
> +++ b/libgomp/config/nvptx/allocator.c
> @@ -0,0 +1,370 @@
> +/* Copyright (C) 2021 Free Software Foundation, Inc.
-2022
> +static void *
> +nvptx_memspace_alloc (omp_memspace_handle_t memspace, size_t size)
> +{
> + if (memspace == omp_low_lat_mem_space)
> + {
> + char *shared_pool;
> + asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool));
Space between " and (
> + uint32_t *chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset);
Space between ) and ( and before *
> + chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset);
Ditto.
> + uint32_t *stillfreeptr = (uint32_t*)(shared_pool
> + + stillfree.desc.offset);
And again.
> + for (unsigned i = 0; i < (unsigned)size/8; i++)
Space in between ) and size and 2 spaces around /
> + result[i] = 0;
> +
> + return result;
> + }
> + else
> + return calloc (1, size);
> +}
> +
> +static void
> +nvptx_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size)
> +{
> + if (memspace == omp_low_lat_mem_space)
> + {
> + char *shared_pool;
> + asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool));
Formatting.
> + uint32_t *chunkptr = (uint32_t*)(shared_pool + chunk.desc.offset);
Again.
> + heapdesc onward_chain = {chunkptr[0]};
> + while (chunk.desc.size != 0 && addr > (void*)chunkptr)
Again (won't enumerate anymore).
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
> @@ -334,6 +334,11 @@ struct ptx_device
>
> static struct ptx_device **ptx_devices;
>
> +/* OpenMP kernels reserve a small amount of ".shared" space for use by
> + omp_alloc. The size is configured using GOMP_NVPTX_LOWLAT_POOL, but the
> + default is set here. */
> +static unsigned lowlat_pool_size = 8*1024;
Spaces around *
> +void
> +test (int n, omp_allocator_handle_t allocator)
> +{
> + #pragma omp target map(to:n) map(to:allocator)
> + {
> + int *a;
> + a = (int *) omp_alloc(n*sizeof(int), allocator);
Space before ( (twice) and around *.
> +
> + omp_free(a, allocator);
Space before (
> + a = (int **) omp_alloc(n*sizeof(int*), allocator);
Again plus space before *)
> + a[i] = omp_alloc(sizeof(int)*10, allocator);
Again.
> + omp_free(a[i], allocator);
Again.
> +
> +return 0;
2 spaces before return 0;
> +}
> +
Jakub