Hi! The latest spec adds a bunch of new functions, this patch attempts to implement them, except I gave up partly in omp_target_associate_ptr and completely in omp_target_disassociate_ptr for now.
As for the plugins, I think we'll want some plugin callback to support offloading device <-> offloading device memcpy (at least for the same devicep and target_id), and perhaps as optimization also some callbacks through which 2 or 3 dimensional omp_target_memcpy_rect in between host and device, or device and host, or device to same device can be optimized to avoid too many separate operations. For the associate/disassociate, I'm waiting for some clarifications (well, for omp_target_is_present too) and then supposedly it should wait until you are done with your enter/exit data changes. Shall I commit this now, or wait until it is clarified etc.? 2015-07-09 Jakub Jelinek <ja...@redhat.com> * omp.h.in (omp_get_initial_device, omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr, omp_target_disassociate_ptr): New prototypes. * omp_lib.f90.in (omp_get_initial_device): New interface. * omp_lib.h.in (omp_get_initial_device): New extern. * libgomp.map (OMP_4.1): Add omp_get_initial_device, omp_get_initial_device_, omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr and omp_target_disassociate_ptr symbols. * env.c (omp_get_initial_device): New function. Add ialias. * fortran.c (omp_get_initial_device): Add iredirect. (omp_get_initial_device_): New function. * target.c (omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect_worker, omp_target_memcpy_rect, omp_target_associate_ptr, omp_target_disassociate_ptr): New functions. --- libgomp/omp.h.in.jj 2015-07-09 09:31:17.183203026 +0200 +++ libgomp/omp.h.in 2015-07-09 11:05:02.761819081 +0200 @@ -139,8 +139,25 @@ extern int omp_get_num_teams (void) __GO extern int omp_get_team_num (void) __GOMP_NOTHROW; extern int omp_is_initial_device (void) __GOMP_NOTHROW; +extern int omp_get_initial_device (void) __GOMP_NOTHROW; extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; +extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void omp_target_free (void *, int) __GOMP_NOTHROW; +extern int omp_target_is_present (void *, __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, + __SIZE_TYPE__, int, int) __GOMP_NOTHROW; +extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, int, int) + __GOMP_NOTHROW; +extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, + __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; + #ifdef __cplusplus } #endif --- libgomp/omp_lib.f90.in.jj 2015-07-09 09:31:17.182203041 +0200 +++ libgomp/omp_lib.f90.in 2015-07-09 11:05:02.762819066 +0200 @@ -422,6 +422,12 @@ end interface interface + function omp_get_initial_device () + integer (4) :: omp_get_initial_device + end function omp_get_initial_device + end interface + + interface function omp_get_max_task_priority () integer (4) :: omp_get_max_task_priority end function omp_get_max_task_priority --- libgomp/omp_lib.h.in.jj 2015-07-09 09:31:17.170203220 +0200 +++ libgomp/omp_lib.h.in 2015-07-09 11:05:02.761819081 +0200 @@ -121,6 +121,8 @@ external omp_is_initial_device logical(4) omp_is_initial_device + external omp_get_initial_device + integer(4) omp_get_initial_device external omp_get_max_task_priority integer(4) omp_get_max_task_priority --- libgomp/libgomp.map.jj 2015-06-24 12:11:40.000000000 +0200 +++ libgomp/libgomp.map 2015-07-09 12:42:58.387515811 +0200 @@ -153,6 +153,15 @@ OMP_4.1 { omp_get_partition_place_nums; omp_get_partition_place_nums_; omp_get_partition_place_nums_8_; + omp_get_initial_device; + omp_get_initial_device_; + omp_target_alloc; + omp_target_free; + omp_target_is_present; + omp_target_memcpy; + omp_target_memcpy_rect; + omp_target_associate_ptr; + omp_target_disassociate_ptr; } OMP_4.0; GOMP_1.0 { --- libgomp/env.c.jj 2015-06-24 12:11:40.000000000 +0200 +++ libgomp/env.c 2015-07-09 15:51:28.659893446 +0200 @@ -29,6 +29,7 @@ #include "libgomp.h" #include "libgomp_f.h" #include "oacc-int.h" +#include "gomp-constants.h" #include <ctype.h> #include <stdlib.h> #include <stdio.h> @@ -1461,6 +1462,12 @@ omp_is_initial_device (void) } int +omp_get_initial_device (void) +{ + return GOMP_DEVICE_HOST_FALLBACK; +} + +int omp_get_num_places (void) { return gomp_places_list_len; @@ -1526,6 +1533,7 @@ ialias (omp_get_num_devices) ialias (omp_get_num_teams) ialias (omp_get_team_num) ialias (omp_is_initial_device) +ialias (omp_get_initial_device) ialias (omp_get_max_task_priority) ialias (omp_get_num_places) ialias (omp_get_place_num) --- libgomp/fortran.c.jj 2015-06-24 12:11:40.000000000 +0200 +++ libgomp/fortran.c 2015-07-09 12:33:13.544183419 +0200 @@ -80,6 +80,7 @@ ialias_redirect (omp_get_num_devices) ialias_redirect (omp_get_num_teams) ialias_redirect (omp_get_team_num) ialias_redirect (omp_is_initial_device) +ialias_redirect (omp_get_initial_device) ialias_redirect (omp_get_max_task_priority) #endif @@ -565,6 +566,12 @@ omp_is_initial_device_ (void) } int32_t +omp_get_initial_device_ (void) +{ + return omp_get_initial_device (); +} + +int32_t omp_get_max_task_priority_ (void) { return omp_get_max_task_priority (); --- libgomp/target.c.jj 2015-06-26 15:05:41.000000000 +0200 +++ libgomp/target.c 2015-07-09 15:53:30.474108680 +0200 @@ -38,6 +38,7 @@ #endif #include <string.h> #include <assert.h> +#include <errno.h> #ifdef PLUGIN_SUPPORT #include <dlfcn.h> @@ -1175,6 +1176,343 @@ GOMP_teams (unsigned int num_teams, unsi (void) num_teams; } +void * +omp_target_alloc (size_t size, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return malloc (size); + + if (device_num < 0) + return NULL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return NULL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return malloc (size); + + gomp_mutex_lock (&devicep->lock); + void *ret = devicep->alloc_func (devicep->target_id, size); + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +void +omp_target_free (void *device_ptr, int device_num) +{ + if (device_ptr == NULL) + return; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + { + free (device_ptr); + return; + } + + if (device_num < 0) + return; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + { + free (device_ptr); + return; + } + + gomp_mutex_lock (&devicep->lock); + devicep->free_func (devicep->target_id, device_ptr); + gomp_mutex_unlock (&devicep->lock); +} + +int +omp_target_is_present (void *ptr, size_t offset, int device_num) +{ + if (ptr == NULL) + return offset == 0; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 1; + + if (device_num < 0) + return 0; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return 0; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return 1; + + gomp_mutex_lock (&devicep->lock); + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = (uintptr_t) ptr + offset; + cur_node.host_end = cur_node.host_start + 1; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + /* Could be possibly zero size array section. */ + cur_node.host_end--; + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + cur_node.host_start--; + n = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_start++; + } + } + int ret = n != NULL; + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, + size_t src_offset, int dst_device, int src_device) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (dst_device != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device < 0) + return EINVAL; + + src_devicep = resolve_device (src_device); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + if (src_devicep == NULL && dst_devicep == NULL) + { + memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); + return 0; + } + if (src_devicep == NULL) + { + gomp_mutex_lock (&dst_devicep->lock); + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&dst_devicep->lock); + return 0; + } + if (dst_devicep == NULL) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + /* FIXME: Support device-to-device somehow? */ + return EINVAL; +} + +static int +omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + struct gomp_device_descr *dst_devicep, + struct gomp_device_descr *src_devicep) +{ + size_t dst_slice = element_size; + size_t src_slice = element_size; + size_t j, dst_off, src_off, length; + int i, ret; + + if (num_dims == 1) + { + if (__builtin_mul_overflow (element_size, volume[0], &length) + || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) + return EINVAL; + if (dst_devicep == NULL && src_devicep == NULL) + memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + else if (src_devicep == NULL) + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (dst_devicep == NULL) + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else + return EINVAL; + return 0; + } + + /* FIXME: it would be nice to have some plugin function to handle + num_dims == 2 and num_dims == 3 more efficiently. Larger ones can + be handled in the generic recursion below, and for host-host it + should be used even for any num_dims >= 2. */ + + for (i = 1; i < num_dims; i++) + if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) + || __builtin_mul_overflow (src_slice, dst_dimensions[i], &src_slice)) + return EINVAL; + if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) + return EINVAL; + for (j = 0; j < volume[0]; j++) + { + ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, + (char *) src + src_off, + element_size, num_dims - 1, + volume + 1, dst_offsets + 1, + src_offsets + 1, dst_dimensions + 1, + src_dimensions + 1, dst_devicep, + src_devicep); + if (ret) + return ret; + dst_off += dst_slice; + src_off += src_slice; + } + return 0; +} + +int +omp_target_memcpy_rect (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (!dst && !src) + return INT_MAX; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + + /* FIXME: Support device-to-device somehow? */ + if (src_devicep != NULL && dst_devicep != NULL) + return EINVAL; + + if (src_devicep) + gomp_mutex_lock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_lock (&dst_devicep->lock); + int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, + volume, dst_offsets, src_offsets, + dst_dimensions, src_dimensions, + dst_devicep, src_devicep); + if (src_devicep) + gomp_mutex_unlock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_unlock (&dst_devicep->lock); + return ret; +} + +int +omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, + size_t device_offset, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) host_ptr; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n) + { + if (n->tgt->tgt_start + n->tgt_offset + == (uintptr_t) device_ptr + device_offset + && n->host_start <= cur_node.host_start + && n->host_end >= cur_node.host_end) + ret = 0; + } + else + { + /* FIXME: finish this. */ + } + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_disassociate_ptr (void *ptr, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + /* FIXME: finish this. */ + return EINVAL; +} + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed Jakub