Yes, I assume that all the gpgpu in the list would complete in order, but it may not true. I have sent a new version.
> -----Original Message----- > From: Zhigang Gong [mailto:[email protected]] > Sent: Monday, October 27, 2014 14:33 > To: Yang, Rong R > Cc: [email protected] > Subject: Re: [Beignet] [PATCH] Refine the intel gpgpu delete. > > > When loop for the gpgpu list, it's better to check each gpgpu structure once > with non-blocking mode. Please see below diff. > > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index > 7f9b8ac..c4a1c9c 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -168,14 +168,17 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > struct intel_gpgpu_node *p, *node; > > PPTHREAD_MUTEX_LOCK(drv); > - while(drv->gpgpu_list) { > + p = drv->gpgpu_list; > + while(p) { > p = drv->gpgpu_list; > if(p->gpgpu->batch && p->gpgpu->batch->buffer && > !drm_intel_bo_busy(p->gpgpu->batch->buffer)) { > drv->gpgpu_list = p->next; > intel_gpgpu_delete_finished(p->gpgpu); > cl_free(p); > - } > + p = drv->gpgpu_list->next; > + } else > + p = p->next; > } > if (gpgpu == NULL) > return; > > > On Mon, Oct 27, 2014 at 02:01:26PM +0800, Yang Rong wrote: > > The intel gpgpu struct is destroyed when a new gpgpu struct needed. > > But in that time, the command batch relative with the destroyed gpgpu > > may have not finish, and the resource in gpgpu still used by gpgpu, can't be > destroyed. > > So, when delete a gpgpu, check the batch status, if have not complete, > > insert to list in intel driver, and delete all finished gpgpu in that list. > > > > Signed-off-by: Yang Rong <[email protected]> > > --- > > src/intel/intel_batchbuffer.c | 5 +- > > src/intel/intel_driver.c | 27 +--------- > > src/intel/intel_driver.h | 28 +++++++++++ > > src/intel/intel_gpgpu.c | 112 +++++++++++++++++++++------------------ > --- > > src/intel/intel_gpgpu.h | 64 ++++++++++++++++++++++++ > > 5 files changed, 154 insertions(+), 82 deletions(-) > > > > diff --git a/src/intel/intel_batchbuffer.c > > b/src/intel/intel_batchbuffer.c index ba6b05f..b054fc7 100644 > > --- a/src/intel/intel_batchbuffer.c > > +++ b/src/intel/intel_batchbuffer.c > > @@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch) > > if (!is_locked) > > intel_driver_unlock_hardware(batch->intel); > > > > - // Release the buffer > > - intel_batchbuffer_terminate(batch); > > + // Can't release buffer here. gpgpu only can be delete only when the > batch buffer is complete. > > + // Remain the buffer for gpgpu delete check. > > + //intel_batchbuffer_terminate(batch); > > } > > > > LOCAL void > > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index > > 390e965..bb97220 100644 > > --- a/src/intel/intel_driver.c > > +++ b/src/intel/intel_driver.c > > @@ -79,31 +79,6 @@ > > #include "cl_device_id.h" > > #include "cl_platform_id.h" > > > > -#define SET_BLOCKED_SIGSET(DRIVER) do { \ > > - sigset_t bl_mask; \ > > - sigfillset(&bl_mask); \ > > - sigdelset(&bl_mask, SIGFPE); \ > > - sigdelset(&bl_mask, SIGILL); \ > > - sigdelset(&bl_mask, SIGSEGV); \ > > - sigdelset(&bl_mask, SIGBUS); \ > > - sigdelset(&bl_mask, SIGKILL); \ > > - pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ -} > > while (0) > > - > > -#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \ > > - pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \ > > -} while (0) > > - > > -#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \ > > - SET_BLOCKED_SIGSET(DRIVER); \ > > - pthread_mutex_lock(&(DRIVER)->ctxmutex); \ > > -} while (0) > > - > > -#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \ > > - pthread_mutex_unlock(&(DRIVER)->ctxmutex); \ > > - RESTORE_BLOCKED_SIGSET(DRIVER); \ > > -} while (0) > > - > > static void > > intel_driver_delete(intel_driver_t *driver) { @@ -423,11 +398,13 @@ > > intel_get_device_id(void) > > return intel_device_id; > > } > > > > +extern void intel_gpgpu_delete_all(intel_driver_t *driver); > > static void > > cl_intel_driver_delete(intel_driver_t *driver) { > > if (driver == NULL) > > return; > > + intel_gpgpu_delete_all(driver); > > intel_driver_context_destroy(driver); > > intel_driver_close(driver); > > intel_driver_terminate(driver); > > diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h index > > 107fdfc..8b82738 100644 > > --- a/src/intel/intel_driver.h > > +++ b/src/intel/intel_driver.h > > @@ -54,6 +54,7 @@ > > #include <drm.h> > > #include <i915_drm.h> > > #include <intel_bufmgr.h> > > +#include <intel/intel_gpgpu.h> > > > > #define CMD_MI (0x0 << 29) > > #define CMD_2D (0x2 << 29) > > @@ -73,6 +74,7 @@ > > #define BR13_8888 (0x3 << 24) > > > > struct dri_state; > > +struct intel_gpgpu_node; > > typedef struct _XDisplay Display; > > > > typedef struct intel_driver > > @@ -88,8 +90,34 @@ typedef struct intel_driver > > int need_close; > > Display *x11_display; > > struct dri_state *dri_ctx; > > + struct intel_gpgpu_node *gpgpu_list; > > } intel_driver_t; > > > > +#define SET_BLOCKED_SIGSET(DRIVER) do { \ > > + sigset_t bl_mask; \ > > + sigfillset(&bl_mask); \ > > + sigdelset(&bl_mask, SIGFPE); \ > > + sigdelset(&bl_mask, SIGILL); \ > > + sigdelset(&bl_mask, SIGSEGV); \ > > + sigdelset(&bl_mask, SIGBUS); \ > > + sigdelset(&bl_mask, SIGKILL); \ > > + pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ } > > +while (0) > > + > > +#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \ > > + pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \ > > +} while (0) > > + > > +#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \ > > + SET_BLOCKED_SIGSET(DRIVER); \ > > + pthread_mutex_lock(&(DRIVER)->ctxmutex); \ > > +} while (0) > > + > > +#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \ > > + pthread_mutex_unlock(&(DRIVER)->ctxmutex); \ > > + RESTORE_BLOCKED_SIGSET(DRIVER); \ > > +} while (0) > > + > > /* device control */ > > extern void intel_driver_lock_hardware(intel_driver_t*); > > extern void intel_driver_unlock_hardware(intel_driver_t*); > > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index > > 325d982..7f9b8ac 100644 > > --- a/src/intel/intel_gpgpu.c > > +++ b/src/intel/intel_gpgpu.c > > @@ -33,8 +33,6 @@ > > #include "intel/intel_gpgpu.h" > > #include "intel/intel_defines.h" > > #include "intel/intel_structs.h" > > -#include "intel/intel_batchbuffer.h" > > -#include "intel/intel_driver.h" > > #include "program.h" // for BTI_RESERVED_NUM > > > > #include "cl_alloc.h" > > @@ -69,58 +67,6 @@ typedef struct intel_event { > > > > #define MAX_IF_DESC 32 > > > > -/* We can bind only a limited number of buffers */ -enum { max_buf_n > > = 128 }; > > - > > -enum { max_img_n = 128}; > > - > > -enum {max_sampler_n = 16 }; > > - > > -/* Handle GPGPU state */ > > -struct intel_gpgpu > > -{ > > - void* ker_opaque; > > - size_t global_wk_sz[3]; > > - void* printf_info; > > - intel_driver_t *drv; > > - intel_batchbuffer_t *batch; > > - cl_gpgpu_kernel *ker; > > - drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the > > call */ > > - uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers > binded for the call */ > > - uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe > buffer */ > > - uint32_t binded_n; /* number of buffers binded */ > > - > > - unsigned long img_bitmap; /* image usage bitmap. */ > > - unsigned int img_index_base; /* base index for image surface.*/ > > - > > - unsigned long sampler_bitmap; /* sampler usage bitmap. */ > > - > > - struct { drm_intel_bo *bo; } stack_b; > > - struct { drm_intel_bo *bo; } perf_b; > > - struct { drm_intel_bo *bo; } scratch_b; > > - struct { drm_intel_bo *bo; } constant_b; > > - struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ > > - struct { drm_intel_bo *bo; > > - drm_intel_bo *ibo;} printf_b; /* the printf buf and index > > buf*/ > > - > > - struct { drm_intel_bo *bo; } aux_buf; > > - struct { > > - uint32_t surface_heap_offset; > > - uint32_t curbe_offset; > > - uint32_t idrt_offset; > > - uint32_t sampler_state_offset; > > - uint32_t sampler_border_color_state_offset; > > - } aux_offset; > > - > > - uint32_t per_thread_scratch; > > - struct { > > - uint32_t num_cs_entries; > > - uint32_t size_cs_entry; /* size of one entry in 512bit elements */ > > - } curb; > > - > > - uint32_t max_threads; /* max threads requested by the user */ > > -}; > > - > > typedef struct intel_gpgpu intel_gpgpu_t; > > > > typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t > > use_slm); @@ -172,7 +118,7 @@ static void > > intel_gpgpu_unref_batch_buf(void *buf) } > > > > static void > > -intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > > +intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu) > > { > > if (gpgpu == NULL) > > return; > > @@ -198,6 +144,62 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > > cl_free(gpgpu); > > } > > > > +/* Destroy the all intel_gpgpu, no matter finish or not, when driver > > +destroy */ void intel_gpgpu_delete_all(intel_driver_t *drv) { > > + struct intel_gpgpu_node *p; > > + if(drv->gpgpu_list == NULL) > > + return; > > + > > + PPTHREAD_MUTEX_LOCK(drv); > > + while(drv->gpgpu_list) { > > + p = drv->gpgpu_list; > > + drv->gpgpu_list = p->next; > > + intel_gpgpu_delete_finished(p->gpgpu); > > + cl_free(p); > > + } > > + PPTHREAD_MUTEX_UNLOCK(drv); > > +} > > + > > +static void > > +intel_gpgpu_delete(intel_gpgpu_t *gpgpu) { > > + intel_driver_t *drv = gpgpu->drv; > > + struct intel_gpgpu_node *p, *node; > > + > > + PPTHREAD_MUTEX_LOCK(drv); > > + while(drv->gpgpu_list) { > > + p = drv->gpgpu_list; > > + if(p->gpgpu->batch && p->gpgpu->batch->buffer && > > + !drm_intel_bo_busy(p->gpgpu->batch->buffer)) { > > + drv->gpgpu_list = p->next; > > + intel_gpgpu_delete_finished(p->gpgpu); > > + cl_free(p); > > + } > > + } > > + if (gpgpu == NULL) > > + return; > > + > > + if(gpgpu->batch && gpgpu->batch->buffer && > > + !drm_intel_bo_busy(gpgpu->batch->buffer)) { > > + TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node)); > > + node->gpgpu = gpgpu; > > + node->next = NULL; > > + p = drv->gpgpu_list; > > + if(p == NULL) > > + drv->gpgpu_list= node; > > + else { > > + while(p->next) > > + p = p->next; > > + p->next = node; > > + } > > + } else > > + intel_gpgpu_delete_finished(gpgpu); > > + > > +error: > > + PPTHREAD_MUTEX_UNLOCK(drv); > > +} > > + > > static intel_gpgpu_t* > > intel_gpgpu_new(intel_driver_t *drv) > > { > > diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h index > > d593ac7..7c89ac7 100644 > > --- a/src/intel/intel_gpgpu.h > > +++ b/src/intel/intel_gpgpu.h > > @@ -23,10 +23,74 @@ > > > > #include "cl_utils.h" > > #include "cl_driver.h" > > +#include "intel/intel_batchbuffer.h" > > +#include "intel/intel_driver.h" > > > > #include <stdlib.h> > > #include <stdint.h> > > > > + > > +/* We can bind only a limited number of buffers */ enum { max_buf_n = > > +128 }; > > + > > +enum { max_img_n = 128}; > > + > > +enum {max_sampler_n = 16 }; > > + > > +struct intel_driver; > > +struct intel_batchbuffer; > > + > > +/* Handle GPGPU state */ > > +struct intel_gpgpu > > +{ > > + void* ker_opaque; > > + size_t global_wk_sz[3]; > > + void* printf_info; > > + struct intel_driver *drv; > > + struct intel_batchbuffer *batch; > > + cl_gpgpu_kernel *ker; > > + drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the > > +call */ > > + uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers > binded for the call */ > > + uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe > buffer */ > > + uint32_t binded_n; /* number of buffers binded */ > > + > > + unsigned long img_bitmap; /* image usage bitmap. */ > > + unsigned int img_index_base; /* base index for image surface.*/ > > + > > + unsigned long sampler_bitmap; /* sampler usage bitmap. */ > > + > > + struct { drm_intel_bo *bo; } stack_b; struct { drm_intel_bo *bo; } > > + perf_b; struct { drm_intel_bo *bo; } scratch_b; struct { > > + drm_intel_bo *bo; } constant_b; struct { drm_intel_bo *bo; } > > + time_stamp_b; /* time stamp buffer */ struct { drm_intel_bo *bo; > > + drm_intel_bo *ibo;} printf_b; /* the printf buf and index > > buf*/ > > + > > + struct { drm_intel_bo *bo; } aux_buf; struct { > > + uint32_t surface_heap_offset; > > + uint32_t curbe_offset; > > + uint32_t idrt_offset; > > + uint32_t sampler_state_offset; > > + uint32_t sampler_border_color_state_offset; > > + } aux_offset; > > + > > + uint32_t per_thread_scratch; > > + struct { > > + uint32_t num_cs_entries; > > + uint32_t size_cs_entry; /* size of one entry in 512bit elements > > + */ } curb; > > + > > + uint32_t max_threads; /* max threads requested by the user */ > > +}; > > + > > +struct intel_gpgpu_node { > > + struct intel_gpgpu *gpgpu; > > + struct intel_gpgpu_node *next; > > +}; > > + > > + > > /* Set the gpgpu related call backs */ extern void > > intel_set_gpgpu_callbacks(int device_id); > > > > -- > > 1.8.3.2 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
