LGTM, just pushed, thanks.
On Mon, Oct 27, 2014 at 04:16:51PM +0800, Yang Rong wrote: > The intel gpgpu struct is destroyed when a new gpgpu struct needed. But in > that time, > the command batch relative with the destroyed gpgpu may have not finish, and > the resource > in gpgpu still used by gpgpu, can't be destroyed. > So, when delete a gpgpu, check the batch status, if have not complete, insert > to list in intel driver, > and delete all finished gpgpu in that list. > > V2: V1 assume all the gpgpu list would complete in order, but it is not true. > The insert order in is the > gpgpu delete order, not the flush command batch order. So visit and > delete all complete gpgpu in the list. > > Signed-off-by: Yang Rong <[email protected]> > --- > src/intel/intel_batchbuffer.c | 5 +- > src/intel/intel_driver.c | 27 +-------- > src/intel/intel_driver.h | 28 ++++++++++ > src/intel/intel_gpgpu.c | 127 > ++++++++++++++++++++++++------------------ > src/intel/intel_gpgpu.h | 64 +++++++++++++++++++++ > 5 files changed, 169 insertions(+), 82 deletions(-) > > diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c > index ba6b05f..b054fc7 100644 > --- a/src/intel/intel_batchbuffer.c > +++ b/src/intel/intel_batchbuffer.c > @@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch) > if (!is_locked) > intel_driver_unlock_hardware(batch->intel); > > - // Release the buffer > - intel_batchbuffer_terminate(batch); > + // Can't release buffer here. gpgpu only can be delete only when the batch > buffer is complete. > + // Remain the buffer for gpgpu delete check. > + //intel_batchbuffer_terminate(batch); > } > > LOCAL void > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c > index 390e965..bb97220 100644 > --- a/src/intel/intel_driver.c > +++ b/src/intel/intel_driver.c > @@ -79,31 +79,6 @@ > #include "cl_device_id.h" > #include "cl_platform_id.h" > > -#define SET_BLOCKED_SIGSET(DRIVER) do { \ > - sigset_t bl_mask; \ > - sigfillset(&bl_mask); \ > - sigdelset(&bl_mask, SIGFPE); \ > - sigdelset(&bl_mask, SIGILL); \ > - sigdelset(&bl_mask, SIGSEGV); \ > - sigdelset(&bl_mask, SIGBUS); \ > - sigdelset(&bl_mask, SIGKILL); \ > - pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ > -} while (0) > - > -#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \ > - pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \ > -} while (0) > - > -#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \ > - SET_BLOCKED_SIGSET(DRIVER); \ > - pthread_mutex_lock(&(DRIVER)->ctxmutex); \ > -} while (0) > - > -#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \ > - pthread_mutex_unlock(&(DRIVER)->ctxmutex); \ > - RESTORE_BLOCKED_SIGSET(DRIVER); \ > -} while (0) > - > static void > intel_driver_delete(intel_driver_t *driver) > { > @@ -423,11 +398,13 @@ intel_get_device_id(void) > return intel_device_id; > } > > +extern void intel_gpgpu_delete_all(intel_driver_t *driver); > static void > cl_intel_driver_delete(intel_driver_t *driver) > { > if (driver == NULL) > return; > + intel_gpgpu_delete_all(driver); > intel_driver_context_destroy(driver); > intel_driver_close(driver); > intel_driver_terminate(driver); > diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h > index 107fdfc..8b82738 100644 > --- a/src/intel/intel_driver.h > +++ b/src/intel/intel_driver.h > @@ -54,6 +54,7 @@ > #include <drm.h> > #include <i915_drm.h> > #include <intel_bufmgr.h> > +#include <intel/intel_gpgpu.h> > > #define CMD_MI (0x0 << 29) > #define CMD_2D (0x2 << 29) > @@ -73,6 +74,7 @@ > #define BR13_8888 (0x3 << 24) > > struct dri_state; > +struct intel_gpgpu_node; > typedef struct _XDisplay Display; > > typedef struct intel_driver > @@ -88,8 +90,34 @@ typedef struct intel_driver > int need_close; > Display *x11_display; > struct dri_state *dri_ctx; > + struct intel_gpgpu_node *gpgpu_list; > } intel_driver_t; > > +#define SET_BLOCKED_SIGSET(DRIVER) do { \ > + sigset_t bl_mask; \ > + sigfillset(&bl_mask); \ > + sigdelset(&bl_mask, SIGFPE); \ > + sigdelset(&bl_mask, SIGILL); \ > + sigdelset(&bl_mask, SIGSEGV); \ > + sigdelset(&bl_mask, SIGBUS); \ > + sigdelset(&bl_mask, SIGKILL); \ > + pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ > +} while (0) > + > +#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \ > + pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \ > +} while (0) > + > +#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \ > + SET_BLOCKED_SIGSET(DRIVER); \ > + pthread_mutex_lock(&(DRIVER)->ctxmutex); \ > +} while (0) > + > +#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \ > + pthread_mutex_unlock(&(DRIVER)->ctxmutex); \ > + RESTORE_BLOCKED_SIGSET(DRIVER); \ > +} while (0) > + > /* device control */ > extern void intel_driver_lock_hardware(intel_driver_t*); > extern void intel_driver_unlock_hardware(intel_driver_t*); > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index 325d982..603def6 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -33,8 +33,6 @@ > #include "intel/intel_gpgpu.h" > #include "intel/intel_defines.h" > #include "intel/intel_structs.h" > -#include "intel/intel_batchbuffer.h" > -#include "intel/intel_driver.h" > #include "program.h" // for BTI_RESERVED_NUM > > #include "cl_alloc.h" > @@ -69,58 +67,6 @@ typedef struct intel_event { > > #define MAX_IF_DESC 32 > > -/* We can bind only a limited number of buffers */ > -enum { max_buf_n = 128 }; > - > -enum { max_img_n = 128}; > - > -enum {max_sampler_n = 16 }; > - > -/* Handle GPGPU state */ > -struct intel_gpgpu > -{ > - void* ker_opaque; > - size_t global_wk_sz[3]; > - void* printf_info; > - intel_driver_t *drv; > - intel_batchbuffer_t *batch; > - cl_gpgpu_kernel *ker; > - drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */ > - uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers > binded for the call */ > - uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer > */ > - uint32_t binded_n; /* number of buffers binded */ > - > - unsigned long img_bitmap; /* image usage bitmap. */ > - unsigned int img_index_base; /* base index for image surface.*/ > - > - unsigned long sampler_bitmap; /* sampler usage bitmap. */ > - > - struct { drm_intel_bo *bo; } stack_b; > - struct { drm_intel_bo *bo; } perf_b; > - struct { drm_intel_bo *bo; } scratch_b; > - struct { drm_intel_bo *bo; } constant_b; > - struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ > - struct { drm_intel_bo *bo; > - drm_intel_bo *ibo;} printf_b; /* the printf buf and index > buf*/ > - > - struct { drm_intel_bo *bo; } aux_buf; > - struct { > - uint32_t surface_heap_offset; > - uint32_t curbe_offset; > - uint32_t idrt_offset; > - uint32_t sampler_state_offset; > - uint32_t sampler_border_color_state_offset; > - } aux_offset; > - > - uint32_t per_thread_scratch; > - struct { > - uint32_t num_cs_entries; > - uint32_t size_cs_entry; /* size of one entry in 512bit elements */ > - } curb; > - > - uint32_t max_threads; /* max threads requested by the user */ > -}; > - > typedef struct intel_gpgpu intel_gpgpu_t; > > typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm); > @@ -172,7 +118,7 @@ static void intel_gpgpu_unref_batch_buf(void *buf) > } > > static void > -intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > +intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu) > { > if (gpgpu == NULL) > return; > @@ -198,6 +144,77 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > cl_free(gpgpu); > } > > +/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy > */ > +void intel_gpgpu_delete_all(intel_driver_t *drv) > +{ > + struct intel_gpgpu_node *p; > + if(drv->gpgpu_list == NULL) > + return; > + > + PPTHREAD_MUTEX_LOCK(drv); > + while(drv->gpgpu_list) { > + p = drv->gpgpu_list; > + drv->gpgpu_list = p->next; > + intel_gpgpu_delete_finished(p->gpgpu); > + cl_free(p); > + } > + PPTHREAD_MUTEX_UNLOCK(drv); > +} > + > +static void > +intel_gpgpu_delete(intel_gpgpu_t *gpgpu) > +{ > + intel_driver_t *drv = gpgpu->drv; > + struct intel_gpgpu_node *p, *node; > + > + PPTHREAD_MUTEX_LOCK(drv); > + p = drv->gpgpu_list; > + if(p) { > + node = p->next; > + while(node) { > + if(node->gpgpu->batch && node->gpgpu->batch->buffer && > + !drm_intel_bo_busy(node->gpgpu->batch->buffer)) { > + p->next = node->next; > + intel_gpgpu_delete_finished(node->gpgpu); > + cl_free(node); > + node = p->next; > + } else { > + p = node; > + node = node->next; > + } > + } > + node = drv->gpgpu_list; > + if(node->gpgpu->batch && node->gpgpu->batch->buffer && > + !drm_intel_bo_busy(node->gpgpu->batch->buffer)) { > + drv->gpgpu_list = drv->gpgpu_list->next; > + intel_gpgpu_delete_finished(node->gpgpu); > + cl_free(node); > + node = p->next; > + } > + } > + if (gpgpu == NULL) > + return; > + > + if(gpgpu->batch && gpgpu->batch->buffer && > + !drm_intel_bo_busy(gpgpu->batch->buffer)) { > + TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node)); > + node->gpgpu = gpgpu; > + node->next = NULL; > + p = drv->gpgpu_list; > + if(p == NULL) > + drv->gpgpu_list= node; > + else { > + while(p->next) > + p = p->next; > + p->next = node; > + } > + } else > + intel_gpgpu_delete_finished(gpgpu); > + > +error: > + PPTHREAD_MUTEX_UNLOCK(drv); > +} > + > static intel_gpgpu_t* > intel_gpgpu_new(intel_driver_t *drv) > { > diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h > index d593ac7..7c89ac7 100644 > --- a/src/intel/intel_gpgpu.h > +++ b/src/intel/intel_gpgpu.h > @@ -23,10 +23,74 @@ > > #include "cl_utils.h" > #include "cl_driver.h" > +#include "intel/intel_batchbuffer.h" > +#include "intel/intel_driver.h" > > #include <stdlib.h> > #include <stdint.h> > > + > +/* We can bind only a limited number of buffers */ > +enum { max_buf_n = 128 }; > + > +enum { max_img_n = 128}; > + > +enum {max_sampler_n = 16 }; > + > +struct intel_driver; > +struct intel_batchbuffer; > + > +/* Handle GPGPU state */ > +struct intel_gpgpu > +{ > + void* ker_opaque; > + size_t global_wk_sz[3]; > + void* printf_info; > + struct intel_driver *drv; > + struct intel_batchbuffer *batch; > + cl_gpgpu_kernel *ker; > + drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */ > + uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers > binded for the call */ > + uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer > */ > + uint32_t binded_n; /* number of buffers binded */ > + > + unsigned long img_bitmap; /* image usage bitmap. */ > + unsigned int img_index_base; /* base index for image surface.*/ > + > + unsigned long sampler_bitmap; /* sampler usage bitmap. */ > + > + struct { drm_intel_bo *bo; } stack_b; > + struct { drm_intel_bo *bo; } perf_b; > + struct { drm_intel_bo *bo; } scratch_b; > + struct { drm_intel_bo *bo; } constant_b; > + struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ > + struct { drm_intel_bo *bo; > + drm_intel_bo *ibo;} printf_b; /* the printf buf and index > buf*/ > + > + struct { drm_intel_bo *bo; } aux_buf; > + struct { > + uint32_t surface_heap_offset; > + uint32_t curbe_offset; > + uint32_t idrt_offset; > + uint32_t sampler_state_offset; > + uint32_t sampler_border_color_state_offset; > + } aux_offset; > + > + uint32_t per_thread_scratch; > + struct { > + uint32_t num_cs_entries; > + uint32_t size_cs_entry; /* size of one entry in 512bit elements */ > + } curb; > + > + uint32_t max_threads; /* max threads requested by the user */ > +}; > + > +struct intel_gpgpu_node { > + struct intel_gpgpu *gpgpu; > + struct intel_gpgpu_node *next; > +}; > + > + > /* Set the gpgpu related call backs */ > extern void intel_set_gpgpu_callbacks(int device_id); > > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
