or disable atomics in L3 for HSW.

Yang, Rong R Wed, 17 Jun 2015 20:38:47 -0700

Can you avoid using the global variable atomic_test_result? It is not good
practice to use a global variable to communicate between the cl and intel components.


> -----Original Message-----
> From: Beignet [mailto:[email protected]] On Behalf Of
> [email protected]
> Sent: Friday, June 12, 2015 09:10
> To: [email protected]
> Cc: Luo, Xionghu
> Subject: [Beignet] [PATCH 2/2] use self test to determine enable/or disable
> atomics in L3 for HSW.
> 
> From: Luo Xionghu <[email protected]>
> 
> check the selftest kernel return value, if enqueue kernel failed, set the flag
> to not enable atomics the L3 for HSW.
> 
> This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.
> 
> Signed-off-by: Luo Xionghu <[email protected]>
> ---
>  src/cl_device_id.c        | 30 ++++++++++++++++++------------
>  src/cl_utils.h            |  2 ++
>  src/intel/intel_defines.h |  3 +++
>  src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-
>  4 files changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..278a91c
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -197,6 +197,7 @@ static struct _cl_device_id intel_skl_gt4_device =
> {  #include "cl_gen75_device.h"
>  };
> 
> +int atomic_test_result = 0;
> 
>  LOCAL cl_device_id
>  cl_get_gt_device(void)
> @@ -628,18 +629,23 @@ cl_get_device_ids(cl_platform_id    platform,
> 
>    /* Do we have a usable device? */
>    device = cl_get_gt_device();
> -  if (device && cl_self_test(device)) {
> -    int disable_self_test = 0;
> -    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
> -    const char *env = getenv("OCL_IGNORE_SELF_TEST");
> -    if (env != NULL) {
> -      sscanf(env, "%i", &disable_self_test);
> -    }
> -    if (disable_self_test) {
> -      printf("Beignet: Warning - overriding self-test failure\n");
> -    } else {
> -      printf("Beignet: disabling non-working device\n");
> -      device = 0;
> +  if (device) {
> +    int ret = cl_self_test(device);
> +    if(ret != 0) {
> +      if(ret == 2)
> +        atomic_test_result = 1;
> +      int disable_self_test = 0;
> +      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
> +      const char *env = getenv("OCL_IGNORE_SELF_TEST");
> +      if (env != NULL) {
> +        sscanf(env, "%i", &disable_self_test);
> +      }
> +      if (disable_self_test) {
> +        printf("Beignet: Warning - overriding self-test failure\n");
> +      } else {
> +        printf("Beignet: disabling non-working device\n");
> +        device = 0;
> +      }
>      }
>    }
>    if (!device) {
> diff --git a/src/cl_utils.h b/src/cl_utils.h index 28fdef6..17bb0f5 100644
> --- a/src/cl_utils.h
> +++ b/src/cl_utils.h
> @@ -312,5 +312,7 @@ static INLINE int atomic_add(atomic_t *v, const int c)
> {  static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); }  
> static
> INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); }
> 
> +extern int atomic_test_result;
> +
>  #endif /* __CL_UTILS_H__ */
> 
> diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index
> 1080a91..6ada30c 100644
> --- a/src/intel/intel_defines.h
> +++ b/src/intel/intel_defines.h
> @@ -304,6 +304,9 @@
> 
>  #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
>                                   IS_G4X(intel->device_id) ? 384 : 256)
> +// HSW
> +#define HSW_SCRATCH1_OFFSET                      (0xB038)
> +#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)
> 
>  // L3 cache stuff
>  #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index
> b083dab..7257d46 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -719,7 +719,23 @@ static void
>  intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)  {
>    /* still set L3 in batch buffer for fulsim. */
> -  BEGIN_BATCH(gpgpu->batch, 9);
> +  if(atomic_test_result == 0)
> +  {
> +    BEGIN_BATCH(gpgpu->batch, 15);
> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -
> 2 */
> +    /* FIXME: KMD always disable the atomic in L3 for some reason.
> +       I checked the spec, and don't think we need that workaround now.
> +       Before I send a patch to kernel, let's just enable it here. */
> +    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
> +    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in 
> L3 */
> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -
> 2 */
> +    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
> +    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in 
> L3
> */
> +  }
> +  else
> +  {
> +    BEGIN_BATCH(gpgpu->batch, 9);
> +  }
>    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2
> */
>    OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
>    OUT_BATCH(gpgpu->batch, 0x08800000);
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH 2/2] use self test to determine enable/or disable atomics in L3 for HSW.

Reply via email to