Can you avoid using the global variable atomic_test_result? It is not good practice to use a global variable to communicate between the cl and intel components.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > [email protected] > Sent: Friday, June 12, 2015 09:10 > To: [email protected] > Cc: Luo, Xionghu > Subject: [Beignet] [PATCH 2/2] use self test to determine enable/or disable > atomics in L3 for HSW. > > From: Luo Xionghu <[email protected]> > > check the selftest kernel return value, if enqueue kernel failed, set the flag > to not enable atomics the L3 for HSW. > > This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc. > > Signed-off-by: Luo Xionghu <[email protected]> > --- > src/cl_device_id.c | 30 ++++++++++++++++++------------ > src/cl_utils.h | 2 ++ > src/intel/intel_defines.h | 3 +++ > src/intel/intel_gpgpu.c | 18 +++++++++++++++++- > 4 files changed, 40 insertions(+), 13 deletions(-) > > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..278a91c > 100644 > --- a/src/cl_device_id.c > +++ b/src/cl_device_id.c > @@ -197,6 +197,7 @@ static struct _cl_device_id intel_skl_gt4_device = > { #include "cl_gen75_device.h" > }; > > +int atomic_test_result = 0; > > LOCAL cl_device_id > cl_get_gt_device(void) > @@ -628,18 +629,23 @@ cl_get_device_ids(cl_platform_id platform, > > /* Do we have a usable device? 
*/ > device = cl_get_gt_device(); > - if (device && cl_self_test(device)) { > - int disable_self_test = 0; > - // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++ > - const char *env = getenv("OCL_IGNORE_SELF_TEST"); > - if (env != NULL) { > - sscanf(env, "%i", &disable_self_test); > - } > - if (disable_self_test) { > - printf("Beignet: Warning - overriding self-test failure\n"); > - } else { > - printf("Beignet: disabling non-working device\n"); > - device = 0; > + if (device) { > + int ret = cl_self_test(device); > + if(ret != 0) { > + if(ret == 2) > + atomic_test_result = 1; > + int disable_self_test = 0; > + // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++ > + const char *env = getenv("OCL_IGNORE_SELF_TEST"); > + if (env != NULL) { > + sscanf(env, "%i", &disable_self_test); > + } > + if (disable_self_test) { > + printf("Beignet: Warning - overriding self-test failure\n"); > + } else { > + printf("Beignet: disabling non-working device\n"); > + device = 0; > + } > } > } > if (!device) { > diff --git a/src/cl_utils.h b/src/cl_utils.h index 28fdef6..17bb0f5 100644 > --- a/src/cl_utils.h > +++ b/src/cl_utils.h > @@ -312,5 +312,7 @@ static INLINE int atomic_add(atomic_t *v, const int c) > { static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); } > static > INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); } > > +extern int atomic_test_result; > + > #endif /* __CL_UTILS_H__ */ > > diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index > 1080a91..6ada30c 100644 > --- a/src/intel/intel_defines.h > +++ b/src/intel/intel_defines.h > @@ -304,6 +304,9 @@ > > #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \ > IS_G4X(intel->device_id) ? 
384 : 256) > +// HSW > +#define HSW_SCRATCH1_OFFSET (0xB038) > +#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C) > > // L3 cache stuff > #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010) > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index > b083dab..7257d46 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -719,7 +719,23 @@ static void > intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm) { > /* still set L3 in batch buffer for fulsim. */ > - BEGIN_BATCH(gpgpu->batch, 9); > + if(atomic_test_result == 0) > + { > + BEGIN_BATCH(gpgpu->batch, 15); > + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - > 2 */ > + /* FIXME: KMD always disable the atomic in L3 for some reason. > + I checked the spec, and don't think we need that workaround now. > + Before I send a patch to kernel, let's just enable it here. */ > + OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET); > + OUT_BATCH(gpgpu->batch, 0); /* enable atomic in > L3 */ > + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - > 2 */ > + OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET); > + OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in > L3 > */ > + } > + else > + { > + BEGIN_BATCH(gpgpu->batch, 9); > + } > OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 > */ > OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET); > OUT_BATCH(gpgpu->batch, 0x08800000); > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
