selftests: Avoid repeatedly harming the same innocent context

kbuild test robot Sat, 31 Mar 2018 13:43:17 -0700

Hi Chris,

Thank you for the patch! However, there is something to improve:


[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.16-rc7 next-20180329]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Avoid-repeatedly-harming-the-same-innocent-context/20180401-022503
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-x011-201813 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/intel_hangcheck.c:465:0:
   drivers/gpu/drm/i915/selftests/intel_hangcheck.c: In function 'igt_reset_queue':
>> drivers/gpu/drm/i915/selftests/intel_hangcheck.c:988:5: error: implicit declaration of function 'GEM_TRACE_DUMP'; did you mean 'GEM_TRACE'? [-Werror=implicit-function-declaration]
        GEM_TRACE_DUMP();
        ^~~~~~~~~~~~~~
        GEM_TRACE
   cc1: some warnings being treated as errors

vim +988 drivers/gpu/drm/i915/selftests/intel_hangcheck.c

   922  
   923  static int igt_reset_queue(void *arg)
   924  {
   925          struct drm_i915_private *i915 = arg;
   926          struct intel_engine_cs *engine;
   927          enum intel_engine_id id;
   928          struct hang h;
   929          int err;
   930  
   931          /* Check that we replay pending requests following a hang */
   932  
   933          global_reset_lock(i915);
   934  
   935          mutex_lock(&i915->drm.struct_mutex);
   936          err = hang_init(&h, i915);
   937          if (err)
   938                  goto unlock;
   939  
   940          for_each_engine(engine, i915, id) {
   941                  struct i915_request *prev;
   942                  IGT_TIMEOUT(end_time);
   943                  unsigned int count;
   944  
   945                  if (!intel_engine_can_store_dword(engine))
   946                          continue;
   947  
   948                  prev = hang_create_request(&h, engine);
   949                  if (IS_ERR(prev)) {
   950                          err = PTR_ERR(prev);
   951                          goto fini;
   952                  }
   953  
   954                  i915_request_get(prev);
   955                  __i915_request_add(prev, true);
   956  
   957                  count = 0;
   958                  do {
   959                          struct i915_request *rq;
   960                          unsigned int reset_count;
   961  
   962                          rq = hang_create_request(&h, engine);
   963                          if (IS_ERR(rq)) {
   964                                  err = PTR_ERR(rq);
   965                                  goto fini;
   966                          }
   967  
   968                          i915_request_get(rq);
   969                          __i915_request_add(rq, true);
   970  
   971                          /*
   972                           * XXX We don't handle resetting the kernel context
   973                           * very well. If we trigger a device reset twice in
   974                           * quick succession while the kernel context is
   975                           * executing, we may end up skipping the breadcrumb.
   976                           * This is really only a problem for the selftest as
   977                           * normally there is a large interlude between resets
   978                           * (hangcheck), or we focus on resetting just one
   979                           * engine and so avoid repeatedly resetting innocents.
   980                           */
   981                          err = wait_for_others(i915, engine);
   982                          if (err) {
   983                                  pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
   984                                         __func__, engine->name);
   985                                  i915_request_put(rq);
   986                                  i915_request_put(prev);
   987  
 > 988                                  GEM_TRACE_DUMP();
   989                                  i915_gem_set_wedged(i915);
   990                                  goto fini;
   991                          }
   992  
   993                          if (!wait_for_hang(&h, prev)) {
   994                                  struct drm_printer p = drm_info_printer(i915->drm.dev);
   995  
   996                                  pr_err("%s(%s): Failed to start request %x, at %x\n",
   997                                         __func__, engine->name,
   998                                         prev->fence.seqno, hws_seqno(&h, prev));
   999                                  intel_engine_dump(engine, &p,
  1000                                                    "%s\n", engine->name);
  1001  
  1002                                  i915_request_put(rq);
  1003                                  i915_request_put(prev);
  1004  
  1005                                  i915_reset(i915, 0);
  1006                                  i915_gem_set_wedged(i915);
  1007  
  1008                                  err = -EIO;
  1009                                  goto fini;
  1010                          }
  1011  
  1012                          reset_count = fake_hangcheck(prev);
  1013  
  1014                          i915_reset(i915, I915_RESET_QUIET);
  1015  
  1016                          GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
  1017                                              &i915->gpu_error.flags));
  1018  
  1019                          if (prev->fence.error != -EIO) {
  1020                                  pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
  1021                                         prev->fence.error);
  1022                                  i915_request_put(rq);
  1023                                  i915_request_put(prev);
  1024                                  err = -EINVAL;
  1025                                  goto fini;
  1026                          }
  1027  
  1028                          if (rq->fence.error) {
  1029                                  pr_err("Fence error status not zero [%d] after unrelated reset\n",
  1030                                         rq->fence.error);
  1031                                  i915_request_put(rq);
  1032                                  i915_request_put(prev);
  1033                                  err = -EINVAL;
  1034                                  goto fini;
  1035                          }
  1036  
  1037                          if (i915_reset_count(&i915->gpu_error) == reset_count) {
  1038                                  pr_err("No GPU reset recorded!\n");
  1039                                  i915_request_put(rq);
  1040                                  i915_request_put(prev);
  1041                                  err = -EINVAL;
  1042                                  goto fini;
  1043                          }
  1044  
  1045                          i915_request_put(prev);
  1046                          prev = rq;
  1047                          count++;
  1048                  } while (time_before(jiffies, end_time));
  1049                  pr_info("%s: Completed %d resets\n", engine->name, count);
  1050  
  1051                  *h.batch = MI_BATCH_BUFFER_END;
  1052                  i915_gem_chipset_flush(i915);
  1053  
  1054                  i915_request_put(prev);
  1055  
  1056                  err = flush_test(i915, I915_WAIT_LOCKED);
  1057                  if (err)
  1058                          break;
  1059          }
  1060  
  1061  fini:
  1062          hang_fini(&h);
  1063  unlock:
  1064          mutex_unlock(&i915->drm.struct_mutex);
  1065          global_reset_unlock(i915);
  1066  
  1067          if (i915_terminally_wedged(&i915->gpu_error))
  1068                  return -EIO;
  1069  
  1070          return err;
  1071  }
  1072  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

.config.gz
Description: application/gzip

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Avoid repeatedly harming the same innocent context

Reply via email to