By listening to the uevents from the kernel, we can detect when the GPU
hangs. To do so, we fork a helper process that monitors the uevents and
sends a signal back to the parent when a hang is reported.

Signed-off-by: Chris Wilson <[email protected]>
---
 benchmarks/Makefile.am   |  2 +-
 debugger/Makefile.am     |  2 +-
 demos/Makefile.am        |  2 +-
 lib/Makefile.am          | 12 +++++--
 lib/igt_aux.c            | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux.h            |  3 ++
 tests/Makefile.am        |  3 +-
 tests/gem_exec_whisper.c |  4 +++
 tools/Makefile.am        |  2 +-
 9 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index c67f472..2c2d100 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -3,7 +3,7 @@ include Makefile.sources
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 
 benchmarks_LTLIBRARIES = gem_exec_tracer.la
 gem_exec_tracer_la_LDFLAGS = -module -avoid-version -no-undefined
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index 5a523f5..9d231d3 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -15,4 +15,4 @@ AM_CFLAGS =                   \
        $(LIBUNWIND_CFLAGS)     \
        $(CWARNFLAGS)
 
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/demos/Makefile.am b/demos/Makefile.am
index d18a705..e6fbb3b 100644
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -4,4 +4,4 @@ bin_PROGRAMS =                          \
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6..d2f2e16 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
 endif
 
 AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
            -DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
            -DIGT_DATADIR=\""$(pkgdatadir)"\" \
            -DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
            -pthread
 
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS)
 
+libintel_tools_la_LIBADD = \
+       $(DRM_LIBS) \
+       $(PCIACCESS_LIBS) \
+       $(CAIRO_LIBS) \
+       $(LIBUDEV_LIBS) \
+       $(LIBUNWIND_LIBS) \
+       $(TIMER_LIBS) \
+       -lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index 7deaf2f..d8f72fb 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include <sys/poll.h>
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -360,6 +361,120 @@ void igt_stop_signal_helper(void)
        sig_stat = 0;
 }
 
+#if HAVE_UDEV
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector;
+
+/*
+ * Body of the forked helper process: watch kernel uevents from the "drm"
+ * subsystem and send SIGRTMAX to @pid whenever the device numbered @rdev
+ * reports ERROR=1. Exits once poll() fails, an event cannot be read, or
+ * @pid no longer exists.
+ */
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev)
+{
+       struct udev_monitor *mon =
+               udev_monitor_new_from_netlink(udev_new(), "kernel");
+       struct pollfd pfd;
+       int ret;
+
+       udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
+       udev_monitor_enable_receiving(mon);
+
+       pfd.fd = udev_monitor_get_fd(mon);
+       pfd.events = POLLIN;
+
+       /*
+        * Wake up every 2s even without an event, so that an orphaned helper
+        * notices its parent is gone instead of blocking in poll() forever.
+        */
+       while ((ret = poll(&pfd, 1, 2000)) >= 0) {
+               struct udev_device *dev;
+
+               if (kill(pid, 0)) /* Parent has died, so must we. */
+                       break;
+
+               if (ret == 0) /* Timed out, nothing to read. */
+                       continue;
+
+               dev = udev_monitor_receive_device(mon);
+               if (dev == NULL)
+                       break;
+
+               /* Only errors from the device under test are of interest. */
+               if (udev_device_get_devnum(dev) == rdev) {
+                       const char *str;
+
+                       str = udev_device_get_property_value(dev, "ERROR");
+                       if (str && atoi(str) == 1)
+                               kill(pid, SIGRTMAX);
+               }
+
+               udev_device_unref(dev);
+       }
+
+       exit(0);
+}
+
+/* SIGRTMAX handler: trips an igt assertion to report the hang. */
+static void sig_abort(int sig)
+{
+       igt_assert(!"GPU hung");
+}
+
+/**
+ * igt_fork_hang_detector:
+ * @fd: open file descriptor of the DRM device to watch
+ *
+ * Installs a SIGRTMAX handler that asserts "GPU hung" and forks a helper
+ * process which raises that signal in the caller when the kernel emits an
+ * error uevent for the device behind @fd. Does nothing when only listing
+ * subtests.
+ */
+void igt_fork_hang_detector(int fd)
+{
+       struct stat st;
+
+       if (igt_only_list_subtests())
+               return;
+
+       igt_assert(fstat(fd, &st) == 0);
+
+       signal(SIGRTMAX, sig_abort);
+       igt_fork_helper(&hang_detector)
+               hang_detector_process(getppid(), st.st_rdev);
+}
+
+/**
+ * igt_stop_hang_detector:
+ *
+ * Stops the helper process started by igt_fork_hang_detector(). Does nothing
+ * when only listing subtests.
+ */
+void igt_stop_hang_detector(void)
+{
+       if (igt_only_list_subtests())
+               return;
+
+       igt_stop_helper(&hang_detector);
+}
+#else
+/* Built without libudev: hang detection is unavailable, so skip. */
+void igt_fork_hang_detector(int fd)
+{
+       if (igt_only_list_subtests())
+               return;
+
+       igt_skip("GPU hang detection requires libudev\n");
+}
+
+void igt_stop_hang_detector(void)
+{
+}
+#endif
+
 /**
  * igt_check_boolean_env_var:
  * @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9fade67..eee80ca 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
 void igt_fork_signal_helper(void);
 void igt_stop_signal_helper(void);
 
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
 struct igt_sigiter {
        unsigned pass;
 };
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 839b37d..24d374a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -56,9 +56,8 @@ AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(DEBUG_CFLAGS)\
        $(LIBUNWIND_CFLAGS) \
        $(NULL)
 
-LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = ../lib/libintel_tools.la $(GLIB_LIBS)
 
-LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
 AM_LDFLAGS = -Wl,--as-needed
 
diff --git a/tests/gem_exec_whisper.c b/tests/gem_exec_whisper.c
index b84f1a2..1991fed 100644
--- a/tests/gem_exec_whisper.c
+++ b/tests/gem_exec_whisper.c
@@ -368,6 +368,8 @@ igt_main
        igt_fixture
                fd = drm_open_driver_master(DRIVER_INTEL);
 
+       igt_fork_hang_detector(fd);
+
        for (const struct mode *m = modes; m->name; m++)
                igt_subtest_f("%s", *m->name ? m->name : "basic")
                        whisper(fd, -1, m->flags);
@@ -382,6 +384,8 @@ igt_main
                                whisper(fd, e->exec_id | e->flags, m->flags);
        }
 
+       igt_stop_hang_detector();
+
        igt_fixture
                close(fd);
 }
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 74c5521..df48d94 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -4,7 +4,7 @@ SUBDIRS = null_state_gen registers
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DEBUG_CFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 AM_LDFLAGS = -Wl,--as-needed
 
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to