The revised series looks OK to me.  One whitespace issue below.

For the series,
Reviewed-by: Brian Paul <bri...@vmware.com>


On 09/06/2018 07:04 PM, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

v2: use set_context_param
---
  src/gallium/auxiliary/util/u_helpers.c | 42 +++++++++++++++++++
  src/gallium/auxiliary/util/u_helpers.h |  4 ++
  src/mesa/state_tracker/st_context.c    |  3 ++
  src/mesa/state_tracker/st_manager.c    |  9 ++++
  src/util/u_thread.h                    | 57 ++++++++++++++++++++++++++
  5 files changed, 115 insertions(+)


[...]

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 69286b57916..7a37f9850f8 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -1056,20 +1056,29 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
           ret = _mesa_make_current(st->ctx, incomplete, incomplete);
        }
        st_framebuffer_reference(&stdraw, NULL);
        st_framebuffer_reference(&stread, NULL);
        /* Purge the context's winsys_buffers list in case any
         * of the referenced drawables no longer exist.
         */
        st_framebuffers_purge(st);
+
+      /* Notify the driver that the context thread may have been changed.
+       * This should pin all driver threads to a specific L3 cache for optimal
+       * performance on AMD Zen CPUs.
+       */
+      struct glthread_state *glthread = st->ctx->GLThread;
+      thrd_t *upper_thread = glthread ? &glthread->queue.threads[0] : NULL;
+
+      util_context_thread_changed(st->pipe, upper_thread);
     }
     else {
        ret = _mesa_make_current(NULL, NULL, NULL);
     }
     return ret;
  }
  static void
diff --git a/src/util/u_thread.h b/src/util/u_thread.h
index 8c6e0bdc59e..0555ba61111 100644
--- a/src/util/u_thread.h
+++ b/src/util/u_thread.h
@@ -63,20 +63,77 @@ static inline void u_thread_setname( const char *name )
  #if defined(HAVE_PTHREAD)
  #  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
        (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) && \
        defined(__linux__)
     pthread_setname_np(pthread_self(), name);
  #  endif
  #endif
     (void)name;
  }
+/**
+ * An AMD Zen CPU consists of multiple modules where each module has its own L3
+ * cache. Inter-thread communication such as locks and atomics between modules
+ * is very expensive. It's desirable to pin a group of closely cooperating
+ * threads to one group of cores sharing L3.
+ *
+ * \param thread        thread
+ * \param L3_index      index of the L3 cache
+ * \param cores_per_L3  number of CPU cores shared by one L3
+ */
+static inline void
+util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
+{
+#if defined(HAVE_PTHREAD)
+   cpu_set_t cpuset;
+
+   CPU_ZERO(&cpuset);
+   for (unsigned i = 0; i < cores_per_L3; i++)
+          CPU_SET(L3_index * cores_per_L3 + i, &cpuset);

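Indentation: the CPU_SET() line above is indented too far. It should sit one 3-space level inside the for loop (6 spaces, not 10).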


+   pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+#endif
+}
+
+/**
+ * Return the index of L3 that the thread is pinned to. If the thread is
+ * pinned to multiple L3 caches, return -1.
+ *
+ * \param thread        thread
+ * \param cores_per_L3  number of CPU cores shared by one L3
+ */
+static inline int
+util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3)
+{
+#if defined(HAVE_PTHREAD)
+   cpu_set_t cpuset;
+
+   if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) {
+      int L3_index = -1;
+
+      for (unsigned i = 0; i < CPU_SETSIZE; i++) {
+         if (CPU_ISSET(i, &cpuset)) {
+            int x = i / cores_per_L3;
+
+            if (L3_index != x) {
+               if (L3_index == -1)
+                  L3_index = x;
+               else
+                  return -1; /* multiple L3s are set */
+            }
+         }
+      }
+      return L3_index;
+   }
+#endif
+   return -1;
+}
+
  /*
   * Thread statistics.
   */
  /* Return the time of a thread's CPU time clock. */
  static inline int64_t
  u_thread_get_time_nano(thrd_t thread)
  {
  #if defined(__linux__) && defined(HAVE_PTHREAD)
     struct timespec ts;


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to