Re: [Mesa-dev] [PATCH 2/4] i965: perf: query topology

2018-03-06 Thread Lionel Landwerlin

On 06/03/18 01:07, Rafael Antognolli wrote:

On Thu, Feb 22, 2018 at 05:24:38PM +0000, Lionel Landwerlin wrote:

With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available.

We introduce a new uAPI in the kernel driver to report exactly which
parts of the GPU are fused and require this to be available on Gen10+.

Prior generations can continue to rely on GETPARAM on older kernels.

This patch is quite a lot of code because we have to support lots of
different kernel versions, ranging from not providing any information
(for Haswell on 4.13 through 4.17), to being able to query through
GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17
for Gen10+.

I don't think it's that much code. It's reasonable given how many
interfaces we have to query such data.


This change stores topology information in a unified way on
brw_context.topology from the various kernel APIs. And then generates
the appropriate values for the equations from that unified topology.

Signed-off-by: Lionel Landwerlin 
---
  src/mesa/drivers/dri/i965/brw_context.h   |  14 ++
  src/mesa/drivers/dri/i965/brw_performance_query.c | 267 --
  2 files changed, 208 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 050b656e3da..69bf7530fbc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1160,6 +1160,20 @@ struct brw_context
bool supported;
 } predicate;
  
+   struct {

+  uint8_t slice_mask[4];
+  uint8_t subslice_mask[100];
+  uint8_t eu_mask[100];
+
+  uint16_t max_slices;
+  uint16_t max_subslices;
+  uint16_t max_eus_per_subslice;
+
+  uint16_t subslice_slice_stride;
+  uint16_t eu_slice_stride;
+  uint16_t eu_subslice_stride;
+   } topology;
+

I wonder if such information shouldn't be stored in gen_device_info. But
it seems the rest of the OA code is tied to i965 anyway, so I
guess this should be fine.


Actually putting that into gen_device_info makes a lot of sense.
Thanks for the suggestion, I'll update the series.



In any case, series is:

Acked-by: Rafael Antognolli 


 struct {
/* Variables referenced in the XML meta data for OA performance
 * counters, e.g in the normalization equations.
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index c0bb4442bec..10f519a757f 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1888,6 +1888,192 @@ init_oa_configs(struct brw_context *brw, const char 
*sysfs_dev_dir)
 }
  }
  
+static bool

+query_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct drm_i915_query_item item = {
+  .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+   };
+   struct drm_i915_query query = {
+  .num_items = 1,
+  .items_ptr = (uintptr_t) &item,
+   };
+
+   return false;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
+  return false;
+
+   struct drm_i915_query_topology_info *topo_info =
+  (struct drm_i915_query_topology_info *) calloc(1, item.length);
+   item.data_ptr = (uintptr_t) topo_info;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
+  return false;
+
+   brw->topology.max_slices = topo_info->max_slices;
+   brw->topology.max_subslices = topo_info->max_subslices;
+   brw->topology.max_eus_per_subslice = topo_info->max_eus_per_subslice;
+
+   brw->topology.subslice_slice_stride =
+  DIV_ROUND_UP(brw->topology.max_subslices, 8);
+   brw->topology.eu_subslice_stride =
+  DIV_ROUND_UP(brw->topology.max_eus_per_subslice, 8);
+   brw->topology.eu_slice_stride = brw->topology.max_subslices *
+  brw->topology.eu_subslice_stride;
+
+   assert(DIV_ROUND_UP(topo_info->max_slices, 8) <=
+  sizeof(brw->topology.slice_mask));
+   memcpy(brw->topology.slice_mask, topo_info->data,
+  DIV_ROUND_UP(topo_info->max_slices, 8));
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices, 8) <=
+  sizeof(brw->topology.subslice_mask));
+   memcpy(brw->topology.subslice_mask,
+  &topo_info->data[topo_info->subslice_offset],
+  topo_info->max_slices * topo_info->subslice_stride);
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices *
+   topo_info->max_eus_per_subslice, 8) <=
+  sizeof(brw->topology.eu_mask));
+   memcpy(brw->topology.eu_mask,
+  &topo_info->data[topo_info->eu_offset],
+  topo_info->max_slices * topo_info->max_subslices * 
topo_info->eu_stride);
+
+   free(topo_info);
+
+   return true;
+}
+
+static bool
+getparam_topology(struct brw_context *brw)
+{
+   const struct 

Re: [Mesa-dev] [PATCH 2/4] i965: perf: query topology

2018-03-05 Thread Rafael Antognolli
On Thu, Feb 22, 2018 at 05:24:38PM +0000, Lionel Landwerlin wrote:
> With the introduction of asymmetric slices in CNL, we cannot rely on
> the previous SUBSLICE_MASK getparam to tell userspace what subslices
> are available.
> 
> We introduce a new uAPI in the kernel driver to report exactly which
> parts of the GPU are fused and require this to be available on Gen10+.
> 
> Prior generations can continue to rely on GETPARAM on older kernels.
> 
> This patch is quite a lot of code because we have to support lots of
> different kernel versions, ranging from not providing any information
> (for Haswell on 4.13 through 4.17), to being able to query through
> GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17
> for Gen10+.

I don't think it's that much code. It's reasonable given how many
interfaces we have to query such data.

> This change stores topology information in a unified way on
> brw_context.topology from the various kernel APIs. And then generates
> the appropriate values for the equations from that unified topology.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  14 ++
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 267 
> --
>  2 files changed, 208 insertions(+), 73 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 050b656e3da..69bf7530fbc 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1160,6 +1160,20 @@ struct brw_context
>bool supported;
> } predicate;
>  
> +   struct {
> +  uint8_t slice_mask[4];
> +  uint8_t subslice_mask[100];
> +  uint8_t eu_mask[100];
> +
> +  uint16_t max_slices;
> +  uint16_t max_subslices;
> +  uint16_t max_eus_per_subslice;
> +
> +  uint16_t subslice_slice_stride;
> +  uint16_t eu_slice_stride;
> +  uint16_t eu_subslice_stride;
> +   } topology;
> +

I wonder if such information shouldn't be stored in gen_device_info. But
it seems the rest of the OA code is tied to i965 anyway, so I
guess this should be fine.

In any case, series is:

Acked-by: Rafael Antognolli 

> struct {
>/* Variables referenced in the XML meta data for OA performance
> * counters, e.g in the normalization equations.
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index c0bb4442bec..10f519a757f 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -1888,6 +1888,192 @@ init_oa_configs(struct brw_context *brw, const char 
> *sysfs_dev_dir)
> }
>  }
>  
> +static bool
> +query_topology(struct brw_context *brw)
> +{
> +   __DRIscreen *screen = brw->screen->driScrnPriv;
> +   struct drm_i915_query_item item = {
> +  .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
> +   };
> +   struct drm_i915_query query = {
> +  .num_items = 1,
> +  .items_ptr = (uintptr_t) &item,
> +   };
> +
> +   return false;
> +
> +   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
> +  return false;
> +
> +   struct drm_i915_query_topology_info *topo_info =
> +  (struct drm_i915_query_topology_info *) calloc(1, item.length);
> +   item.data_ptr = (uintptr_t) topo_info;
> +
> +   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
> +  return false;
> +
> +   brw->topology.max_slices = topo_info->max_slices;
> +   brw->topology.max_subslices = topo_info->max_subslices;
> +   brw->topology.max_eus_per_subslice = topo_info->max_eus_per_subslice;
> +
> +   brw->topology.subslice_slice_stride =
> +  DIV_ROUND_UP(brw->topology.max_subslices, 8);
> +   brw->topology.eu_subslice_stride =
> +  DIV_ROUND_UP(brw->topology.max_eus_per_subslice, 8);
> +   brw->topology.eu_slice_stride = brw->topology.max_subslices *
> +  brw->topology.eu_subslice_stride;
> +
> +   assert(DIV_ROUND_UP(topo_info->max_slices, 8) <=
> +  sizeof(brw->topology.slice_mask));
> +   memcpy(brw->topology.slice_mask, topo_info->data,
> +  DIV_ROUND_UP(topo_info->max_slices, 8));
> +
> +   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices, 8) 
> <=
> +  sizeof(brw->topology.subslice_mask));
> +   memcpy(brw->topology.subslice_mask,
> +  &topo_info->data[topo_info->subslice_offset],
> +  topo_info->max_slices * topo_info->subslice_stride);
> +
> +   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices *
> +   topo_info->max_eus_per_subslice, 8) <=
> +  sizeof(brw->topology.eu_mask));
> +   memcpy(brw->topology.eu_mask,
> +  &topo_info->data[topo_info->eu_offset],
> +  topo_info->max_slices * topo_info->max_subslices * 
> topo_info->eu_stride);
> +
> +   free(topo_info);
> +
> +   return true;
> +}
> +
> +static bool
> 

[Mesa-dev] [PATCH 2/4] i965: perf: query topology

2018-02-22 Thread Lionel Landwerlin
With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available.

We introduce a new uAPI in the kernel driver to report exactly which
parts of the GPU are fused and require this to be available on Gen10+.

Prior generations can continue to rely on GETPARAM on older kernels.

This patch is quite a lot of code because we have to support lots of
different kernel versions, ranging from not providing any information
(for Haswell on 4.13 through 4.17), to being able to query through
GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17
for Gen10+.

This change stores topology information in a unified way on
brw_context.topology from the various kernel APIs. And then generates
the appropriate values for the equations from that unified topology.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  14 ++
 src/mesa/drivers/dri/i965/brw_performance_query.c | 267 --
 2 files changed, 208 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 050b656e3da..69bf7530fbc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1160,6 +1160,20 @@ struct brw_context
   bool supported;
} predicate;
 
+   struct {
+  uint8_t slice_mask[4];
+  uint8_t subslice_mask[100];
+  uint8_t eu_mask[100];
+
+  uint16_t max_slices;
+  uint16_t max_subslices;
+  uint16_t max_eus_per_subslice;
+
+  uint16_t subslice_slice_stride;
+  uint16_t eu_slice_stride;
+  uint16_t eu_subslice_stride;
+   } topology;
+
struct {
   /* Variables referenced in the XML meta data for OA performance
* counters, e.g in the normalization equations.
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index c0bb4442bec..10f519a757f 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1888,6 +1888,192 @@ init_oa_configs(struct brw_context *brw, const char 
*sysfs_dev_dir)
}
 }
 
+static bool
+query_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct drm_i915_query_item item = {
+  .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+   };
+   struct drm_i915_query query = {
+  .num_items = 1,
+  .items_ptr = (uintptr_t) &item,
+   };
+
+   return false;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
+  return false;
+
+   struct drm_i915_query_topology_info *topo_info =
+  (struct drm_i915_query_topology_info *) calloc(1, item.length);
+   item.data_ptr = (uintptr_t) topo_info;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
+  return false;
+
+   brw->topology.max_slices = topo_info->max_slices;
+   brw->topology.max_subslices = topo_info->max_subslices;
+   brw->topology.max_eus_per_subslice = topo_info->max_eus_per_subslice;
+
+   brw->topology.subslice_slice_stride =
+  DIV_ROUND_UP(brw->topology.max_subslices, 8);
+   brw->topology.eu_subslice_stride =
+  DIV_ROUND_UP(brw->topology.max_eus_per_subslice, 8);
+   brw->topology.eu_slice_stride = brw->topology.max_subslices *
+  brw->topology.eu_subslice_stride;
+
+   assert(DIV_ROUND_UP(topo_info->max_slices, 8) <=
+  sizeof(brw->topology.slice_mask));
+   memcpy(brw->topology.slice_mask, topo_info->data,
+  DIV_ROUND_UP(topo_info->max_slices, 8));
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices, 8) <=
+  sizeof(brw->topology.subslice_mask));
+   memcpy(brw->topology.subslice_mask,
+  &topo_info->data[topo_info->subslice_offset],
+  topo_info->max_slices * topo_info->subslice_stride);
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices *
+   topo_info->max_eus_per_subslice, 8) <=
+  sizeof(brw->topology.eu_mask));
+   memcpy(brw->topology.eu_mask,
+  &topo_info->data[topo_info->eu_offset],
+  topo_info->max_slices * topo_info->max_subslices * 
topo_info->eu_stride);
+
+   free(topo_info);
+
+   return true;
+}
+
+static bool
+getparam_topology(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   drm_i915_getparam_t gp;
+   int ret;
+
+   /* On CNL+ we need to use the query ioctl(). */
+   assert(devinfo->gen < 10);
+
+   int slice_mask = 0;
+   gp.param = I915_PARAM_SLICE_MASK;
+   gp.value = &slice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+  return false;
+
+   int subslice_mask = 0;
+   gp.param = I915_PARAM_SUBSLICE_MASK;
+   gp.value = &subslice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+  return false;
+
+   brw->topology.max_slices =