[Mesa-dev] [PATCH 10/21] intel/isl: Use ISL_DIM_LAYOUT_GEN9_1D for Yf/Ys

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index fcbe2ad..68a92b8 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -678,8 +678,10 @@ isl_surf_choose_dim_layout(const struct isl_device *dev,
   *
   * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
   * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
+  * Yf and Ys tiled surfaces are considered to be linear for the
+  * purposes of handling 1D surfaces.
   */
- if (tiling == ISL_TILING_LINEAR)
+ if (tiling == ISL_TILING_LINEAR || isl_tiling_is_std_y(tiling))
 return ISL_DIM_LAYOUT_GEN9_1D;
  else
 return ISL_DIM_LAYOUT_GEN4_2D;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/21] intel/isl: Enable Yf tiling

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl_gen7.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
index 4fa9851..2aa3916 100644
--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -205,7 +205,6 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
/* And... clear the Yf and Ys bits anyway because Anvil doesn't support
 * them yet.
 */
-   *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */
*flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */
 
if (isl_surf_usage_is_depth(info->usage)) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/21] intel/isl: Pull the uncompressed surface view code from anv

2018-02-22 Thread Jason Ekstrand
This adds a helper isl_surf_get_uncompressed_surf for creating a surface
which provides an uncompressed view into a compressed surface.  The code
is basically a direct port of the uncompressed surface code from the
Vulkan driver which, in turn, was a port from BLORP.
---
 src/intel/isl/isl.c  | 68 
 src/intel/isl/isl.h  | 29 +++
 src/intel/vulkan/anv_image.c | 34 ++
 3 files changed, 100 insertions(+), 31 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 0806474..74e248d 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -2393,6 +2393,74 @@ isl_surf_get_image_surf(const struct isl_device *dev,
 }
 
 void
+isl_surf_get_uncompressed_surf(const struct isl_device *dev,
+   const struct isl_surf *surf,
+   const struct isl_view *view,
+   struct isl_surf *ucompr_surf,
+   struct isl_view *ucompr_view,
+   uint32_t *offset_B,
+   uint32_t *x_offset_sa,
+   uint32_t *y_offset_sa)
+{
+   const struct isl_format_layout *fmtl =
+  isl_format_get_layout(surf->format);
+
+   assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
+   assert(isl_format_is_compressed(surf->format));
+   assert(!isl_format_is_compressed(view->format));
+   assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
+   assert(view->levels == 1);
+
+   const uint32_t view_width =
+  isl_minify(surf->logical_level0_px.width, view->base_level);
+   const uint32_t view_height =
+  isl_minify(surf->logical_level0_px.height, view->base_level);
+
+   const uint32_t ucompr_width = isl_align_div_npot(view_width, fmtl->bw);
+   const uint32_t ucompr_height = isl_align_div_npot(view_height, fmtl->bh);
+
+   {
+  /* For legacy tilings, we just make a new 2D surface which represents
+   * the single slice of the main surface.  Due to hardware restrictions
+   * with intratile offsets, we can only handle a single slice.
+   */
+  assert(view->array_len == 1);
+
+  isl_surf_get_image_surf(dev, surf,
+  view->base_level,
+  surf->dim == ISL_SURF_DIM_3D ?
+ 0 : view->base_array_layer,
+  surf->dim == ISL_SURF_DIM_3D ?
+ view->base_array_layer : 0,
+  ucompr_surf,
+  offset_B, x_offset_sa, y_offset_sa);
+
+  ucompr_surf->format = view->format;
+
+  /* We're making an uncompressed view here.  The image dimensions
+   * need to be scaled down by the block size.
+   */
+  assert(ucompr_surf->logical_level0_px.width == view_width);
+  assert(ucompr_surf->logical_level0_px.height == view_height);
+  assert(ucompr_surf->logical_level0_px.depth == 1);
+  assert(ucompr_surf->logical_level0_px.array_len == 1);
+  ucompr_surf->logical_level0_px.width = ucompr_width;
+  ucompr_surf->logical_level0_px.height = ucompr_height;
+  ucompr_surf->phys_level0_sa.width /= fmtl->bw;
+  ucompr_surf->phys_level0_sa.height /= fmtl->bh;
+  *x_offset_sa /= fmtl->bw;
+  *y_offset_sa /= fmtl->bh;
+
+  /* The newly created image represents the one subimage we're referencing
+   * with this view so it only has one array slice and miplevel.
+   */
+  *ucompr_view = *view;
+  ucompr_view->base_array_layer = 0;
+  ucompr_view->base_level = 0;
+   }
+}
+
+void
 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
enum isl_surf_dim dim,
uint32_t bpb,
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 8333a8c..fcf7ada 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1958,6 +1958,35 @@ isl_surf_get_image_surf(const struct isl_device *dev,
 uint32_t *y_offset_sa);
 
 /**
+ * Create an isl_surf that is an uncompressed view of a compressed isl_surf
+ *
+ * The incoming surface must have a compressed format.  The incoming view must
+ * be a valid view for the given surface with the exception that its format
+ * is an uncompressed format with the same bpb as the surface format.  The
+ * incoming view must have isl_view::levels == 1.
+ *
+ * When the function returns, the resulting combination of uncompressed_surf
+ * and uncompressed_view will be a valid view giving an uncompressed view of
+ * the incoming surface.  Depending on tiling, uncompressed_surf may have a
+ * different isl_surf::dim from surf and uncompressed_view may or may not have
+ * a zero base_array_layer.  For legacy tiling (not Yf or Ys), an intratile
+ * offset is returned in x_offset_sa and y_offset_sa.  For standard Y tilings
+ * (Yf and Ys), x_offset_sa and 

[Mesa-dev] [PATCH 13/21] intel/isl: Support Yf/Ys in isl_surf_get_image_offset_sa

2018-02-22 Thread Jason Ekstrand
All that's really needed here is to handle the array offsetting by using
a Z or array offset instead of the Y offset.
---
 src/intel/isl/isl.c | 45 ++---
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 35793b3..0806474 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1987,7 +1987,9 @@ static void
 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
 uint32_t level, uint32_t logical_array_layer,
 uint32_t *x_offset_sa,
-uint32_t *y_offset_sa)
+uint32_t *y_offset_sa,
+uint32_t *z_offset_sa,
+uint32_t *array_offset)
 {
assert(level < surf->levels);
if (surf->dim == ISL_SURF_DIM_3D)
@@ -2004,8 +2006,21 @@ get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
const uint32_t phys_layer = logical_array_layer *
   (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
 
-   uint32_t x = 0;
-   uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
+   uint32_t x = 0, y;
+   if (isl_tiling_is_std_y(surf->tiling)) {
+  y = 0;
+  if (surf->dim == ISL_SURF_DIM_3D) {
+ *z_offset_sa = logical_array_layer;
+ *array_offset = 0;
+  } else {
+ *z_offset_sa = 0;
+ *array_offset = phys_layer;
+  }
+   } else {
+  y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
+  *z_offset_sa = 0;
+  *array_offset = 0;
+   }
 
for (uint32_t l = 0; l < level; ++l) {
   if (l == 1) {
@@ -2145,7 +2160,9 @@ static void
 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
 uint32_t level, uint32_t layer,
 uint32_t *x_offset_sa,
-uint32_t *y_offset_sa)
+uint32_t *y_offset_sa,
+uint32_t *z_offset_sa,
+uint32_t *array_offset)
 {
assert(level < surf->levels);
assert(layer < surf->phys_level0_sa.array_len);
@@ -2167,7 +2184,15 @@ get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
}
 
*x_offset_sa = x;
-   *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
+   *z_offset_sa = 0;
+   if (surf->tiling == ISL_TILING_LINEAR) {
+  *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
+  *array_offset = 0;
+   } else {
+  assert(isl_tiling_is_std_y(surf->tiling));
+  *y_offset_sa = 0;
+  *array_offset = layer;
+   }
 }
 
 /**
@@ -2196,16 +2221,14 @@ isl_surf_get_image_offset_sa(const struct isl_surf 
*surf,
switch (surf->dim_layout) {
case ISL_DIM_LAYOUT_GEN9_1D:
   get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
-  x_offset_sa, y_offset_sa);
-  *z_offset_sa = 0;
-  *array_offset = 0;
+  x_offset_sa, y_offset_sa,
+  z_offset_sa, array_offset);
   break;
case ISL_DIM_LAYOUT_GEN4_2D:
   get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
   + logical_z_offset_px,
-  x_offset_sa, y_offset_sa);
-  *z_offset_sa = 0;
-  *array_offset = 0;
+  x_offset_sa, y_offset_sa,
+  z_offset_sa, array_offset);
   break;
case ISL_DIM_LAYOUT_GEN4_3D:
   get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/21] intel/blorp: Use isl_surf_get_uncompressed_surf

2018-02-22 Thread Jason Ekstrand
---
 src/intel/blorp/blorp_blit.c | 58 +---
 1 file changed, 17 insertions(+), 41 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 876498d..5b63754 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2349,50 +2349,26 @@ blorp_surf_convert_to_uncompressed(const struct 
isl_device *isl_dev,
 
assert(fmtl->bw > 1 || fmtl->bh > 1);
 
-   /* This is a compressed surface.  We need to convert it to a single
-* slice (because compressed layouts don't perfectly match uncompressed
-* ones with the same bpb) and divide x, y, width, and height by the
-* block size.
-*/
-   blorp_surf_convert_to_single_slice(isl_dev, info);
-
-   if (width && height) {
-#ifndef NDEBUG
-  uint32_t right_edge_px = info->tile_x_sa + *x + *width;
-  uint32_t bottom_edge_px = info->tile_y_sa + *y + *height;
-  assert(*width % fmtl->bw == 0 ||
- right_edge_px == info->surf.logical_level0_px.width);
-  assert(*height % fmtl->bh == 0 ||
- bottom_edge_px == info->surf.logical_level0_px.height);
-#endif
-  *width = DIV_ROUND_UP(*width, fmtl->bw);
-  *height = DIV_ROUND_UP(*height, fmtl->bh);
-   }
-
-   if (x && y) {
-  assert(*x % fmtl->bw == 0);
-  assert(*y % fmtl->bh == 0);
-  *x /= fmtl->bw;
-  *y /= fmtl->bh;
-   }
-
-   info->surf.logical_level0_px.width =
-  DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw);
-   info->surf.logical_level0_px.height =
-  DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh);
+   /* It's now an uncompressed surface so we need an uncompressed format */
+   info->view.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb);
 
-   assert(info->surf.phys_level0_sa.width % fmtl->bw == 0);
-   assert(info->surf.phys_level0_sa.height % fmtl->bh == 0);
-   info->surf.phys_level0_sa.width /= fmtl->bw;
-   info->surf.phys_level0_sa.height /= fmtl->bh;
+   /* We only want one level and slice */
+   info->view.levels = 1;
+   info->view.array_len = 1;
 
-   assert(info->tile_x_sa % fmtl->bw == 0);
-   assert(info->tile_y_sa % fmtl->bh == 0);
-   info->tile_x_sa /= fmtl->bw;
-   info->tile_y_sa /= fmtl->bh;
+   uint32_t offset_B;
+   isl_surf_get_uncompressed_surf(isl_dev, &info->surf, &info->view,
+  &info->surf, &info->view, &offset_B,
+  &info->tile_x_sa, &info->tile_y_sa);
+   info->addr.offset += offset_B;
 
-   /* It's now an uncompressed surface so we need an uncompressed format */
-   info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb);
+   /* BLORP doesn't use the actual intratile offsets.  Instead, it needs the
+* surface to be a bit bigger and we offset the vertices instead.
+*/
+   info->surf.logical_level0_px.w += info->tile_x_sa;
+   info->surf.logical_level0_px.h += info->tile_y_sa;
+   info->surf.phys_level0_sa.w += info->tile_x_sa;
+   info->surf.phys_level0_sa.h += info->tile_y_sa;
 }
 
 void
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/21] intel/isl: Support Yf/Ys tiling in emit_depth_stencil_hiz

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl_emit_depth_stencil.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/intel/isl/isl_emit_depth_stencil.c 
b/src/intel/isl/isl_emit_depth_stencil.c
index 90ce889..901f406 100644
--- a/src/intel/isl/isl_emit_depth_stencil.c
+++ b/src/intel/isl/isl_emit_depth_stencil.c
@@ -56,6 +56,14 @@ static const uint32_t isl_to_gen_ds_surftype[] = {
[ISL_SURF_DIM_3D] = SURFTYPE_3D,
 };
 
+#if GEN_GEN >= 9
+static const uint8_t isl_tiling_to_gen_trmode[] = {
+   [ISL_TILING_Y0]  = NONE,
+   [ISL_TILING_Yf]  = TILEYF,
+   [ISL_TILING_Ys]  = TILEYS,
+};
+#endif
+
 void
 isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
const struct 
isl_depth_stencil_hiz_emit_info *restrict info)
@@ -95,7 +103,18 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device 
*dev, void *batch,
   db.DepthBufferMOCS = info->mocs;
 #endif
 
-#if GEN_GEN <= 6
+#if GEN_GEN >= 9
+  /* Gen9+ depth is always Y-tiled but it may be Y0, Yf, or Ys. */
+  assert(isl_tiling_is_any_y(info->depth_surf->tiling));
+  db.TiledResourceMode = 
isl_tiling_to_gen_trmode[info->depth_surf->tiling];
+
+  /* We don't use miptails yet.  The PRM recommends that you set "Mip Tail
+   * Start LOD" to 15 to prevent the hardware from trying to use them.
+   */
+  db.MipTailStartLOD = 15;
+#elif GEN_GEN >= 7
+  /* Gen7+ depth is always Y-tiled.  We don't even have a bit for it */
+#else
   db.TiledSurface = info->depth_surf->tiling != ISL_TILING_LINEAR;
   db.TileWalk = info->depth_surf->tiling == ISL_TILING_Y0 ? 
TILEWALK_YMAJOR :
 
TILEWALK_XMAJOR;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/21] intel/isl: Fill out the correct phys_total_extent for Ys/Yf

2018-02-22 Thread Jason Ekstrand
With these tilings, everything is aligned to a tile and the tiled
surface size calculations will handle the array stride for us.  We need
to provide an accurate 4D size so that 3D and multisampled images get
tiled correctly.
---
 src/intel/isl/isl.c | 51 ---
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index d3efc76..35793b3 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1039,14 +1039,24 @@ isl_calc_phys_total_extent_el_gen4_2d(
image_align_sa, phys_level0_sa,
array_pitch_span,
 &phys_slice0_sa);
-   uint32_t array_len = MAX(phys_level0_sa->d, phys_level0_sa->a);
-   *phys_total_el = (struct isl_extent4d) {
-  .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
-  .h = *array_pitch_el_rows * (array_len - 1) +
-   isl_assert_div(phys_slice0_sa.h, fmtl->bh),
-  .d = 1,
-  .a = 1,
-   };
+
+   if (isl_tiling_is_std_y(tile_info->tiling)) {
+  *phys_total_el = (struct isl_extent4d) {
+ .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
+ .h = isl_assert_div(phys_slice0_sa.h, fmtl->bh),
+ .d = isl_assert_div(phys_level0_sa->depth, fmtl->bd),
+ .a = phys_level0_sa->array_len,
+  };
+   } else {
+  uint32_t array_len = MAX(phys_level0_sa->d, phys_level0_sa->a);
+  *phys_total_el = (struct isl_extent4d) {
+ .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
+ .h = *array_pitch_el_rows * (array_len - 1) +
+  isl_assert_div(phys_slice0_sa.h, fmtl->bh),
+ .d = 1,
+ .a = 1,
+  };
+   }
 }
 
 /**
@@ -1186,6 +1196,7 @@ static void
 isl_calc_phys_total_extent_el_gen9_1d(
   const struct isl_device *dev,
   const struct isl_surf_init_info *restrict info,
+  const struct isl_tile_info *tile_info,
   const struct isl_extent3d *image_align_sa,
   const struct isl_extent4d *phys_level0_sa,
   uint32_t *array_pitch_el_rows,
@@ -1209,12 +1220,22 @@ isl_calc_phys_total_extent_el_gen9_1d(
}
 
*array_pitch_el_rows = 1;
-   *phys_total_el = (struct isl_extent4d) {
-  .w = isl_assert_div(slice_w, fmtl->bw),
-  .h = phys_level0_sa->array_len,
-  .d = 1,
-  .a = 1,
-   };
+   if (tile_info->tiling == ISL_TILING_LINEAR) {
+  *phys_total_el = (struct isl_extent4d) {
+ .w = isl_assert_div(slice_w, fmtl->bw),
+ .h = phys_level0_sa->array_len,
+ .d = 1,
+ .a = 1,
+  };
+   } else {
+  assert(isl_tiling_is_std_y(tile_info->tiling));
+  *phys_total_el = (struct isl_extent4d) {
+ .w = isl_assert_div(slice_w, fmtl->bw),
+ .h = 1,
+ .d = 1,
+ .a = phys_level0_sa->array_len,
+  };
+   }
 }
 
 /**
@@ -1236,7 +1257,7 @@ isl_calc_phys_total_extent_el(const struct isl_device 
*dev,
switch (dim_layout) {
case ISL_DIM_LAYOUT_GEN9_1D:
   assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
-  isl_calc_phys_total_extent_el_gen9_1d(dev, info,
+  isl_calc_phys_total_extent_el_gen9_1d(dev, info, tile_info,
 image_align_sa, phys_level0_sa,
 array_pitch_el_rows,
 phys_total_el);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/21] intel/isl: Take a dim and sample count in isl_tiling_get_info

2018-02-22 Thread Jason Ekstrand
The layout of Yf and Ys tiles depends on these parameters.
---
 src/intel/blorp/blorp_blit.c   |  5 +++--
 src/intel/isl/isl.c| 15 ++-
 src/intel/isl/isl.h|  8 ++--
 src/mesa/drivers/dri/i965/intel_blit.c |  5 +++--
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 46cfc37..876498d 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1928,8 +1928,9 @@ shrink_surface_params(const struct isl_device *dev,
x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
uint32_t tile_z_sa, tile_a;
-   isl_tiling_get_intratile_offset_sa(info->surf.tiling,
-  info->surf.format, info->surf.row_pitch,
+   isl_tiling_get_intratile_offset_sa(info->surf.tiling, info->surf.dim,
+  info->surf.format, info->surf.samples,
+  info->surf.row_pitch,
   info->surf.array_pitch_el_rows,
   x_offset_sa, y_offset_sa, 0, 0,
   _offset,
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 95c1271..aa56a3c 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -173,7 +173,8 @@ isl_tiling_get_info(enum isl_tiling tiling,
*/
   assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
   assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
-  isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
+  assert(samples == 1);
+  isl_tiling_get_info(tiling, dim, format_bpb / 3, samples, tile_info);
   return;
}
 
@@ -1430,7 +1431,7 @@ isl_surf_init_s(const struct isl_device *dev,
   return false;
 
struct isl_tile_info tile_info;
-   isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
+   isl_tiling_get_info(tiling, info->dim, fmtl->bpb, info->samples, &tile_info);
 
const enum isl_dim_layout dim_layout =
   isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
@@ -2037,7 +2038,7 @@ get_image_offset_sa_gen6_stencil_hiz(const struct 
isl_surf *surf,
   isl_surf_get_image_alignment_sa(surf);
 
struct isl_tile_info tile_info;
-   isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
+   isl_surf_get_tile_info(surf, &tile_info);
const struct isl_extent2d tile_extent_sa = {
   .w = tile_info.logical_extent_el.w * fmtl->bw,
   .h = tile_info.logical_extent_el.h * fmtl->bh,
@@ -2253,7 +2254,8 @@ isl_surf_get_image_offset_B_tile_el(const struct isl_surf 
*surf,
 _array_offset);
 
uint32_t z_offset_el, array_offset;
-   isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
+   isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
+  fmtl->bpb, surf->samples,
   surf->row_pitch,
   surf->array_pitch_el_rows,
   total_x_offset_el,
@@ -2312,7 +2314,9 @@ isl_surf_get_image_surf(const struct isl_device *dev,
 
 void
 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
+   enum isl_surf_dim dim,
uint32_t bpb,
+   uint32_t samples,
uint32_t row_pitch_B,
uint32_t array_pitch_el_rows,
uint32_t total_x_offset_el,
@@ -2327,6 +2331,7 @@ isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
 {
if (tiling == ISL_TILING_LINEAR) {
   assert(bpb % 8 == 0);
+  assert(samples == 1);
   assert(total_z_offset_el == 0 && total_array_offset == 0);
   *base_address_offset = total_y_offset_el * row_pitch_B +
  total_x_offset_el * (bpb / 8);
@@ -2338,7 +2343,7 @@ isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
}
 
struct isl_tile_info tile_info;
-   isl_tiling_get_info(tiling, bpb, &tile_info);
+   isl_tiling_get_info(tiling, dim, bpb, samples, &tile_info);
 
/* Pitches must make sense with the tiling */
assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index aa5bfed..8333a8c 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1969,7 +1969,9 @@ isl_surf_get_image_surf(const struct isl_device *dev,
  */
 void
 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
+   enum isl_surf_dim dim,
uint32_t bpb,
+   uint32_t samples,
uint32_t row_pitch,
uint32_t array_pitch_el_rows,
uint32_t 

[Mesa-dev] [PATCH 11/21] intel/isl: Use the depth field of phys_level0_sa for GEN4_2D 3D surfaces

2018-02-22 Thread Jason Ekstrand
This makes things a tiny bit stickier in isl_calc_phys_total_extent_el
but will be worth it when we enable Yf and Ys.
---
 src/intel/isl/isl.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 68a92b8..d3efc76 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -827,21 +827,10 @@ isl_calc_phys_level0_extent_sa(const struct isl_device 
*dev,
  unreachable("bad isl_dim_layout");
 
   case ISL_DIM_LAYOUT_GEN4_2D:
- assert(ISL_DEV_GEN(dev) >= 9);
-
+  case ISL_DIM_LAYOUT_GEN4_3D:
  *phys_level0_sa = (struct isl_extent4d) {
 .w = isl_align_npot(info->width, fmtl->bw),
 .h = isl_align_npot(info->height, fmtl->bh),
-.d = 1,
-.a = info->depth,
- };
- break;
-
-  case ISL_DIM_LAYOUT_GEN4_3D:
- assert(ISL_DEV_GEN(dev) < 9);
- *phys_level0_sa = (struct isl_extent4d) {
-.w = isl_align(info->width, fmtl->bw),
-.h = isl_align(info->height, fmtl->bh),
 .d = info->depth,
 .a = 1,
  };
@@ -968,8 +957,6 @@ isl_calc_phys_slice0_extent_sa_gen4_2d(
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
 
-   assert(phys_level0_sa->depth == 1);
-
if (info->levels == 1) {
   /* Do not pad the surface to the image alignment. Instead, pad it only
* to the pixel format's block alignment.
@@ -1052,9 +1039,10 @@ isl_calc_phys_total_extent_el_gen4_2d(
image_align_sa, phys_level0_sa,
array_pitch_span,
 &phys_slice0_sa);
+   uint32_t array_len = MAX(phys_level0_sa->d, phys_level0_sa->a);
*phys_total_el = (struct isl_extent4d) {
   .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
-  .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
+  .h = *array_pitch_el_rows * (array_len - 1) +
isl_assert_div(phys_slice0_sa.h, fmtl->bh),
   .d = 1,
   .a = 1,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/21] intel/isl: Don't compute image tiling data for Yf/Ys tiling

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl_storage_image.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/intel/isl/isl_storage_image.c 
b/src/intel/isl/isl_storage_image.c
index 0ae79a2..b0556c7 100644
--- a/src/intel/isl/isl_storage_image.c
+++ b/src/intel/isl/isl_storage_image.c
@@ -233,6 +233,17 @@ isl_surf_fill_image_param(const struct isl_device *dev,
view->base_array_layer;
}
 
+   if (isl_tiling_is_std_y(surf->tiling)) {
+  /* The shader code for doing manual tiling calculations doesn't support
+   * Yf or Ys tiling.  Fortunately, we never need it on gen9 where Yf and
+   * Ys were added.
+   */
+  assert(ISL_DEV_GEN(dev) >= 9);
+  assert(isl_has_matching_typed_storage_image_format(dev->info,
+ view->format));
+  return;
+   }
+
uint32_t tile_z_el, phys_array_layer;
isl_surf_get_image_offset_el(surf, view->base_level,
 surf->dim == ISL_SURF_DIM_3D ?
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/21] intel/isl: Support Ys and Yf in isl_surf_get_uncompressed_surf

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl.c | 47 ++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 74e248d..f93db0c 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -2419,7 +2419,52 @@ isl_surf_get_uncompressed_surf(const struct isl_device 
*dev,
const uint32_t ucompr_width = isl_align_div_npot(view_width, fmtl->bw);
const uint32_t ucompr_height = isl_align_div_npot(view_height, fmtl->bh);
 
-   {
+   if (isl_tiling_is_std_y(surf->tiling)) {
+  /* Offset to the given miplevel.  Because we're using standard tilings
+   * with no miptail, arrays and 3D textures should just work so long as
+   * we have the right array stride in the end.
+   */
+  isl_surf_get_image_offset_B_tile_el(surf, view->base_level, 0, 0,
+  offset_B, x_offset_sa, y_offset_sa);
+  /* Ys and Yf should have no intratile X or Y offset */
+  assert(*x_offset_sa == 0 && *y_offset_sa == 0);
+
+  /* Save off the array pitch */
+  const uint32_t array_pitch_el_rows = surf->array_pitch_el_rows;
+
+  const uint32_t view_depth =
+ isl_minify(surf->logical_level0_px.depth, view->base_level);
+  const uint32_t ucompr_depth = isl_align_div_npot(view_depth, fmtl->bd);
+
+  bool ok UNUSED;
+  ok = isl_surf_init(dev, ucompr_surf,
+ .dim = surf->dim,
+ .format = view->format,
+ .width = ucompr_width,
+ .height = ucompr_height,
+ .depth = ucompr_depth,
+ .levels = 1,
+ .array_len = surf->logical_level0_px.array_len,
+ .samples = surf->samples,
+ .row_pitch = surf->row_pitch,
+ .usage = surf->usage,
+ .tiling_flags = (1u << surf->tiling));
+  assert(ok);
+
+  /* Use the array pitch from the original surface.  This way 2D arrays
+   * and 3D textures should work properly, just with one LOD.
+   */
+  assert(ucompr_surf->array_pitch_el_rows <= array_pitch_el_rows);
+  ucompr_surf->array_pitch_el_rows = array_pitch_el_rows;
+
+  /* The newly created image represents only the one miplevel so we
+   * need to adjust the view accordingly.  Because we offset it to
+   * miplevel but used a Z and array slice of 0, the array range can be
+   * left alone.
+   */
+  *ucompr_view = *view;
+  ucompr_view->base_level = 0;
+   } else {
   /* For legacy tilings, we just make a new 2D surface which represents
* the single slice of the main surface.  Due to hardware restrictions
* with intratile offsets, we can only handle a single slice.
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/21] i965: Explicitly disable Yf and Ys tiling

2018-02-22 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 59dbfb9..978253a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -578,6 +578,11 @@ make_surface(struct brw_context *brw, GLenum target, 
mesa_format format,
num_samples, width0, height0, depth0,
first_level, last_level, mt);
 
+   /* We don't support Yf or Ys in i965 yet because we use the blitter too
+* much and it can't handle them.
+*/
+   tiling_flags &= ~ISL_TILING_STD_Y_MASK;
+
struct isl_surf_init_info init_info = {
   .dim = get_isl_surf_dim(target),
   .format = translate_tex_format(brw, format, false),
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/21] intel/isl: Use the tile size for computing standard Y alignments

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl_gen9.c | 85 +---
 1 file changed, 9 insertions(+), 76 deletions(-)

diff --git a/src/intel/isl/isl_gen9.c b/src/intel/isl/isl_gen9.c
index e5d0f95..f9d391a 100644
--- a/src/intel/isl/isl_gen9.c
+++ b/src/intel/isl/isl_gen9.c
@@ -25,77 +25,6 @@
 #include "isl_gen9.h"
 #include "isl_priv.h"
 
-/**
- * Calculate the surface's subimage alignment, in units of surface samples,
- * for the standard tiling formats Yf and Ys.
- */
-static void
-gen9_calc_std_image_alignment_sa(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict 
info,
- enum isl_tiling tiling,
- enum isl_msaa_layout msaa_layout,
- struct isl_extent3d *align_sa)
-{
-   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
-
-   assert(isl_tiling_is_std_y(tiling));
-
-   const uint32_t bpb = fmtl->bpb;
-   const uint32_t is_Ys = tiling == ISL_TILING_Ys;
-
-   switch (info->dim) {
-   case ISL_SURF_DIM_1D:
-  /* See the Skylake BSpec > Memory Views > Common Surface Formats > 
Surface
-   * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
-   */
-  *align_sa = (struct isl_extent3d) {
- .w = 1 << (12 - (ffs(bpb) - 4) + (4 * is_Ys)),
- .h = 1,
- .d = 1,
-  };
-  return;
-   case ISL_SURF_DIM_2D:
-  /* See the Skylake BSpec > Memory Views > Common Surface Formats >
-   * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment
-   * Requirements.
-   */
-  *align_sa = (struct isl_extent3d) {
- .w = 1 << (6 - ((ffs(bpb) - 4) / 2) + (4 * is_Ys)),
- .h = 1 << (6 - ((ffs(bpb) - 3) / 2) + (4 * is_Ys)),
- .d = 1,
-  };
-
-  if (is_Ys) {
- /* FINISHME(chadv): I don't trust this code. Untested. */
- isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__);
-
- switch (msaa_layout) {
- case ISL_MSAA_LAYOUT_NONE:
- case ISL_MSAA_LAYOUT_INTERLEAVED:
-break;
- case ISL_MSAA_LAYOUT_ARRAY:
-align_sa->w >>= (ffs(info->samples) - 0) / 2;
-align_sa->h >>= (ffs(info->samples) - 1) / 2;
-break;
- }
-  }
-  return;
-
-   case ISL_SURF_DIM_3D:
-  /* See the Skylake BSpec > Memory Views > Common Surface Formats > 
Surface
-   * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
-   */
-  *align_sa = (struct isl_extent3d) {
- .w = 1 << (4 - ((ffs(bpb) - 2) / 3) + (4 * is_Ys)),
- .h = 1 << (4 - ((ffs(bpb) - 4) / 3) + (2 * is_Ys)),
- .d = 1 << (4 - ((ffs(bpb) - 3) / 3) + (2 * is_Ys)),
-  };
-  return;
-   }
-
-   unreachable("bad isl_surface_type");
-}
-
 void
 isl_gen9_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict 
info,
@@ -166,11 +95,15 @@ isl_gen9_choose_image_alignment_el(const struct isl_device 
*dev,
 */
 
if (isl_tiling_is_std_y(tiling)) {
-  struct isl_extent3d image_align_sa;
-  gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout,
- &image_align_sa);
-
-  *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa);
+  /* Ys and Yf tiled images are aligned to the tile size */
+  struct isl_tile_info tile_info;
+  isl_tiling_get_info(tiling, info->dim, fmtl->bpb,
+  info->samples, &tile_info);
+  *image_align_el = (struct isl_extent3d) {
+ .w = tile_info.logical_extent_el.w,
+ .h = tile_info.logical_extent_el.h,
+ .d = tile_info.logical_extent_el.d,
+  };
   return;
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/21] intel/isl: Support Yf/Ys tiling in surf_fill_state

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl_surface_state.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index bfb27fa..a1d5fc2 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -74,6 +74,14 @@ static const uint8_t isl_to_gen_tiling[] = {
 };
 #endif
 
+#if GEN_GEN >= 9
+static const uint8_t isl_tiling_to_gen_trmode[] = {
+   [ISL_TILING_Y0]  = NONE,
+   [ISL_TILING_Yf]  = TILEYF,
+   [ISL_TILING_Ys]  = TILEYS,
+};
+#endif
+
 #if GEN_GEN >= 7
 static const uint32_t isl_to_gen_multisample_layout[] = {
[ISL_MSAA_LAYOUT_NONE]   = MSFMT_MSS,
@@ -412,7 +420,6 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
/* We don't use miptails yet.  The PRM recommends that you set "Mip Tail
 * Start LOD" to 15 to prevent the hardware from trying to use them.
 */
-   s.TiledResourceMode = NONE;
s.MipTailStartLOD = 15;
 #endif
 
@@ -438,7 +445,15 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
ISL_ARRAY_PITCH_SPAN_COMPACT;
 #endif
 
-#if GEN_GEN >= 8
+#if GEN_GEN >= 9
+   s.TileMode = isl_to_gen_tiling[info->surf->tiling];
+   if (isl_tiling_is_any_y(info->surf->tiling)) {
+  /* 1D Yf/Ys is supposed to have a tile mode of linear */
+  if (info->surf->dim == ISL_SURF_DIM_1D)
+ s.TileMode = LINEAR;
+  s.TiledResourceMode = isl_tiling_to_gen_trmode[info->surf->tiling];
+   }
+#elif GEN_GEN >= 8
s.TileMode = isl_to_gen_tiling[info->surf->tiling];
 #else
s.TiledSurface = info->surf->tiling != ISL_TILING_LINEAR,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/21] intel/isl: Use a 4D physical total extent for size calculations

2018-02-22 Thread Jason Ekstrand
With Yf and Ys tiling, everything is actually four dimensional because
we can have multiple depth or multisampled array slices in the same
tile.  This commit just enhances the calculations so they can handle it.
---
 src/intel/isl/isl.c | 71 +++--
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 5bdc726..46aff46 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -988,7 +988,7 @@ isl_calc_phys_total_extent_el_gen4_2d(
   const struct isl_extent4d *phys_level0_sa,
   enum isl_array_pitch_span array_pitch_span,
   uint32_t *array_pitch_el_rows,
-  struct isl_extent2d *total_extent_el)
+  struct isl_extent4d *phys_total_el)
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
 
@@ -1001,10 +1001,12 @@ isl_calc_phys_total_extent_el_gen4_2d(
image_align_sa, phys_level0_sa,
array_pitch_span,
&phys_slice0_sa);
-   *total_extent_el = (struct isl_extent2d) {
+   *phys_total_el = (struct isl_extent4d) {
   .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
   .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
isl_assert_div(phys_slice0_sa.h, fmtl->bh),
+  .d = 1,
+  .a = 1,
};
 }
 
@@ -1019,7 +1021,7 @@ isl_calc_phys_total_extent_el_gen4_3d(
   const struct isl_extent3d *image_align_sa,
   const struct isl_extent4d *phys_level0_sa,
   uint32_t *array_pitch_el_rows,
-  struct isl_extent2d *phys_total_el)
+  struct isl_extent4d *phys_total_el)
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
 
@@ -1066,9 +1068,11 @@ isl_calc_phys_total_extent_el_gen4_3d(
 */
*array_pitch_el_rows =
   isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
-   *phys_total_el = (struct isl_extent2d) {
+   *phys_total_el = (struct isl_extent4d) {
   .w = isl_assert_div(total_w, fmtl->bw),
   .h = isl_assert_div(total_h, fmtl->bh),
+  .d = 1,
+  .a = 1,
};
 }
 
@@ -1084,7 +1088,7 @@ isl_calc_phys_total_extent_el_gen6_stencil_hiz(
   const struct isl_extent3d *image_align_sa,
   const struct isl_extent4d *phys_level0_sa,
   uint32_t *array_pitch_el_rows,
-  struct isl_extent2d *phys_total_el)
+  struct isl_extent4d *phys_total_el)
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
 
@@ -1127,9 +1131,11 @@ isl_calc_phys_total_extent_el_gen6_stencil_hiz(
 
*array_pitch_el_rows =
   isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
-   *phys_total_el = (struct isl_extent2d) {
+   *phys_total_el = (struct isl_extent4d) {
   .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
   .h = isl_assert_div(total_h, fmtl->bh),
+  .d = 1,
+  .a = 1,
};
 }
 
@@ -1144,7 +1150,7 @@ isl_calc_phys_total_extent_el_gen9_1d(
   const struct isl_extent3d *image_align_sa,
   const struct isl_extent4d *phys_level0_sa,
   uint32_t *array_pitch_el_rows,
-  struct isl_extent2d *phys_total_el)
+  struct isl_extent4d *phys_total_el)
 {
MAYBE_UNUSED const struct isl_format_layout *fmtl = 
isl_format_get_layout(info->format);
 
@@ -1164,9 +1170,11 @@ isl_calc_phys_total_extent_el_gen9_1d(
}
 
*array_pitch_el_rows = 1;
-   *phys_total_el = (struct isl_extent2d) {
+   *phys_total_el = (struct isl_extent4d) {
   .w = isl_assert_div(slice_w, fmtl->bw),
   .h = phys_level0_sa->array_len,
+  .d = 1,
+  .a = 1,
};
 }
 
@@ -1184,7 +1192,7 @@ isl_calc_phys_total_extent_el(const struct isl_device 
*dev,
   const struct isl_extent4d *phys_level0_sa,
   enum isl_array_pitch_span array_pitch_span,
   uint32_t *array_pitch_el_rows,
-  struct isl_extent2d *total_extent_el)
+  struct isl_extent4d *phys_total_el)
 {
switch (dim_layout) {
case ISL_DIM_LAYOUT_GEN9_1D:
@@ -1192,14 +1200,14 @@ isl_calc_phys_total_extent_el(const struct isl_device 
*dev,
   isl_calc_phys_total_extent_el_gen9_1d(dev, info,
 image_align_sa, phys_level0_sa,
 array_pitch_el_rows,
-total_extent_el);
+phys_total_el);
   return;
case ISL_DIM_LAYOUT_GEN4_2D:
   isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
 image_align_sa, phys_level0_sa,
 array_pitch_span,
 array_pitch_el_rows,
-total_extent_el);
+

[Mesa-dev] [PATCH 04/21] intel/isl: Make tile logical extents four dimensional

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl.c | 36 
 src/intel/isl/isl.h |  2 +-
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index cbe295b..5bdc726 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -160,7 +160,8 @@ isl_tiling_get_info(enum isl_tiling tiling,
 struct isl_tile_info *tile_info)
 {
const uint32_t bs = format_bpb / 8;
-   struct isl_extent2d logical_el, phys_B;
+   struct isl_extent4d logical_el;
+   struct isl_extent2d phys_B;
 
if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
   /* It is possible to have non-power-of-two formats in a tiled buffer.
@@ -177,25 +178,25 @@ isl_tiling_get_info(enum isl_tiling tiling,
switch (tiling) {
case ISL_TILING_LINEAR:
   assert(bs > 0);
-  logical_el = isl_extent2d(1, 1);
+  logical_el = isl_extent4d(1, 1, 1, 1);
   phys_B = isl_extent2d(bs, 1);
   break;
 
case ISL_TILING_X:
   assert(bs > 0);
-  logical_el = isl_extent2d(512 / bs, 8);
+  logical_el = isl_extent4d(512 / bs, 8, 1, 1);
   phys_B = isl_extent2d(512, 8);
   break;
 
case ISL_TILING_Y0:
   assert(bs > 0);
-  logical_el = isl_extent2d(128 / bs, 32);
+  logical_el = isl_extent4d(128 / bs, 32, 1, 1);
   phys_B = isl_extent2d(128, 32);
   break;
 
case ISL_TILING_W:
   assert(bs == 1);
-  logical_el = isl_extent2d(64, 64);
+  logical_el = isl_extent4d(64, 64, 1, 1);
   /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
*
*"If the surface is a stencil buffer (and thus has Tile Mode set
@@ -218,7 +219,7 @@ isl_tiling_get_info(enum isl_tiling tiling,
   unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
   unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
 
-  logical_el = isl_extent2d(width / bs, height);
+  logical_el = isl_extent4d(width / bs, height, 1, 1);
   phys_B = isl_extent2d(width, height);
   break;
}
@@ -229,7 +230,7 @@ isl_tiling_get_info(enum isl_tiling tiling,
* Y-tiling but actually has two HiZ columns per Y-tiled column.
*/
   assert(bs == 16);
-  logical_el = isl_extent2d(16, 16);
+  logical_el = isl_extent4d(16, 16, 1, 1);
   phys_B = isl_extent2d(128, 32);
   break;
 
@@ -252,7 +253,7 @@ isl_tiling_get_info(enum isl_tiling tiling,
* is 128x256 elements.
*/
   assert(format_bpb == 1 || format_bpb == 2);
-  logical_el = isl_extent2d(128, 256 / format_bpb);
+  logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
   phys_B = isl_extent2d(128, 32);
   break;
 
@@ -2301,7 +2302,10 @@ isl_tiling_get_intratile_offset_el(enum isl_tiling 
tiling,
struct isl_tile_info tile_info;
isl_tiling_get_info(tiling, bpb, &tile_info);
 
+   /* Pitches must make sense with the tiling */
assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
+   assert(array_pitch_el_rows % tile_info.logical_extent_el.d == 0);
+   assert(array_pitch_el_rows % tile_info.logical_extent_el.a == 0);
 
/* For non-power-of-two formats, we need the address to be both tile and
 * element-aligned.  The easiest way to achieve this is to work with a tile
@@ -2318,14 +2322,22 @@ isl_tiling_get_intratile_offset_el(enum isl_tiling 
tiling,
/* Compute the offset into the tile */
*x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
*y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
-   assert(total_z_offset_el == 0);
-   assert(total_array_offset == 0);
-   *z_offset_el = 0;
-   *array_offset = 0;
+   *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
+   *array_offset = total_array_offset % tile_info.logical_extent_el.a;
 
/* Compute the offset of the tile in units of whole tiles */
uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
+   uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
+   uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
+
+   /* Compute an array pitch in number of tiles */
+   uint32_t array_pitch_tl_rows =
+  array_pitch_el_rows / MAX2(tile_info.logical_extent_el.d,
+ tile_info.logical_extent_el.a);
+
+   /* Add the Z and array offset to the Y offset to get a 2D offset */
+   y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
 
*base_address_offset =
   y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 6cee631..cdc76fe 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1065,7 +1065,7 @@ struct isl_tile_info {
 * The exact value of this field depends heavily on the bits-per-block of
 * the format being used.
 */
-   struct isl_extent2d logical_extent_el;
+   

[Mesa-dev] [PATCH 03/21] intel/isl: Make the offset helpers four dimensional

2018-02-22 Thread Jason Ekstrand
We need to do this in order to handle Yf and Ys tiling because they use
a four-dimensional tile instead of laying everything out in two
dimensions.
---
 src/intel/blorp/blorp_blit.c   |  8 ++-
 src/intel/isl/isl.c| 60 ++
 src/intel/isl/isl.h| 37 ++---
 src/intel/isl/isl_storage_image.c  |  6 ++-
 .../isl/tests/isl_surf_get_image_offset_test.c |  4 +-
 src/mesa/drivers/dri/i965/intel_blit.c |  7 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  |  7 ++-
 7 files changed, 102 insertions(+), 27 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 0757db0..46cfc37 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1927,11 +1927,15 @@ shrink_surface_params(const struct isl_device *dev,
 */
x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
+   uint32_t tile_z_sa, tile_a;
isl_tiling_get_intratile_offset_sa(info->surf.tiling,
   info->surf.format, info->surf.row_pitch,
-  x_offset_sa, y_offset_sa,
+  info->surf.array_pitch_el_rows,
+  x_offset_sa, y_offset_sa, 0, 0,
   &byte_offset,
-  &info->tile_x_sa, &info->tile_y_sa);
+  &info->tile_x_sa, &info->tile_y_sa,
+  &tile_z_sa, &tile_a);
+   assert(tile_z_sa == 0 && tile_a == 0);
 
info->addr.offset += byte_offset;
 
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index bba7310..cbe295b 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -2087,7 +2087,9 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf,
  uint32_t logical_array_layer,
  uint32_t logical_z_offset_px,
  uint32_t *x_offset_sa,
- uint32_t *y_offset_sa)
+ uint32_t *y_offset_sa,
+ uint32_t *z_offset_sa,
+ uint32_t *array_offset)
 {
assert(level < surf->levels);
assert(logical_array_layer < surf->logical_level0_px.array_len);
@@ -2098,21 +2100,29 @@ isl_surf_get_image_offset_sa(const struct isl_surf 
*surf,
case ISL_DIM_LAYOUT_GEN9_1D:
   get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
   x_offset_sa, y_offset_sa);
+  *z_offset_sa = 0;
+  *array_offset = 0;
   break;
case ISL_DIM_LAYOUT_GEN4_2D:
   get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
   + logical_z_offset_px,
   x_offset_sa, y_offset_sa);
+  *z_offset_sa = 0;
+  *array_offset = 0;
   break;
case ISL_DIM_LAYOUT_GEN4_3D:
   get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
   logical_z_offset_px,
   x_offset_sa, y_offset_sa);
+  *z_offset_sa = 0;
+  *array_offset = 0;
   break;
case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
   get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
logical_z_offset_px,
x_offset_sa, y_offset_sa);
+  *z_offset_sa = 0;
+  *array_offset = 0;
   break;
 
default:
@@ -2126,7 +2136,9 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf,
  uint32_t logical_array_layer,
  uint32_t logical_z_offset_px,
  uint32_t *x_offset_el,
- uint32_t *y_offset_el)
+ uint32_t *y_offset_el,
+ uint32_t *z_offset_el,
+ uint32_t *array_offset)
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
 
@@ -2135,15 +2147,18 @@ isl_surf_get_image_offset_el(const struct isl_surf 
*surf,
assert(logical_z_offset_px
   < isl_minify(surf->logical_level0_px.depth, level));
 
-   uint32_t x_offset_sa, y_offset_sa;
+   uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
isl_surf_get_image_offset_sa(surf, level,
 logical_array_layer,
 logical_z_offset_px,
 &x_offset_sa,
-&y_offset_sa);
+&y_offset_sa,
+&z_offset_sa,
+array_offset);
 
*x_offset_el = x_offset_sa / fmtl->bw;
*y_offset_el = y_offset_sa / fmtl->bh;
+   *z_offset_el = z_offset_sa / 

[Mesa-dev] [PATCH 08/21] intel/isl: Implement correct tile size calculations for Ys/Yf

2018-02-22 Thread Jason Ekstrand
The tile size calculations use a clever bit of math to make them short
and simple.  We add unit tests to assert that they identically match the
tables in the PRM.
---
 src/intel/Makefile.isl.am |   9 +-
 src/intel/isl/isl.c   |  56 ++-
 src/intel/isl/meson.build |  11 ++
 src/intel/isl/tests/isl_tile_std_y_test.c | 160 ++
 4 files changed, 230 insertions(+), 6 deletions(-)
 create mode 100644 src/intel/isl/tests/isl_tile_std_y_test.c

diff --git a/src/intel/Makefile.isl.am b/src/intel/Makefile.isl.am
index 9525f9e..a498f2f 100644
--- a/src/intel/Makefile.isl.am
+++ b/src/intel/Makefile.isl.am
@@ -75,7 +75,9 @@ isl/isl_format_layout.c: isl/gen_format_layout.py \
 #  Tests
 # 
 
-check_PROGRAMS += isl/tests/isl_surf_get_image_offset_test
+check_PROGRAMS += \
+   isl/tests/isl_surf_get_image_offset_test \
+   isl/tests/isl_tile_std_y_test
 
 TESTS += $(check_PROGRAMS)
 
@@ -84,6 +86,11 @@ isl_tests_isl_surf_get_image_offset_test_LDADD = \
isl/libisl.la \
-lm
 
+isl_tests_isl_tile_std_y_test_LDADD = \
+   common/libintel_common.la \
+   isl/libisl.la \
+   -lm
+
 # 
 
 EXTRA_DIST += \
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index aa56a3c..fcbe2ad 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -217,13 +217,59 @@ isl_tiling_get_info(enum isl_tiling tiling,
case ISL_TILING_Yf:
case ISL_TILING_Ys: {
   bool is_Ys = tiling == ISL_TILING_Ys;
+  assert(format_bpb >= 8);
 
-  assert(bs > 0);
-  unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
-  unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
+  switch (dim) {
+  case ISL_SURF_DIM_1D:
+ /* See the Skylake BSpec > Memory Views > Common Surface Formats >
+  * Surface Layout and Tiling > 1D Surfaces > 1D Alignment
+  * Requirements.
+  */
+ logical_el = (struct isl_extent4d) {
+.w = 1 << (12 - (ffs(format_bpb) - 4) + (4 * is_Ys)),
+.h = 1,
+.d = 1,
+.a = 1,
+ };
+ break;
+
+  case ISL_SURF_DIM_2D:
+ /* See the Skylake BSpec > Memory Views > Common Surface Formats >
+  * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment
+  * Requirements.
+  */
+ logical_el = (struct isl_extent4d) {
+.w = 1 << (6 - ((ffs(format_bpb) - 4) / 2) + (2 * is_Ys)),
+.h = 1 << (6 - ((ffs(format_bpb) - 3) / 2) + (2 * is_Ys)),
+.d = 1,
+.a = 1,
+ };
+
+ if (is_Ys && samples > 1) {
+logical_el.w >>= (ffs(samples) - 0) / 2;
+logical_el.h >>= (ffs(samples) - 1) / 2;
+logical_el.a = samples;
+ }
+ break;
+
+  case ISL_SURF_DIM_3D:
+ /* See the Skylake BSpec > Memory Views > Common Surface Formats >
+  * Surface Layout and Tiling > 3D Surfaces > 3D Alignment
+  * Requirements.
+  */
+ logical_el = (struct isl_extent4d) {
+.w = 1 << (4 - ((ffs(format_bpb) - 2) / 3) + (2 * is_Ys)),
+.h = 1 << (4 - ((ffs(format_bpb) - 4) / 3) + (1 * is_Ys)),
+.d = 1 << (4 - ((ffs(format_bpb) - 3) / 3) + (1 * is_Ys)),
+.a = 1,
+ };
+ break;
+  }
+
+  uint32_t tile_size_B = is_Ys ? (1 << 16) : (1 << 12);
 
-  logical_el = isl_extent4d(width / bs, height, 1, 1);
-  phys_B = isl_extent2d(width, height);
+  phys_B.w = logical_el.width * bs;
+  phys_B.h = tile_size_B / phys_B.w;
   break;
}
 
diff --git a/src/intel/isl/meson.build b/src/intel/isl/meson.build
index 36b8b8f..ad0d5cc 100644
--- a/src/intel/isl/meson.build
+++ b/src/intel/isl/meson.build
@@ -98,4 +98,15 @@ if with_tests
   link_with : [libisl, libintel_common],
 )
   )
+
+  test(
+'isl_tile_std_y',
+executable(
+  'isl_tile_std_y_test',
+  'tests/isl_tile_std_y_test.c',
+  dependencies : dep_m,
+  include_directories : [inc_common, inc_intel],
+  link_with : [libisl, libintel_common],
+)
+  )
 endif
diff --git a/src/intel/isl/tests/isl_tile_std_y_test.c 
b/src/intel/isl/tests/isl_tile_std_y_test.c
new file mode 100644
index 000..25053c6
--- /dev/null
+++ b/src/intel/isl/tests/isl_tile_std_y_test.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished 

[Mesa-dev] [PATCH 00/21] intel/isl: Add support for Yf and Ys tiling

2018-02-22 Thread Jason Ekstrand
This patch series adds support to ISL for Yf and Ys tiling on gen9+.  It
really didn't end up being as bad as I thought it would be.  Most of the
work was in massaging things to handle 4D logical tile sizes so that we can
properly handle the more complex tiles.  There was also a bit of bother
around getting BLORP and ANV to properly be able to view a compressed Yf
texture as uncompressed.  Other than that, most of it is actually fairly
straightforward.

One glaring omission from this series is actually enabling Ys tiling.  I
believe all the calculations are in place and correct for Ys.  However,
enabling Ys and running it through Jenkins led to several failures and GPU
hangs which I believe to be a result of not properly aligning the memory
objects to 64KB which is required for Ys.  Beyond that, there are some
cases with 1D surfaces where you cannot use Ys because the alignments are
just too high and we have no code yet for detecting those and choosing Yf
instead.  Finally, it is not at all clear what sort of a heuristic we would
want to use for enabling Ys as I don't think it's really going to buy us
all that much in most cases.

The other obvious omission in this series is miptails.  For that, there
currently is no code.  I intend to implement it but I figured I had enough
patches that it was worth sending it out as is.  Miptails will come either
as a follow-on series or as added patches in some future version of this
series.

Why do we care about Yf and Ys tiling?  There are a couple of reasons.  For
one, I think it will yield slightly better performance for 3D textures and
for small miplevels (once we get miptails).  I have no numbers to back that
up yet but my intuitive understanding of tiling leads me to think that Yf
and Ys will provide better cache locality in these cases.  The second
reason is that it's needed for sparse texturing.  Sparse allows the client
to bind individual pages to the image and not actually have memory bound to
the whole thing.  This requires both page-aligned slices and miptails, both
of which we only get with Yf and Ys.  For sparse multisampled images, Ys
makes everything easier because it puts all the samples in the same 64KB
tile which is helpful because sparse binding has no concept of arrayed
multisampling.

Happy Reviewing!

Nanley, I put you in the Cc but I don't expect you to spend another month
doing nothing but Jason patch review. :-)  I wanted you to be aware but was
hoping that Topi would take this one.

Cc: Nanley Chery 
Cc: Topi Pohjolainen 
Cc: Chad Versace 

Jason Ekstrand (21):
  intel/isl: Add a isl_surf_get_image_offset_B_tile_el helper
  intel/blorp: Use isl_surf_get_image_offset_B_tile_el in ccs_ambiguate
  intel/isl: Make the offset helpers four dimensional
  intel/isl: Make tile logical extents four dimensional
  intel/isl: Use a 4D physical total extent for size calculations
  intel/isl: Expose isl_tiling_get_info
  intel/isl: Take a dim and sample count in isl_tiling_get_info
  intel/isl: Implement correct tile size calculations for Ys/Yf
  intel/isl: Use the tile size for computing standard Y alignments
  intel/isl: Use ISL_DIM_LAYOUT_GEN9_1D for Yf/Ys
  intel/isl: Use the depth field of phys_level0_sa for GEN4_2D 3D
surfaces
  intel/isl: Fill out the correct phys_total_extent for Ys/Yf
  intel/isl: Support Yf/Ys in isl_surf_get_image_offset_sa
  intel/isl: Pull the uncompressed surface view code from anv
  intel/blorp: Use isl_surf_get_uncompressed_surf
  intel/isl: Support Ys and Yf in isl_surf_get_uncompressed_surf
  intel/isl: Don't compute image tiling data for Yf/Ys tiling
  intel/isl: Support Yf/Ys tiling in surf_fill_state
  intel/isl: Support Yf/Ys tiling in emit_depth_stencil_hiz
  i965: Explicitly disable Yf and Ys tiling
  intel/isl: Enable Yf tiling

 src/intel/Makefile.isl.am  |   9 +-
 src/intel/blorp/blorp_blit.c   |  71 ++-
 src/intel/blorp/blorp_clear.c  |   8 +-
 src/intel/isl/isl.c| 481 +
 src/intel/isl/isl.h| 103 -
 src/intel/isl/isl_emit_depth_stencil.c |  21 +-
 src/intel/isl/isl_gen7.c   |   1 -
 src/intel/isl/isl_gen9.c   |  85 +---
 src/intel/isl/isl_storage_image.c  |  17 +-
 src/intel/isl/isl_surface_state.c  |  19 +-
 src/intel/isl/meson.build  |  11 +
 .../isl/tests/isl_surf_get_image_offset_test.c |   4 +-
 src/intel/isl/tests/isl_tile_std_y_test.c  | 160 +++
 src/intel/vulkan/anv_image.c   |  34 +-
 src/mesa/drivers/dri/i965/intel_blit.c |  12 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  |  12 +-
 16 files changed, 778 insertions(+), 270 deletions(-)
 create mode 100644 src/intel/isl/tests/isl_tile_std_y_test.c

-- 

[Mesa-dev] [PATCH 06/21] intel/isl: Expose isl_tiling_get_info

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl.c | 7 +--
 src/intel/isl/isl.h | 7 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 46aff46..95c1271 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -154,9 +154,11 @@ isl_device_get_sample_counts(struct isl_device *dev)
 /**
  * @param[out] info is written only on success
  */
-static void
+void
 isl_tiling_get_info(enum isl_tiling tiling,
+enum isl_surf_dim dim,
 uint32_t format_bpb,
+uint32_t samples,
 struct isl_tile_info *tile_info)
 {
const uint32_t bs = format_bpb / 8;
@@ -1585,7 +1587,8 @@ isl_surf_get_tile_info(const struct isl_surf *surf,
struct isl_tile_info *tile_info)
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
-   isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
+   isl_tiling_get_info(surf->tiling, surf->dim, fmtl->bpb,
+   surf->samples, tile_info);
 }
 
 bool
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index cdc76fe..aa5bfed 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1572,6 +1572,13 @@ isl_tiling_is_std_y(enum isl_tiling tiling)
return (1u << tiling) & ISL_TILING_STD_Y_MASK;
 }
 
+void
+isl_tiling_get_info(enum isl_tiling tiling,
+enum isl_surf_dim dim,
+uint32_t format_bpb,
+uint32_t samples,
+struct isl_tile_info *tile_info);
+
 uint32_t
 isl_tiling_to_i915_tiling(enum isl_tiling tiling);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/21] intel/blorp: Use isl_surf_get_image_offset_B_tile_el in ccs_ambiguate

2018-02-22 Thread Jason Ekstrand
---
 src/intel/blorp/blorp_clear.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index dde116f..2597f30 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -976,12 +976,8 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
}
 
uint32_t offset_B, x_offset_el, y_offset_el;
-   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
-&x_offset_el, &y_offset_el);
-   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
-  surf->aux_surf->row_pitch,
-  x_offset_el, y_offset_el,
-  &offset_B, &x_offset_el, &y_offset_el);
+   isl_surf_get_image_offset_B_tile_el(surf->aux_surf, level, layer, z,
+   &offset_B, &x_offset_el, &y_offset_el);
params.dst.addr.offset += offset_B;
 
const uint32_t width_px =
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/21] intel/isl: Add a isl_surf_get_image_offset_B_tile_el helper

2018-02-22 Thread Jason Ekstrand
---
 src/intel/isl/isl.c | 45 -
 src/intel/isl/isl.h | 20 
 2 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 1a32c02..bba7310 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -2157,20 +2157,13 @@ isl_surf_get_image_offset_B_tile_sa(const struct 
isl_surf *surf,
 {
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
 
-   uint32_t total_x_offset_el, total_y_offset_el;
-   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
-logical_z_offset_px,
-&total_x_offset_el,
-&total_y_offset_el);
-
uint32_t x_offset_el, y_offset_el;
-   isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
-  surf->row_pitch,
-  total_x_offset_el,
-  total_y_offset_el,
-  offset_B,
-  &x_offset_el,
-  &y_offset_el);
+   isl_surf_get_image_offset_B_tile_el(surf, level,
+   logical_array_layer,
+   logical_z_offset_px,
+   offset_B,
+   &x_offset_el,
+   &y_offset_el);
 
if (x_offset_sa) {
   *x_offset_sa = x_offset_el * fmtl->bw;
@@ -2186,6 +2179,32 @@ isl_surf_get_image_offset_B_tile_sa(const struct 
isl_surf *surf,
 }
 
 void
+isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
+uint32_t level,
+uint32_t logical_array_layer,
+uint32_t logical_z_offset_px,
+uint32_t *offset_B,
+uint32_t *x_offset_el,
+uint32_t *y_offset_el)
+{
+   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
+
+   uint32_t total_x_offset_el, total_y_offset_el;
+   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
+logical_z_offset_px,
+&total_x_offset_el,
+&total_y_offset_el);
+
+   isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
+  surf->row_pitch,
+  total_x_offset_el,
+  total_y_offset_el,
+  offset_B,
+  x_offset_el,
+  y_offset_el);
+}
+
+void
 isl_surf_get_image_surf(const struct isl_device *dev,
 const struct isl_surf *surf,
 uint32_t level,
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 209769a..142a92c 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1904,6 +1904,26 @@ isl_surf_get_image_offset_B_tile_sa(const struct 
isl_surf *surf,
 uint32_t *y_offset_sa);
 
 /**
+ * Calculate the offset, in bytes and intratile surface elements, to a
+ * subimage in the surface.
+ *
+ * This is equivalent to calling isl_surf_get_image_offset_el, passing the
+ * result to isl_tiling_get_intratile_offset_el.
+ *
+ * @invariant level < surface levels
+ * @invariant logical_array_layer < logical array length of surface
+ * @invariant logical_z_offset_px < logical depth of surface at level
+ */
+void
+isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
+uint32_t level,
+uint32_t logical_array_layer,
+uint32_t logical_z_offset_px,
+uint32_t *offset_B,
+uint32_t *x_offset_el,
+uint32_t *y_offset_el);
+
+/**
  * Create an isl_surf that represents a particular subimage in the surface.
  *
  * The newly created surface will have a single miplevel and array slice.  The
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/13] mesa: Introduce a yet unused _DrawVAO.

2018-02-22 Thread Mathias Fröhlich
Hi Brian,

On Thursday, 22 February 2018 23:32:23 CET Brian Paul wrote:
> Just a few little nit-picks.  With those fixed, the series is: 
> Reviewed-by: Brian Paul 
> 
> Nice work!

Thanks!

Thank you for the review!
And it is pushed now!

best

Mathias


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/nir: enable lowering of fpow

2018-02-22 Thread Timothy Arceri
Lowering fpow in NIR rather than LLVM can be beneficial.

Polaris results:

Totals from affected shaders:
SGPRS: 124928 -> 124896 (-0.03 %)
VGPRS: 68616 -> 68332 (-0.41 %)
Spilled SGPRs: 394 -> 413 (4.82 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 3668912 -> 3658368 (-0.29 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 18575 -> 18593 (0.10 %)
Wait states: 0 -> 0 (0.00 %)

Fixes: d6b753920677 "ac/nir: remove emission of nir_op_fpow"

Cc: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_get.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index 18d9cec414..6e799534b2 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -492,6 +492,7 @@ static const struct nir_shader_compiler_options nir_options 
= {
.lower_scmp = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
+   .lower_fpow = true,
.lower_fsat = true,
.lower_fdiv = true,
.lower_sub = true,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac: make use of ac_get_llvm_num_components() helper

2018-02-22 Thread Timothy Arceri
---
 src/amd/common/ac_nir_to_llvm.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 50f3a4f69e..444107d14e 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1198,11 +1198,7 @@ static LLVMValueRef get_alu_src(struct ac_nir_context 
*ctx,
bool need_swizzle = false;
 
assert(value);
-   LLVMTypeRef type = LLVMTypeOf(value);
-   unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
- ? LLVMGetVectorSize(type)
- : 1;
-
+   unsigned src_components = ac_get_llvm_num_components(value);
for (unsigned i = 0; i < num_components; ++i) {
assert(src.swizzle[i] < src_components);
if (src.swizzle[i] != i)
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] gallium/tgsi: remove is_msaa_sampler array from tgsi_shader_info

2018-02-22 Thread Timothy Arceri
Seems to have not been used since 16be87c90429
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 --
 src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 -
 2 files changed, 7 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 989c57ebee..c35eff25ba 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -267,7 +267,6 @@ scan_src_operand(struct tgsi_shader_info *info,
   const unsigned index = src->Register.Index;
 
   assert(fullinst->Instruction.Texture);
-  assert(index < ARRAY_SIZE(info->is_msaa_sampler));
   assert(index < PIPE_MAX_SAMPLERS);
 
   if (is_texture_inst(fullinst->Instruction.Opcode)) {
@@ -286,11 +285,6 @@ scan_src_operand(struct tgsi_shader_info *info,
  */
 assert(info->sampler_targets[index] == target);
  }
- /* MSAA samplers */
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-info->is_msaa_sampler[src->Register.Index] = TRUE;
- }
   }
}
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 0f872b0022..1887ff342d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -132,7 +132,6 @@ struct tgsi_shader_info
boolean writes_viewport_index;
boolean writes_layer;
boolean writes_memory; /**< contains stores or atomics to buffers or images 
*/
-   boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
boolean uses_doubles; /**< uses any of the double instructions */
boolean uses_derivatives;
boolean uses_bindless_samplers;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105208] [regression] (e5ff036c67 st/dri: Add support for BGR[A/X]1010102 formats) broke the mouse in mutter/shell

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105208

Jens Petersen  changed:

   What|Removed |Added

 CC||peter...@redhat.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/tgsi: remove is_msaa_sampler array from tgsi_shader_info

2018-02-22 Thread Timothy Arceri
Seems to have not been used since 16be87c90429
---
 src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 0f872b0022..1887ff342d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -132,7 +132,6 @@ struct tgsi_shader_info
boolean writes_viewport_index;
boolean writes_layer;
boolean writes_memory; /**< contains stores or atomics to buffers or images 
*/
-   boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
boolean uses_doubles; /**< uses any of the double instructions */
boolean uses_derivatives;
boolean uses_bindless_samplers;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] virgl: add offset alignment values to v2 caps struct

2018-02-22 Thread Gurchetan Singh
From: "gurchetansi...@chromium.org" 

glBindBufferRange(..) in vrend_draw_bind_ubo is failing with
more than one uniform block. This is due to improper alignment
of the start of the second block. Let's query the proper
alignment from the driver and pass it back to Mesa.

Let's query for the texture alignment too, even though the Virgl
renderer doesn't call glTexBufferRange yet.

The default values are the widest workable range possible (for example,
GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT on Nvidia is 256).

Fixes:
dEQP-GLES3.functional.ubo.* on Nvidia

Example test:
dEQP-GLES3.functional.ubo.multi_basic_types.single_buffer.shared_vertex

Note: This is based on "virgl: reduce some default capset limits.",
which hasn't landed in Mesa yet but should relatively soon.
---
 src/gallium/drivers/virgl/virgl_hw.h | 2 ++
 src/gallium/drivers/virgl/virgl_screen.c | 4 ++--
 src/gallium/drivers/virgl/virgl_winsys.h | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/virgl/virgl_hw.h 
b/src/gallium/drivers/virgl/virgl_hw.h
index 833ab91eee..93849c03dd 100644
--- a/src/gallium/drivers/virgl/virgl_hw.h
+++ b/src/gallium/drivers/virgl/virgl_hw.h
@@ -284,6 +284,8 @@ struct virgl_caps_v2 {
 int32_t max_texel_offset;
 int32_t min_texture_gather_offset;
 int32_t max_texture_gather_offset;
+uint32_t texture_buffer_offset_alignment;
+uint32_t uniform_buffer_offset_alignment;
 };
 
 union virgl_caps {
diff --git a/src/gallium/drivers/virgl/virgl_screen.c 
b/src/gallium/drivers/virgl/virgl_screen.c
index 22a694ea27..49a0c57cda 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -140,7 +140,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
   return 0;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-  return 16;
+  return vscreen->caps.caps.v2.uniform_buffer_offset_alignment;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   return vscreen->caps.caps.v1.bset.streamout_pause_resume;
@@ -163,7 +163,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   return vscreen->caps.caps.v1.max_tbo_size > 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-  return 0;
+  return vscreen->caps.caps.v2.texture_buffer_offset_alignment;
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   return 0;
case PIPE_CAP_CUBE_MAP_ARRAY:
diff --git a/src/gallium/drivers/virgl/virgl_winsys.h 
b/src/gallium/drivers/virgl/virgl_winsys.h
index 95e21a8afd..99e98ad9c9 100644
--- a/src/gallium/drivers/virgl/virgl_winsys.h
+++ b/src/gallium/drivers/virgl/virgl_winsys.h
@@ -132,5 +132,7 @@ static inline void virgl_ws_fill_new_caps_defaults(struct 
virgl_drm_caps *caps)
caps->caps.v2.max_texel_offset = 7;
caps->caps.v2.min_texture_gather_offset = -8;
caps->caps.v2.max_texture_gather_offset = 7;
+   caps->caps.v2.texture_buffer_offset_alignment = 32;
+   caps->caps.v2.uniform_buffer_offset_alignment = 256;
 }
 #endif
-- 
2.13.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: perf: ensure reading config IDs from sysfs isn't interrupted

2018-02-22 Thread Lionel Landwerlin

On 22/02/18 19:26, Emil Velikov wrote:

Hi Lionel,

On 9 February 2018 at 10:03, Lionel Landwerlin
 wrote:

Fixes: 458468c136e "i965: Expose OA counters via INTEL_performance_query"
Signed-off-by: Lionel Landwerlin 
Cc: "18.0" 
---
  src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)


Just a friendly reminder that the patch hasn't landed in master yet.
AFAICT it was reviewed by Eric Engestrom.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Thanks Emil, pushed.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi/nir: fix loading of doubles for tess varyings

2018-02-22 Thread Timothy Arceri
---
 src/gallium/drivers/radeonsi/si_shader.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2e398f6335..49db4d6367 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1273,7 +1273,11 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct 
ac_shader_abi *abi,
 
LLVMValueRef value[4];
for (unsigned i = 0; i < num_components + component; i++) {
-   value[i] = lds_load(bld_base, type, i, dw_addr);
+   unsigned offset = i;
+   if (llvm_type_is_64bit(ctx, type))
+   offset *= 2;
+
+   value[i] = lds_load(bld_base, type, offset, dw_addr);
}
 
return ac_build_varying_gather_values(&ctx->ac, value, num_components, 
component);
@@ -1360,7 +1364,11 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi 
*abi,
 */
LLVMValueRef value[4];
for (unsigned i = component; i < num_components + component; i++) {
-   value[i] = buffer_load(&ctx->bld_base, type, i, buffer, base, 
addr, true);
+   unsigned offset = i;
+   if (llvm_type_is_64bit(ctx, type))
+   offset *= 2;
+
+   value[i] = buffer_load(&ctx->bld_base, type, offset, buffer, 
base, addr, true);
}
 
return ac_build_varying_gather_values(&ctx->ac, value, num_components, 
component);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi/nir: fix lds store in tcs outputs handling

2018-02-22 Thread Timothy Arceri
We were ignoring the channel offset.
---
 src/gallium/drivers/radeonsi/si_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 9ccae9f18d..2e398f6335 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1548,7 +1548,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi 
*abi,
 
/* Skip LDS stores if there is no LDS read of this output. */
if (!skip_lds_store)
-   ac_lds_store(&ctx->ac, dw_addr, value);
+   lds_store(ctx, chan, dw_addr, value);
 
value = ac_to_integer(&ctx->ac, value);
values[chan] = value;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Adding a SPIR-V target to libclc

2018-02-22 Thread Francisco Jerez
Jan Vesely  writes:

> On Thu, 2018-02-15 at 22:50 -0800, Francisco Jerez wrote:
>> Jan Vesely  writes:
>> 
>> > On Thu, 2018-02-15 at 20:36 -0800, Francisco Jerez wrote:
>> > > Jan Vesely  writes:
>> > > 
>> > > > On Thu, 2018-02-08 at 15:56 -0800, Francisco Jerez wrote:
>> > > > > Jan Vesely  writes:
>> > > > > 
>> > > > > > On Thu, 2018-02-08 at 23:16 +0100, Pierre Moreau wrote:
>> > > > > > > (Moving the conversation to its own thread.)
>> > > > > > > 
>> > > > > > > > target agnostic libclc is rather difficult to do. CLC includes 
>> > > > > > > > 3 levels
>> > > > > > > > of precision on float (fp32) operands; full, half, native. The
>> > > > > > > > implementation of each depends on capabilites of specific 
>> > > > > > > > device (e.g.
>> > > > > > > > vega(VI+?) can do 1 ULP log2/exp2 in hw, other targets need sw
>> > > > > > > > implementation to meet CLC requirement of 3ulp). Any 
>> > > > > > > > conversion backend
>> > > > > > > > would thus need to implement sw versions of math builtins for 
>> > > > > > > > targets
>> > > > > > > > that can't perform the op in HW.
>> > > > > > > 
>> > > > > > > My initial thought for the target agnostic libclc, was to just 
>> > > > > > > provide some
>> > > > > > > (fake?) implementations of OpenCL built-in functions to make 
>> > > > > > > clang happy and
>> > > > > > > let me compile kernels using “get_global_id()”, as well as 
>> > > > > > > include headers
>> > > > > > > defining OpenCL specific types like “float4” or others. If there 
>> > > > > > > is another
>> > > > > > > (better?) way to achieve this, I am all ears. (There is probably 
>> > > > > > > one, as I had
>> > > > > > > no issues when using the Khronos LLVM/clang fork rather than 
>> > > > > > > Tomeu’s
>> > > > > > > out-of-tree module, the former having also some bits and pieces 
>> > > > > > > in clang.)
>> > > > > > 
>> > > > > > I don't think you need libclc for this. workitem IDs are
>> > > > > > platform/device specific, and iiuc SPIR-V builtins should handle 
>> > > > > > it in
>> > > > > > an abstract way [0]. any conversion consuming SPIR-V needs to 
>> > > > > > replace
>> > > > > > those with device/platform specific way of obtaining the 
>> > > > > > information.
>> > > > > > you can also use clang's clc header to provide data types [1].
>> > > > > > 
>> > > > > > 
>> > > > > > [0] 
>> > > > > > https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#B
>> > > > > > uiltIn
>> > > > > > [1] 
>> > > > > > https://github.com/llvm-mirror/clang/blob/master/lib/Headers/opencl
>> > > > > > -c.h
>> > > > > > 
>> > > > > > > 
>> > > > > > > > Extending the current libclc to provide target specific SPIR-V 
>> > > > > > > > binaries
>> > > > > > > > in addition to/in place of LLVM IR is rather straightforward. 
>> > > > > > > > Adding
>> > > > > > > > additional targets it's more work since it relies on clang to 
>> > > > > > > > support
>> > > > > > > > those targets.
>> > > > > > > 
>> > > > > > > I’m curious how those target specific SPIR-V binaries would look 
>> > > > > > > like. I can
>> > > > > > > imagine how some functions like “OpSign” could be implemented 
>> > > > > > > using other
>> > > > > > > SPIR-V functions, but how would you handle something like 
>> > > > > > > “get_local_id()”? If
>> > > > > > > you define it as the built-in “LocalInvocationId” and don’t 
>> > > > > > > supply an
>> > > > > > > implementation of it, then you lose the target specificness. On 
>> > > > > > > the other hand,
>> > > > > > > if you want to keep it device-specific, how would you express 
>> > > > > > > that in SPIR-V?
>> > > > > > 
>> > > > > > getting IDs is not a problem. SPIR-V should provide builtins for 
>> > > > > > that.
>> > > > > > 
>> > > > > > The problem I had in mind is when SPIR-V binary calls e.g. exp2(). 
>> > > > > > You
>> > > > > > can either assume that the op needs CLC precision (3 ulp), or 
>> > > > > > device
>> > > > > > native precision.
>> > > > > 
>> > > > > That's up to the SPIR-V extended instruction set specification to 
>> > > > > define
>> > > > > what precision the exp2 built-in is supposed to have.  
>> > > > > 
>> > > > > > SPIR-V binary can also call exp2(fp64), which does not have an
>> > > > > > equivalent GPU instruction.
>> > > > > 
>> > > > > Then it should probably be lowered by the SPIR-V front-end, right?
>> > > > 
>> > > > I'm not sure what you mean by "spir-v frontend". If it's the tool that
>> > > > generates SPIR-V, then no, not really. 
>> > > 
>> > > No, I meant the SPIR-V front-end of the driver (or whatever translation
>> > > pass in control of the driver is translating machine-agnostic SPIR-V
>> > > into some other more hardware-specific representation of the program).
>> > 
>> > OK. my question still stands. How does generic SPIR-V based libclc
>> > help the process?
>> > 
>> 
>> That I can think of now, it would remove the need for 

Re: [Mesa-dev] [PATCH 05/17] intel/compiler: Add Gen11+ native float type

2018-02-22 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:12 PM PST Matt Turner wrote:
> @@ -306,6 +312,7 @@ unsigned
>  brw_reg_type_to_size(enum brw_reg_type type)
>  {
> static const unsigned type_size[] = {
> +  [BRW_REGISTER_TYPE_NF] = 8,

This is a bit of a fib, given that NF is 66 bits, but...probably good
enough for our purposes here...

Patches 3-4 are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105211] build failure after zwp_dmabuf commit if wayland-protocols is not installed

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105211

--- Comment #4 from Dieter Nützel  ---
(In reply to Daniel Stone from comment #1)
> Yes, sorry about this, the autotools build is quite funny with
> wayland-protocols.
> 
> At the moment, you can solve this by simply removing the linux-dmabuf lines
> from src/vulkan/Makefile.sources, or you can install wayland-protocols. The
> protocols package is purely a bunch of XML files, with no dependencies. You
> don't need to actually enable Wayland support to bypass this build failure,
> just to have wayland-protocols present somewhere in the pkg-config search
> path.

Moin, Daniel ;-)

On my (current) openSUSE Tumbleweed
wayland-protocols-devel-1.13-1.1.noarch
did the trick.

Thank you!

Dieter

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Ilia Mirkin
On Thu, Feb 22, 2018 at 7:41 PM, Ilia Mirkin  wrote:
> On Thu, Feb 22, 2018 at 7:39 PM, Emil Velikov  
> wrote:
>> On 23 February 2018 at 00:17, Ilia Mirkin  wrote:
>>> On Thu, Feb 22, 2018 at 7:11 PM, Emil Velikov  
>>> wrote:
> diff --git a/src/mesa/state_tracker/st_context.c 
> b/src/mesa/state_tracker/st_context.c
> index d3e7d3fb7fa..4c14245a38f 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, 
> struct pipe_context *pipe,
>   ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = 
> true;
> }
>
> +   /* The QT framework has a bug in their shader program cache, which is 
> built
> +* on GL_ARB_get_program_binary. In an effort to allow them to fix 
> the bug
> +* we don't enable binary formats for compatibility profiles.
> +* This is only being done on the 18.0 release branch.
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT) {
> +  ctx->Const.NumProgramBinaryFormats = 0;
> +   }
> +
 Please move this alike the i965 instance -> adjust the constant within
 st_init_limits.
>>>
>>> Nope. It's in the right place where it is. (I put it there, Jordan
>>> just added it to his patch.)
>>>
>> Silly question incoming - Why? It seems misleading to set the const in
>> one place, only to change it elsewhere?
>
> Doing what you suggest won't compile. Limits (and extensions) are used
> to infer ctx->API. So you can't set them based on ctx->API, it's
> circular. So this, like a handful of other things, are
> post-auto-everything fixups.

Actually not strictly correct -- st_init_limits is used as part of the
GLX_MESA_query_renderer dance. It does not currently receive a ctx
because one doesn't necessarily exist, but it definitely knows what
API it is in all cases, so that could be passed in as an argument.

However that would be more change for what seems like no reason. Other
dependencies can be more complicated (the circular dependencies start
hitting when you start depending on versions of things), so the
post-fixup approach is still going to be needed. And this will all get
backed out, so no need to modify a bunch of surrounding code.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Fix autotools build.

2018-02-22 Thread Bas Nieuwenhuizen
Somewhere along the way the Makefile changes got lost ...

Fixes: 4db78f3a6b "radv: Put supported extensions in a struct."
---
 src/amd/vulkan/Makefile.am | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index 6102596894..80937e38d3 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -129,7 +129,9 @@ radv_extensions.c: radv_extensions.py \
$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
--xml $(vulkan_api_xml) \
--xml $(vk_android_native_buffer_xml) \
-   --out $@
+   --out-c radv_extensions.c \
+   --out-h radv_extensions.h
+radv_extensions.h: radv_extensions.c
 
 vk_format_table.c: vk_format_table.py \
   vk_format_parse.py \
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Ilia Mirkin
On Thu, Feb 22, 2018 at 7:39 PM, Emil Velikov  wrote:
> On 23 February 2018 at 00:17, Ilia Mirkin  wrote:
>> On Thu, Feb 22, 2018 at 7:11 PM, Emil Velikov  
>> wrote:
 diff --git a/src/mesa/state_tracker/st_context.c 
 b/src/mesa/state_tracker/st_context.c
 index d3e7d3fb7fa..4c14245a38f 100644
 --- a/src/mesa/state_tracker/st_context.c
 +++ b/src/mesa/state_tracker/st_context.c
 @@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, struct 
 pipe_context *pipe,
   ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
 }

 +   /* The QT framework has a bug in their shader program cache, which is 
 built
 +* on GL_ARB_get_program_binary. In an effort to allow them to fix the 
 bug
 +* we don't enable binary formats for compatibility profiles.
 +* This is only being done on the 18.0 release branch.
 +*/
 +   if (ctx->API == API_OPENGL_COMPAT) {
 +  ctx->Const.NumProgramBinaryFormats = 0;
 +   }
 +
>>> Please move this alike the i965 instance -> adjust the constant within
>>> st_init_limits.
>>
>> Nope. It's in the right place where it is. (I put it there, Jordan
>> just added it to his patch.)
>>
> Silly question incoming - Why? It seems misleading to set the const in
> one place, only to change it elsewhere?

Doing what you suggest won't compile. Limits (and extensions) are used
to infer ctx->API. So you can't set them based on ctx->API, it's
circular. So this, like a handful of other things, are
post-auto-everything fixups.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Emil Velikov
On 23 February 2018 at 00:17, Ilia Mirkin  wrote:
> On Thu, Feb 22, 2018 at 7:11 PM, Emil Velikov  
> wrote:
>> Hi Jordan,
>>
>> On 22 February 2018 at 19:59, Jordan Justen  
>> wrote:
>>> The QT framework has a bug in their shader program cache, which is
>>> built on GL_ARB_get_program_binary.
>>>
>>> In an effort to allow them to fix the bug we don't enable more than 1
>>> binary format for compatibility profiles.
>>>
>> Doing NumProgramBinaryFormats = 0; does not disable the extension.
>> So either the commit summary is off, or you'd want to actually disable
>> the extension.
>
> Desire is to leave the ext enabled, just go back to not supporting any
> binary formats. Summary could be updated to say "Disable binary
> formats for compat profiles" or something.
>
>> Latter of which could be achieved by dropping the GLC
>> instance in src/mesa/main/extensions_table.h.
>> Note: i'm not 100% sure on that last one.
>
> I looked at doing that, but it's a lot more of a pain than it's worth.
> (You also have to teach the functions to not do anything useful, or
> figure out how to get them out of the dispatch table for compat.)
> There could also be software that really wants those entrypoints but
> deals with the lack of formats -- going back to the "old thing" seemed
> safer (i.e. ext enabled, zero formats).
>
Indeed disabling the extension might lead to some hasty side-effects.

>>
>> Note that NumProgramBinaryFormats also impacts the ES2 version of the
>> extension OES_get_program_binary.
>> Hence, I'd use explicit if (ctx->API == API_OPENGL_COMPAT) checks.
>
> That's precisely what the patch does... am I missing something?
>
Nope, you're not. I've got confused by the i965/gallium asymmetry.

>>
>>> This is only being done on the 18.0 release branch.
>>>
>> Perhaps tad silly question: why do we want this only for the 18.0
>> series and not in master?
>
> Reasonable question, one I don't have a perfect answer to. It's a QT
> bug, so I guess the hope is that it'll be fixed by the time next
> release rolls around? FWIW I don't have a huge amount of preference on
> this.
>
Certainly hope so. Yet anyone testing Mesa from git will get the
lovely issue. Perhaps they also use patched QT?

>>
>>> Ref: https://bugreports.qt.io/browse/QTBUG-66420
>>> Ref: https://bugs.freedesktop.org/show_bug.cgi?id=105065
>>> Cc: "18.0" 
>>> Cc: Mark Janes 
>>> Cc: Kenneth Graunke 
>>> Cc: Scott D Phillips 
>>> Cc: Ilia Mirkin 
>>> Signed-off-by: Jordan Justen 
>>> [imir...@alum.mit.edu: Added st_context.c (gallium) change]
>>> ---
>>>  docs/relnotes/17.4.0.html   | 2 +-
>>>  src/mesa/drivers/dri/i965/brw_context.c | 9 -
>>>  src/mesa/state_tracker/st_context.c | 9 +
>>>  3 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
>>> index 412c0fc455e..fecdfe77969 100644
>>> --- a/docs/relnotes/17.4.0.html
>>> +++ b/docs/relnotes/17.4.0.html
>>> @@ -53,7 +53,7 @@ Note: some of the new features are only available with 
>>> certain drivers.
>>>  GL_ARB_enhanced_layouts on r600/evergreen+
>>>  GL_ARB_bindless_texture on nvc0/kepler
>>>  OpenGL 4.3 on r600/evergreen with hw fp64 support
>>> -Support 1 binary format for GL_ARB_get_program_binary on i965
>>> +Support 1 binary format for GL_ARB_get_program_binary on i965 (except 
>>> in GL compatibility profiles)
>>>  
>>>
>>>  Bug fixes
>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
>>> b/src/mesa/drivers/dri/i965/brw_context.c
>>> index e9358b7bc9c..58527d77263 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>>> @@ -704,7 +704,14 @@ brw_initialize_context_constants(struct brw_context 
>>> *brw)
>>>ctx->Const.AllowMappedBuffersDuringExecution = true;
>>>
>>> /* GL_ARB_get_program_binary */
>>> -   ctx->Const.NumProgramBinaryFormats = 1;
>>> +   /* The QT framework has a bug in their shader program cache, which is 
>>> built
>>> +* on GL_ARB_get_program_binary. In an effort to allow them to fix the 
>>> bug
>>> +* we don't enable more than 1 binary format for compatibility profiles.
>>> +* This is only being done on the 18.0 release branch.
>>> +*/
>>> +   if (ctx->API != API_OPENGL_COMPAT) {
>>> +  ctx->Const.NumProgramBinaryFormats = 1;
>>> +   }
>>>  }
>>>
>>>  static void
>>> diff --git a/src/mesa/state_tracker/st_context.c 
>>> b/src/mesa/state_tracker/st_context.c
>>> index d3e7d3fb7fa..4c14245a38f 100644
>>> --- a/src/mesa/state_tracker/st_context.c
>>> +++ b/src/mesa/state_tracker/st_context.c
>>> @@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, struct 
>>> pipe_context *pipe,
>>>   

Re: [Mesa-dev] [PATCH 3/3] mesa: skip validation of legality of size/type queries for format queries

2018-02-22 Thread Roland Scheidegger
Am 22.02.2018 um 08:58 schrieb Alejandro Piñeiro:
> On 30/01/18 01:24, Roland Scheidegger wrote:
>> Am 29.01.2018 um 17:03 schrieb Alejandro Piñeiro:
>>> On 29/01/18 16:38, Roland Scheidegger wrote:
 Am 29.01.2018 um 09:09 schrieb Alejandro Piñeiro:
> On 27/01/18 12:09, Roland Scheidegger wrote:
>> Am 27.01.2018 um 09:52 schrieb Alejandro Piñeiro:
>>> On 27/01/18 01:59, srol...@vmware.com wrote:
 From: Roland Scheidegger 

 The size/type query is always legal (if we made it that far).
 This causes a difference for GL_TEXTURE_BUFFER - the reason is that 
 these
 parameters are valid only with GetTexLevelParameter() if gl 3.1 is 
 supported,
 but not if only ARB_texture_buffer_object is supported.
 However, while the spec says that these queries return "the same 
 information
 as querying GetTexLevelParameter" I believe we're not expected to 
 return just
 zeros here. By definition, these pnames are always valid (unlike for 
 the
 GetTexLevelParameter() function which would return an error without GL 
 3.1),
 so returning 0 but no error makes no sense to me.
>>> But in general, AFAIU, this is how GetInternalFormat works. The
>>> extension only raises an error if their API is not used properly.  But
>>> not if the combination being requested doesn't make sense.
>>>
>>> For example, the pname GET_TEXTURE_IMAGE_FORMAT, would return the same
>>> value that using GetTexImage. But if the resource is not supported by
>>> GetTexImage, it should return NONE, but not raise an error. So for
>>> example, you could use GL_RENDERBUFFER as the target for a
>>> GET_TEXTURE_IMAGE_FORMAT query, and just get GL_NONE, but not an
>>> error, while that same target would raise an error on GetTexImage.
>> This is correct, but I don't think that's really comparable. The case
>> you cited is if something isn't supported - this is not the case here at
>> all, if some format with TEXTURE_BUFFER is supported, we're just lying
>> about the size/type because ARB_tbo made these properties non-queryable.
> Are not queryable for specific GL versions, as you said. For me it would
> be inconsistent if you are able to query the tbo sizes with
> GetInternalformat, but not with gettexlevelparameter on a given opengl
> version. Raising or not an error should not be a reason to discard it,
> because as I said, ARB_internalformat_query2 in general avoid raising GL
> error for unsupported cases.
>
> What I'm trying to say is that the current implementation is not wrong,
> just more literal to the current spec wording. Yes, perhaps too literal
> in some cases. But I also have the feeling that your patch pushes too
> much on the supposed original intention of this query.
>
> Having said so, this is just my opinion. So if anyone else agrees with
> your interpretation of the desired behaviour on this query, I don't
> think that it is a big deal to include this patch.
 There's actually a specific bit in the internalformat_query2 spec which
 makes me think that "return the same information as querying
 GetTexLevelParameter" should not be taken literally and is just
 informational: the part about GL_INTERNALFORMAT_STENCIL_TYPE - the
 language applies to that as well, yet an equivalent pname for
 GetTexLevelParameter does not even exist.
>>> Hmm, good point. And as the code is right now, it filters the target
>>> based on being supported or not by GetTexLevelParameter, but then (if
>>> the target is not filtered), returns a value for STENCIL_TYPE, even
>>> although GetTexLevelParameter doesn't have a equivalent pname/query.
>>>
 And imho even if you take it literally, it's ambiguous at best, since the
 same paragraph doesn't just mention the "return the same as..." part,
 but also explicitly says that 0/none is returned if the format is
 unsupported, which contradicts this part (well you can of course
 interpret it that this is not a sufficient condition to return 0, but I
 don't think that was the intention).
>>> In general, this spec has a lot (too much?) of room for interpretation.
>> Yes, I agree with that. (Of course it doesn't help that the dependency
>> section is absolutely massive.)
>>
 I agree though my interpretation is more in line what I think was
 probably the intention and can't directly be derived from the wording -
 in general however you can always use all pname/target bits and get
 valid answers iff the format/pname combination is supported, so this not
 working for texture_buffer would be very awkward imho.

 But maybe someone more familiar with the spec could chime in...
>>> I will open a spec issue for this. Meanwhile they reply:
>>>
>>> Reviewed-by: Alejandro 

Re: [Mesa-dev] vulkan/wsi/wayland: Add support for zwp_dmabuf

2018-02-22 Thread Dieter Nützel

Am 22.02.2018 10:49, schrieb Daniel Stone:

Hi Dieter,

2018-02-22 0:57 GMT+00:00 Dieter Nützel :

Making all in vulkan
make[3]: Verzeichnis „/opt/mesa/src/vulkan“ wird betreten
make[3]: *** Keine Regel vorhanden, um das Ziel
„/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml“,
  benötigt von „wsi/linux-dmabuf-unstable-v1-protocol.c“, zu 
erstellen.

Schluss.
make[3]: Verzeichnis „/opt/mesa/src/vulkan“ wird verlassen
make[2]: *** [Makefile:862: all-recursive] Fehler 1
make[2]: Verzeichnis „/opt/mesa/src“ wird verlassen
make[1]: *** [Makefile:653: all] Fehler 2
make[1]: Verzeichnis „/opt/mesa/src“ wird verlassen
make: *** [Makefile:666: all-recursive] Fehler 1

I do not have (do not know of) any wayland stuff running, here.


Thanks for reporting this, and sorry for the breakage. I did test with
a no-Wayland autotools build before pushing, but that succeeded for me. To
fix the build, you can either use Meson or just install
wayland-protocols on your system: no need to enable the Wayland build
in Mesa or use it at all, just have it available in $PKG_CONFIG_PATH.
(It's just a pile of XML, no need to compile anything.)


Moin, Daniel ;-)

On my (current) openSUSE Tumbleweed
wayland-protocols-devel-1.13-1.1.noarch
did the trick.

Thank you!

Dieter

[-]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Ilia Mirkin
On Thu, Feb 22, 2018 at 7:11 PM, Emil Velikov  wrote:
> Hi Jordan,
>
> On 22 February 2018 at 19:59, Jordan Justen  wrote:
>> The QT framework has a bug in their shader program cache, which is
>> built on GL_ARB_get_program_binary.
>>
>> In an effort to allow them to fix the bug we don't enable more than 1
>> binary format for compatibility profiles.
>>
> Doing NumProgramBinaryFormats = 0; does not disable the extension.
> So either the commit summary is off, or you'd want to actually disable
> the extension.

Desire is to leave the ext enabled, just go back to not supporting any
binary formats. Summary could be updated to say "Disable binary
formats for compat profiles" or something.

> Latter of which could be achieved by dropping the GLC
> instance in src/mesa/main/extensions_table.h.
> Note: i'm not 100% sure on that last one.

I looked at doing that, but it's a lot more of a pain than it's worth.
(You also have to teach the functions to not do anything useful, or
figure out how to get them out of the dispatch table for compat.)
There could also be software that really wants those entrypoints but
deals with the lack of formats -- going back to the "old thing" seemed
safer (i.e. ext enabled, zero formats).

>
> Note that NumProgramBinaryFormats also impacts the ES2 version of the
> extension OES_get_program_binary.
> Hence, I'd use explicit if (ctx->API == API_OPENGL_COMPAT) checks.

That's precisely what the patch does... am I missing something?

>
>> This is only being done on the 18.0 release branch.
>>
> Perhaps tad silly question: why do we want this only for the 18.0
> series and not in master?

Reasonable question, one I don't have a perfect answer to. It's a QT
bug, so I guess the hope is that it'll be fixed by the time next
release rolls around? FWIW I don't have a huge amount of preference on
this.

>
>> Ref: https://bugreports.qt.io/browse/QTBUG-66420
>> Ref: https://bugs.freedesktop.org/show_bug.cgi?id=105065
>> Cc: "18.0" 
>> Cc: Mark Janes 
>> Cc: Kenneth Graunke 
>> Cc: Scott D Phillips 
>> Cc: Ilia Mirkin 
>> Signed-off-by: Jordan Justen 
>> [imir...@alum.mit.edu: Added st_context.c (gallium) change]
>> ---
>>  docs/relnotes/17.4.0.html   | 2 +-
>>  src/mesa/drivers/dri/i965/brw_context.c | 9 -
>>  src/mesa/state_tracker/st_context.c | 9 +
>>  3 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
>> index 412c0fc455e..fecdfe77969 100644
>> --- a/docs/relnotes/17.4.0.html
>> +++ b/docs/relnotes/17.4.0.html
>> @@ -53,7 +53,7 @@ Note: some of the new features are only available with 
>> certain drivers.
>>  GL_ARB_enhanced_layouts on r600/evergreen+
>>  GL_ARB_bindless_texture on nvc0/kepler
>>  OpenGL 4.3 on r600/evergreen with hw fp64 support
>> -Support 1 binary format for GL_ARB_get_program_binary on i965
>> +Support 1 binary format for GL_ARB_get_program_binary on i965 (except 
>> in GL compatibility profiles)
>>  
>>
>>  Bug fixes
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
>> b/src/mesa/drivers/dri/i965/brw_context.c
>> index e9358b7bc9c..58527d77263 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -704,7 +704,14 @@ brw_initialize_context_constants(struct brw_context 
>> *brw)
>>ctx->Const.AllowMappedBuffersDuringExecution = true;
>>
>> /* GL_ARB_get_program_binary */
>> -   ctx->Const.NumProgramBinaryFormats = 1;
>> +   /* The QT framework has a bug in their shader program cache, which is 
>> built
>> +* on GL_ARB_get_program_binary. In an effort to allow them to fix the 
>> bug
>> +* we don't enable more than 1 binary format for compatibility profiles.
>> +* This is only being done on the 18.0 release branch.
>> +*/
>> +   if (ctx->API != API_OPENGL_COMPAT) {
>> +  ctx->Const.NumProgramBinaryFormats = 1;
>> +   }
>>  }
>>
>>  static void
>> diff --git a/src/mesa/state_tracker/st_context.c 
>> b/src/mesa/state_tracker/st_context.c
>> index d3e7d3fb7fa..4c14245a38f 100644
>> --- a/src/mesa/state_tracker/st_context.c
>> +++ b/src/mesa/state_tracker/st_context.c
>> @@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, struct 
>> pipe_context *pipe,
>>   ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
>> }
>>
>> +   /* The QT framework has a bug in their shader program cache, which is 
>> built
>> +* on GL_ARB_get_program_binary. In an effort to allow them to fix the 
>> bug
>> +* we don't enable binary formats for compatibility profiles.
>> +* This is only being done on the 18.0 release branch.
>> +*/
>> +   if (ctx->API == API_OPENGL_COMPAT) {
>> +  ctx->Const.NumProgramBinaryFormats = 0;
>> +   }
>> +
> 

Re: [Mesa-dev] [PATCH] radv: Really use correct HTILE expanded words.

2018-02-22 Thread Grazvydas Ignotas
Seems to fix dxvk, nice! Also tested DOOM which still works.
Tested-by: Grazvydas Ignotas 

On Thu, Feb 22, 2018 at 6:57 PM, James Legg  wrote:
> When transitioning to an htile compressed depth format, set the full
> depth range, so later rasterization can pass HiZ. Previously, for depth
> only formats, the depth range was set to 0 to 0. This caused unwanted
> HiZ rejections with a VK_FORMAT_D16_UNORM depth buffer
> (VK_FORMAT_D32_SFLOAT was not affected somehow).
>
> These values are derived from PAL [0], since I can't find the
> specification describing the htile values.
>
> Fixes 5158603182fe7435: radv: Use correct HTILE expanded words.

Please put this as "Fixes: ..." below along with other tags.

>
> [0] 
> https://github.com/GPUOpen-Drivers/pal/blob/5cba4ecbda9452773f59692f5915301e7db4a183/src/core/hw/gfxip/gfx9/gfx9MaskRam.cpp#L1500
>
> CC: Dave Airlie 
> CC: Bas Nieuwenhuizen 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 8a384b114c..2b41baea3d 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -3440,8 +3440,8 @@ void radv_CmdEndRenderPass(
>
>  /*
>   * For HTILE we have the following interesting clear words:
> - *   0x030f: Uncompressed for depth+stencil HTILE.
> - *   0x000f: Uncompressed for depth only HTILE.
> + *   0xf30f: Uncompressed, full depth range, for depth+stencil HTILE
> + *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
>   *   0xfff0: Clear depth to 1.0
>   *   0x: Clear depth to 0.0
>   */
> @@ -3489,7 +3489,7 @@ static void radv_handle_depth_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> radv_initialize_htile(cmd_buffer, image, range, 0);
> } else if (!radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> -   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0x30f : 0xf;
> +   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> radv_initialize_htile(cmd_buffer, image, range, clear_value);
> } else if (radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>!radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Emil Velikov
Hi Jordan,

On 22 February 2018 at 19:59, Jordan Justen  wrote:
> The QT framework has a bug in their shader program cache, which is
> built on GL_ARB_get_program_binary.
>
> In an effort to allow them to fix the bug we don't enable more than 1
> binary format for compatibility profiles.
>
Doing NumProgramBinaryFormats = 0; does not disable the extension.
So either the commit summary is off, or you'd want to actually disable
the extension. Latter of which could be achieved by dropping the GLC
instance in src/mesa/main/extensions_table.h.
Note: I'm not 100% sure on that last one.

Note that NumProgramBinaryFormats also impacts the ES2 version of the
extension OES_get_program_binary.
Hence, I'd use explicit if (ctx->API == API_OPENGL_COMPAT) checks.

> This is only being done on the 18.0 release branch.
>
Perhaps a tad silly question: why do we want this only for the 18.0
series and not in master?

> Ref: https://bugreports.qt.io/browse/QTBUG-66420
> Ref: https://bugs.freedesktop.org/show_bug.cgi?id=105065
> Cc: "18.0" 
> Cc: Mark Janes 
> Cc: Kenneth Graunke 
> Cc: Scott D Phillips 
> Cc: Ilia Mirkin 
> Signed-off-by: Jordan Justen 
> [imir...@alum.mit.edu: Added st_context.c (gallium) change]
> ---
>  docs/relnotes/17.4.0.html   | 2 +-
>  src/mesa/drivers/dri/i965/brw_context.c | 9 -
>  src/mesa/state_tracker/st_context.c | 9 +
>  3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
> index 412c0fc455e..fecdfe77969 100644
> --- a/docs/relnotes/17.4.0.html
> +++ b/docs/relnotes/17.4.0.html
> @@ -53,7 +53,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  GL_ARB_enhanced_layouts on r600/evergreen+
>  GL_ARB_bindless_texture on nvc0/kepler
>  OpenGL 4.3 on r600/evergreen with hw fp64 support
> -Support 1 binary format for GL_ARB_get_program_binary on i965
> +Support 1 binary format for GL_ARB_get_program_binary on i965 (except in 
> GL compatibility profiles)
>  
>
>  Bug fixes
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index e9358b7bc9c..58527d77263 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -704,7 +704,14 @@ brw_initialize_context_constants(struct brw_context *brw)
>ctx->Const.AllowMappedBuffersDuringExecution = true;
>
> /* GL_ARB_get_program_binary */
> -   ctx->Const.NumProgramBinaryFormats = 1;
> +   /* The QT framework has a bug in their shader program cache, which is 
> built
> +* on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
> +* we don't enable more than 1 binary format for compatibility profiles.
> +* This is only being done on the 18.0 release branch.
> +*/
> +   if (ctx->API != API_OPENGL_COMPAT) {
> +  ctx->Const.NumProgramBinaryFormats = 1;
> +   }
>  }
>
>  static void
> diff --git a/src/mesa/state_tracker/st_context.c 
> b/src/mesa/state_tracker/st_context.c
> index d3e7d3fb7fa..4c14245a38f 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, struct 
> pipe_context *pipe,
>   ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
> }
>
> +   /* The QT framework has a bug in their shader program cache, which is 
> built
> +* on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
> +* we don't enable binary formats for compatibility profiles.
> +* This is only being done on the 18.0 release branch.
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT) {
> +  ctx->Const.NumProgramBinaryFormats = 0;
> +   }
> +
Please move this, as in the i965 instance -> adjust the constant within
st_init_limits.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/allocator: Unconditionally call futex_wake

2018-02-22 Thread Jason Ekstrand
There is a potential race between the __sync_fetch_and_add and the
futex_wake where another thread could come in and start waiting.  If we
hit this case, the other thread will never get woken back up because the
futex_wake doesn't get called.  We can fix this by calling futex_wake
unconditionally.

There was another potential bug because __sync_fetch_and_add does not
guarantee that previous writes are globally visible.  In particular, the
updates to the pool caused by growing it may not be visible.  If memory
writes from the growing thread happen out-of-order, this could cause a
waiting thread to come in and try to pull a block before the grow has
completed.  Now that we are no longer predicating the futex_wake, we no
longer need the result of the __sync_fetch_and_add and it can be
replaced with a __sync_synchronize and a regular 64-bit write.
---
 src/intel/vulkan/anv_allocator.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index fe14d6c..a64ebd0 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -546,7 +546,7 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
  struct anv_block_state *pool_state,
  uint32_t block_size)
 {
-   struct anv_block_state state, old, new;
+   struct anv_block_state state, new;
 
while (1) {
   state.u64 = __sync_fetch_and_add(_state->u64, block_size);
@@ -564,9 +564,9 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
 new.end = anv_block_pool_grow(pool, pool_state);
  } while (new.end < new.next);
 
- old.u64 = __sync_lock_test_and_set(_state->u64, new.u64);
- if (old.next != state.next)
-futex_wake(_state->end, INT_MAX);
+ __sync_synchronize();
+ pool_state->u64 = new.u64;
+ futex_wake(_state->end, INT_MAX);
  return state.next;
   } else {
  futex_wait(_state->end, state.end, NULL);
@@ -645,7 +645,7 @@ anv_fixed_size_state_pool_alloc_new(struct 
anv_fixed_size_state_pool *pool,
 uint32_t state_size,
 uint32_t block_size)
 {
-   struct anv_block_state block, old, new;
+   struct anv_block_state block, new;
uint32_t offset;
 
/* If our state is large, we don't need any sub-allocation from a block.
@@ -663,9 +663,10 @@ anv_fixed_size_state_pool_alloc_new(struct 
anv_fixed_size_state_pool *pool,
   offset = anv_block_pool_alloc(block_pool, block_size);
   new.next = offset + state_size;
   new.end = offset + block_size;
-  old.u64 = __sync_lock_test_and_set(>block.u64, new.u64);
-  if (old.next != block.next)
- futex_wake(>block.end, INT_MAX);
+
+  __sync_synchronize();
+  pool->block.u64 = new.u64;
+  futex_wake(>block.end, INT_MAX);
   return offset;
} else {
   futex_wait(>block.end, block.end, NULL);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105207] The Talos Principle freezes system using radv

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105207

--- Comment #3 from pritzl3...@gmail.com ---
Created attachment 137546
  --> https://bugs.freedesktop.org/attachment.cgi?id=137546=edit
tracefile

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105207] The Talos Principle freezes system using radv

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105207

--- Comment #2 from pritzl3...@gmail.com ---
I'm not sure for how long this has been happening. Definitely for as long as I
had my Vega, so at least since the middle of October.
I almost always run the latest rc kernels and used to use Mesa from git but
have been using 17.3 and now 18.0-rcs lately.

When I add RADV_TRACE_FILE=/home/erik/temp/tracefile
RADV_DEBUG=allbos,syncshaders %command% starting the game freezes the desktop.
The screen turns black as the game starts but nothing more happens.

I can ssh in and see this in dmesg:

[  142.816155] [drm:amdgpu_job_timedout] *ERROR* ring comp_1.1.0 timeout, last
signaled seq=2, last emitted seq=3
[  142.816162] [drm] No hardware hang detected. Did some blocks stall?

I don't know if the tracefile is useful in this case but I'm attaching it to the
bug.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104654] r600/sb: Alien Isolation GPU lock

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104654

--- Comment #8 from Darius Spitznagel  ---
Maybe this helps.

Every "older" feral port has its own "shader warmer".

Disable it in the preferences file in
~/.local/share/feral-interactive/AlienIsolation like below...

0

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] mesa: Use atomics for shared VAO reference counts.

2018-02-22 Thread Brian Paul

On 02/21/2018 10:45 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

VAOs will be used in the next change as immutable object across multiple
contexts. Only reference counting may write concurrently on the VAO. So,
make the reference count thread safe for those and only those VAO objects.

Signed-off-by: Mathias Fröhlich 
---
  src/mesa/main/arrayobj.c | 35 ++-
  src/mesa/main/arrayobj.h |  9 +
  src/mesa/main/mtypes.h   |  7 +++
  3 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index cf9c5d7ecc..063856d984 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -53,6 +53,7 @@
  #include "varray.h"
  #include "main/dispatch.h"
  #include "util/bitscan.h"
+#include "util/u_atomic.h"
  
  
  const GLubyte

@@ -331,10 +332,16 @@ _mesa_reference_vao_(struct gl_context *ctx,
/* Unreference the old array object */
struct gl_vertex_array_object *oldObj = *ptr;
  
-  assert(oldObj->RefCount > 0);

-  oldObj->RefCount--;
+  bool deleteFlag;
+  if (oldObj->SharedAndImmutable) {
+ deleteFlag = p_atomic_dec_zero(>RefCount);
+  } else {
+ assert(oldObj->RefCount > 0);
+ oldObj->RefCount--;
+ deleteFlag = (oldObj->RefCount == 0);
+  }
  
-  if (oldObj->RefCount == 0)

+  if (deleteFlag)
   _mesa_delete_vao(ctx, oldObj);
  
*ptr = NULL;

@@ -343,9 +350,13 @@ _mesa_reference_vao_(struct gl_context *ctx,
  
 if (vao) {

/* reference new array object */
-  assert(vao->RefCount > 0);
+  if (vao->SharedAndImmutable) {
+ p_atomic_inc(>RefCount);
+  } else {
+ assert(vao->RefCount > 0);
+ vao->RefCount++;
+  }
  
-  vao->RefCount++;

*ptr = vao;
 }
  }
@@ -407,6 +418,7 @@ _mesa_initialize_vao(struct gl_context *ctx,
 vao->Name = name;
  
 vao->RefCount = 1;

+   vao->SharedAndImmutable = GL_FALSE;
  
 /* Init the individual arrays */

 for (i = 0; i < ARRAY_SIZE(vao->VertexAttrib); i++) {
@@ -452,6 +464,9 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx,
  {
 GLbitfield arrays = vao->NewArrays;
  
+   /* Make sure we do not run into problems with shared objects */

+   assert(!vao->SharedAndImmutable || vao->NewArrays == 0);
+
 while (arrays) {
const int attrib = u_bit_scan();
struct gl_vertex_array *array = >_VertexArray[attrib];
@@ -465,6 +480,16 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx,
  }
  
  
+void

+_mesa_set_vao_immutable(struct gl_context *ctx,
+struct gl_vertex_array_object *vao)
+{
+   _mesa_update_vao_derived_arrays(ctx, vao);
+   vao->NewArrays = 0;
+   vao->SharedAndImmutable = GL_TRUE;
+}
+
+
  bool
  _mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao)
  {
diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 5de74505bb..8da5c9ffe0 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -81,6 +81,15 @@ extern void
  _mesa_update_vao_derived_arrays(struct gl_context *ctx,
  struct gl_vertex_array_object *vao);
  
+

+/**
+ * Mark the vao as shared and immutable, do remaining updates.
+ */
+extern void
+_mesa_set_vao_immutable(struct gl_context *ctx,
+struct gl_vertex_array_object *vao);
+
+
  /* Returns true if all varying arrays reside in vbos */
  extern bool
  _mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index bdecd422a9..9965cf8447 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1607,6 +1607,13 @@ struct gl_vertex_array_object
  */
 GLboolean EverBound;
  
+   /**

+* Marked to true if the object is shared between contexts and immutable.
+* Then reference counting is done using atomics and thread safe.
+* Is used for dlist VAOs.
+*/
+   GLboolean SharedAndImmutable;


We could actually use bool/true/false for this field.  It doesn't 
correspond to any public GL state.


-Brian


+
 /**
  * Derived vertex attribute arrays
  *



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/13] mesa: Introduce a yet unused _DrawVAO.

2018-02-22 Thread Brian Paul
Just a few little nit-picks.  With those fixed, the series is: 
Reviewed-by: Brian Paul 


Nice work!

On 02/21/2018 10:45 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

During the patch series this VAO gets populated with either the currently
bound VAO or an internal VAO that will be used for immediate mode and
dlist rendering.

v2: More comments about the _DrawVAO, filter and enabled mask.
 Rename _DrawVAOEnabled to _DrawVAOEnabledAttribs.

Signed-off-by: Mathias Fröhlich 
---
  src/mesa/main/arrayobj.c |  4 
  src/mesa/main/attrib.c   |  2 ++
  src/mesa/main/context.c  |  2 ++
  src/mesa/main/mtypes.h   | 22 ++
  src/mesa/main/state.c| 21 +
  src/mesa/main/state.h| 13 +
  src/mesa/main/varray.c   |  2 ++
  7 files changed, 66 insertions(+)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index a6fa33c82c..cf9c5d7ecc 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -49,6 +49,7 @@
  #include "arrayobj.h"
  #include "macros.h"
  #include "mtypes.h"
+#include "state.h"
  #include "varray.h"
  #include "main/dispatch.h"
  #include "util/bitscan.h"
@@ -578,6 +579,7 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, bool 
no_error)
  * deleted.
  */
 _mesa_set_drawing_arrays(ctx, NULL);
+   _mesa_set_draw_vao(ctx, ctx->Array._EmptyVAO, 0);
  
 ctx->NewState |= _NEW_ARRAY;

 _mesa_reference_vao(ctx, >Array.VAO, newObj);
@@ -629,6 +631,8 @@ delete_vertex_arrays(struct gl_context *ctx, GLsizei n, 
const GLuint *ids)
  
   if (ctx->Array.LastLookedUpVAO == obj)

  _mesa_reference_vao(ctx, >Array.LastLookedUpVAO, NULL);
+ if (ctx->Array._DrawVAO == obj)
+_mesa_set_draw_vao(ctx, ctx->Array._EmptyVAO, 0);
  
   /* Unreference the array object.

* If refcount hits zero, the object will be deleted.
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 398ff653b7..dd6b98ce04 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -57,6 +57,7 @@
  #include "viewport.h"
  #include "mtypes.h"
  #include "main/dispatch.h"
+#include "state.h"
  #include "hash.h"
  #include 
  
@@ -1548,6 +1549,7 @@ copy_array_attrib(struct gl_context *ctx,
  
 /* Invalidate array state. It will be updated during the next draw. */

 _mesa_set_drawing_arrays(ctx, NULL);
+   _mesa_set_draw_vao(ctx, ctx->Array._EmptyVAO, 0);
  }
  
  /**

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 0aa2e3639f..e13343b5e6 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1335,6 +1335,8 @@ _mesa_free_context_data( struct gl_context *ctx )
  
 _mesa_reference_vao(ctx, >Array.VAO, NULL);

 _mesa_reference_vao(ctx, >Array.DefaultVAO, NULL);
+   _mesa_reference_vao(ctx, >Array._EmptyVAO, NULL);
+   _mesa_reference_vao(ctx, >Array._DrawVAO, NULL);
  
 _mesa_free_attrib_data(ctx);

 _mesa_free_buffer_objects(ctx);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 41df04d38d..bdecd422a9 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1691,6 +1691,28 @@ struct gl_array_attrib
 /* GL_ARB_vertex_buffer_object */
 struct gl_buffer_object *ArrayBufferObj;
  
+   /**

+* Vertex array object that is used with the currently active draw command.
+* The _DrawVAO is either set to the currently bound VAO for array type
+* draws or to internal VAO's set up by the vbo module to execute immediate
+* mode or display list draws.
+*/
+   struct gl_vertex_array_object *_DrawVAO;
+   /**
+* The VERT_BIT_* bits effectively enabled from the current _DrawVAO.
+* This is always a subset of _mesa_get_vao_vp_inputs(_DrawVAO)
+* but may omit those arrays that shall not be referenced by the current
+* gl_vertex_program_state::_VPMode. For example the generic attributes are
+* masked out from the _DrawVAO's enabled arrays when a fixed function
+* array draw is executed.
+*/
+   GLbitfield _DrawVAOEnabledAttribs;
+   /**
+* Initially or if the VAO referenced by _DrawVAO is deleted the _DrawVAO
+* pointer is set to the _EmptyVAO which is just an empty VAO all the time.
+*/
+   struct gl_vertex_array_object *_EmptyVAO;
+
 /**
  * Vertex arrays as consumed by a driver.
  * The array pointer is set up only by the VBO module.
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 2fd4fb9d32..5b1b3322e2 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -479,3 +479,24 @@ _mesa_update_vertex_processing_mode(struct gl_context *ctx)
 else
ctx->VertexProgram._VPMode = VP_MODE_FF;
  }
+
+
+void
+_mesa_set_draw_vao(struct gl_context *ctx, struct gl_vertex_array_object *vao,
+   GLbitfield filter)
+{
+   struct gl_vertex_array_object **ptr = >Array._DrawVAO;
+   if (*ptr != 

Re: [Mesa-dev] [PATCH 08/13] mesa: Make _mesa_vertex_attrib_binding public.

2018-02-22 Thread Brian Paul

On 02/21/2018 10:45 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

Change vertex_attrib_binding() to _mesa_vertex_attrib_binding(), add a
flush_vertices argument, and make it publically available.


"publicly"

-Brian


The function will be needed later in the series.

Signed-off-by: Mathias Fröhlich 
---
  src/mesa/main/varray.c | 38 --
  src/mesa/main/varray.h |  7 +++
  2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 90b874aa49..f7d32fdbef 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -155,11 +155,11 @@ update_attribute_map_mode(const struct gl_context *ctx,
   * Sets the BufferBindingIndex field for the vertex attribute given by
   * attribIndex.
   */
-static void
-vertex_attrib_binding(struct gl_context *ctx,
-  struct gl_vertex_array_object *vao,
-  gl_vert_attrib attribIndex,
-  GLuint bindingIndex)
+void
+_mesa_vertex_attrib_binding(struct gl_context *ctx,
+struct gl_vertex_array_object *vao,
+gl_vert_attrib attribIndex,
+GLuint bindingIndex, bool flush_vertices)
  {
 struct gl_array_attributes *array = >VertexAttrib[attribIndex];
  
@@ -171,7 +171,9 @@ vertex_attrib_binding(struct gl_context *ctx,

else
   vao->VertexAttribBufferMask &= ~array_bit;
  
-  FLUSH_VERTICES(ctx, _NEW_ARRAY);

+  if (flush_vertices) {
+ FLUSH_VERTICES(ctx, _NEW_ARRAY);
+  }
  
vao->BufferBinding[array->BufferBindingIndex]._BoundArrays &= ~array_bit;

vao->BufferBinding[bindingIndex]._BoundArrays |= array_bit;
@@ -592,7 +594,7 @@ update_array(struct gl_context *ctx,
   normalized, integer, doubles, 0);
  
 /* Reset the vertex attrib binding */

-   vertex_attrib_binding(ctx, vao, attrib, attrib);
+   _mesa_vertex_attrib_binding(ctx, vao, attrib, attrib, true);
  
 /* The Stride and Ptr fields are not set by update_array_format() */

 struct gl_array_attributes *array = >VertexAttrib[attrib];
@@ -2030,7 +2032,7 @@ _mesa_VertexAttribDivisor_no_error(GLuint index, GLuint 
divisor)
  *   VertexAttribBinding(index, index);
  *   VertexBindingDivisor(index, divisor);"
  */
-   vertex_attrib_binding(ctx, vao, genericIndex, genericIndex);
+   _mesa_vertex_attrib_binding(ctx, vao, genericIndex, genericIndex, true);
 vertex_binding_divisor(ctx, vao, genericIndex, divisor);
  }
  
@@ -2072,7 +2074,7 @@ _mesa_VertexAttribDivisor(GLuint index, GLuint divisor)

  *   VertexAttribBinding(index, index);
  *   VertexBindingDivisor(index, divisor);"
  */
-   vertex_attrib_binding(ctx, vao, genericIndex, genericIndex);
+   _mesa_vertex_attrib_binding(ctx, vao, genericIndex, genericIndex, true);
 vertex_binding_divisor(ctx, vao, genericIndex, divisor);
  }
  
@@ -2674,9 +2676,9 @@ vertex_array_attrib_binding(struct gl_context *ctx,
  
 assert(VERT_ATTRIB_GENERIC(attribIndex) < ARRAY_SIZE(vao->VertexAttrib));
  
-   vertex_attrib_binding(ctx, vao,

- VERT_ATTRIB_GENERIC(attribIndex),
- VERT_ATTRIB_GENERIC(bindingIndex));
+   _mesa_vertex_attrib_binding(ctx, vao,
+   VERT_ATTRIB_GENERIC(attribIndex),
+   VERT_ATTRIB_GENERIC(bindingIndex), true);
  }
  
  
@@ -2684,9 +2686,9 @@ void GLAPIENTRY

  _mesa_VertexAttribBinding_no_error(GLuint attribIndex, GLuint bindingIndex)
  {
 GET_CURRENT_CONTEXT(ctx);
-   vertex_attrib_binding(ctx, ctx->Array.VAO,
- VERT_ATTRIB_GENERIC(attribIndex),
- VERT_ATTRIB_GENERIC(bindingIndex));
+   _mesa_vertex_attrib_binding(ctx, ctx->Array.VAO,
+   VERT_ATTRIB_GENERIC(attribIndex),
+   VERT_ATTRIB_GENERIC(bindingIndex), true);
  }
  
  
@@ -2720,9 +2722,9 @@ _mesa_VertexArrayAttribBinding_no_error(GLuint vaobj, GLuint attribIndex,

 GET_CURRENT_CONTEXT(ctx);
  
 struct gl_vertex_array_object *vao = _mesa_lookup_vao(ctx, vaobj);

-   vertex_attrib_binding(ctx, vao,
- VERT_ATTRIB_GENERIC(attribIndex),
- VERT_ATTRIB_GENERIC(bindingIndex));
+   _mesa_vertex_attrib_binding(ctx, vao,
+   VERT_ATTRIB_GENERIC(attribIndex),
+   VERT_ATTRIB_GENERIC(bindingIndex), true);
  }
  
  
diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h

index 93ffb37a0d..6585355771 100644
--- a/src/mesa/main/varray.h
+++ b/src/mesa/main/varray.h
@@ -120,6 +120,13 @@ _mesa_disable_vertex_array_attrib(struct gl_context *ctx,
gl_vert_attrib attrib, bool flush_vertices);
  
  
+extern void


Re: [Mesa-dev] [PATCH] radv: Really use correct HTILE expanded words.

2018-02-22 Thread Bas Nieuwenhuizen
yeah, looks like I may have lifted the values from a driver which set
the ZRANGE_PRECISION to 0, but this at least mirrors PAL and fixes one
of the remaining transition issues there that I was aware of.

Reviewed-by: Bas Nieuwenhuizen 

since IIRC the last change was also done due to Feral noticing and we
are clearly lacking testcases in this area, can you check that that
case still works for you? Thanks a lot!



On Thu, Feb 22, 2018 at 5:57 PM, James Legg  wrote:
> When transitioning to an htile compressed depth format, set the full
> depth range, so later rasterization can pass HiZ. Previously, for depth
> only formats, the depth range was set to 0 to 0. This caused unwanted
> HiZ rejections with a VK_FORMAT_D16_UNORM depth buffer
> (VK_FORMAT_D32_SFLOAT was not affected somehow).
>
> These values are derived from PAL [0], since I can't find the
> specification describing the htile values.
>
> Fixes 5158603182fe7435: radv: Use correct HTILE expanded words.
>
> [0] 
> https://github.com/GPUOpen-Drivers/pal/blob/5cba4ecbda9452773f59692f5915301e7db4a183/src/core/hw/gfxip/gfx9/gfx9MaskRam.cpp#L1500
>
> CC: Dave Airlie 
> CC: Bas Nieuwenhuizen 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 8a384b114c..2b41baea3d 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -3440,8 +3440,8 @@ void radv_CmdEndRenderPass(
>
>  /*
>   * For HTILE we have the following interesting clear words:
> - *   0x030f: Uncompressed for depth+stencil HTILE.
> - *   0x000f: Uncompressed for depth only HTILE.
> + *   0xf30f: Uncompressed, full depth range, for depth+stencil HTILE
> + *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
>   *   0xfff0: Clear depth to 1.0
>   *   0x: Clear depth to 0.0
>   */
> @@ -3489,7 +3489,7 @@ static void radv_handle_depth_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> radv_initialize_htile(cmd_buffer, image, range, 0);
> } else if (!radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> -   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0x30f : 0xf;
> +   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> radv_initialize_htile(cmd_buffer, image, range, clear_value);
> } else if (radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>!radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glxinfo/wglinfo: print list of 4.3 shading language versions

2018-02-22 Thread Andreas Boll
Reviewed-by: Andreas Boll 

2018-02-21 21:09 GMT+01:00 Brian Paul :
> v2: fix indentation, prefix with "4.3:" like other sections.
> ---
>  src/xdemos/glinfo_common.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/src/xdemos/glinfo_common.c b/src/xdemos/glinfo_common.c
> index efa96e6..3668026 100644
> --- a/src/xdemos/glinfo_common.c
> +++ b/src/xdemos/glinfo_common.c
> @@ -795,6 +795,20 @@ print_limits(const char *extensions, const char 
> *oglstring, int version,
>}
>free(formats);
> }
> +
> +#if defined(GL_VERSION_4_3)
> +   if (version >= 43) {
> +  GLint i, n = 0;
> +  printf("  4.3:\n");
> +  glGetIntegerv(GL_NUM_SHADING_LANGUAGE_VERSIONS, &n);
> +  printf("GL_NUM_SHADING_LANGUAGE_VERSIONS = %d\n", n);
> +  for (i = 0; i < n; i++) {
> + printf("  %s\n", (const char *)
> +extfuncs->GetStringi(GL_SHADING_LANGUAGE_VERSION, i));
> +  }
> +   }
> +#endif
> +
>  #if defined(GL_ARB_vertex_program)
> if (extension_supported("GL_ARB_vertex_program", extensions)) {
>print_program_limits(GL_VERTEX_PROGRAM_ARB, extfuncs);
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glxinfo/wglinfo: fix indentation for listing compression formats

2018-02-22 Thread Andreas Boll
Reviewed-by: Andreas Boll 

2018-02-21 21:09 GMT+01:00 Brian Paul :
> And prefix with "GL_ARB_texture_compression:" like other sections.
> ---
>  src/xdemos/glinfo_common.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/xdemos/glinfo_common.c b/src/xdemos/glinfo_common.c
> index 0411891..efa96e6 100644
> --- a/src/xdemos/glinfo_common.c
> +++ b/src/xdemos/glinfo_common.c
> @@ -785,12 +785,13 @@ print_limits(const char *extensions, const char 
> *oglstring, int version,
> if (extension_supported("GL_ARB_texture_compression", extensions)) {
>GLint i, n;
>GLint *formats;
> +  printf("  GL_ARB_texture_compression:\n");
>glGetIntegerv(GL_NUM_COMPRESSED_TEXTURE_FORMATS, &n);
>printf("GL_NUM_COMPRESSED_TEXTURE_FORMATS = %d\n", n);
>formats = (GLint *) malloc(n * sizeof(GLint));
>glGetIntegerv(GL_COMPRESSED_TEXTURE_FORMATS, formats);
>for (i = 0; i < n; i++) {
> - printf("%s\n", enum_name(formats[i]));
> + printf("  %s\n", enum_name(formats[i]));
>}
>free(formats);
> }
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Scott D Phillips
Jordan Justen  writes:

> The QT framework has a bug in their shader program cache, which is
> built on GL_ARB_get_program_binary.
>
> In an effort to allow them to fix the bug we don't enable more than 1
> binary format for compatibility profiles.
>
> This is only being done on the 18.0 release branch.
>
> Ref: https://bugreports.qt.io/browse/QTBUG-66420
> Ref: https://bugs.freedesktop.org/show_bug.cgi?id=105065
> Cc: "18.0" 
> Cc: Mark Janes 
> Cc: Kenneth Graunke 
> Cc: Scott D Phillips 
> Cc: Ilia Mirkin 
> Signed-off-by: Jordan Justen 
> [imir...@alum.mit.edu: Added st_context.c (gallium) change]

for the i965 part:

Tested-by: Scott D Phillips 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2018-02-22 Thread Alex Deucher
On Thu, Feb 22, 2018 at 1:49 PM, Bas Nieuwenhuizen
 wrote:
> On Thu, Feb 22, 2018 at 7:04 PM, Kristian Høgsberg  
> wrote:
>> On Wed, Feb 21, 2018 at 4:00 PM Alex Deucher  wrote:
>>
>>> On Wed, Feb 21, 2018 at 1:14 AM, Chad Versace 
>> wrote:
>>> > On Thu 21 Dec 2017, Daniel Vetter wrote:
>>> >> On Thu, Dec 21, 2017 at 12:22 AM, Kristian Kristensen <
>> hoegsb...@google.com> wrote:
>>> >>> On Wed, Dec 20, 2017 at 12:41 PM, Miguel Angel Vico <
>> mvicom...@nvidia.com> wrote:
>>>  On Wed, 20 Dec 2017 11:54:10 -0800 Kristian Høgsberg <
>> hoegsb...@gmail.com> wrote:
>>> > I'd like to see concrete examples of actual display controllers
>>> > supporting more format layouts than what can be specified with a 64
>>> > bit modifier.
>>> 
>>>  The main problem is our tiling and other metadata parameters can't
>>>  generally fit in a modifier, so we find passing a blob of metadata a
>>>  more suitable mechanism.
>>> >>>
>>> >>> I understand that you may have n knobs with a total of more than a
>> total of
>>> >>> 56 bits that configure your tiling/swizzling for color buffers. What
>> I don't
>>> >>> buy is that you need all those combinations when passing buffers
>> around
>>> >>> between codecs, cameras and display controllers. Even if you're
>> sharing
>>> >>> between the same 3D drivers in different processes, I expect just
>> locking
>>> >>> down, say, 64 different combinations (you can add more over time) and
>>> >>> assigning each a modifier would be sufficient. I doubt you'd extract
>>> >>> meaningful performance gains from going all the way to a blob.
>>> >
>>> > I agree with Kristian above. In my opinion, choosing to encode in
>>> > modifiers a precise description of every possible tiling/compression
>>> > layout is not technically incorrect, but I believe it misses the point.
>>> > The intention behind modifiers is not to exhaustively describe all
>>> > possibilities.
>>> >
>>> > I summarized this opinion in VK_EXT_image_drm_format_modifier,
>>> > where I wrote an "introduction to modifiers" section. Here's an excerpt:
>>> >
>>> > One goal of modifiers in the Linux ecosystem is to enumerate for
>> each
>>> > vendor a reasonably sized set of tiling formats that are
>> appropriate for
>>> > images shared across processes, APIs, and/or devices, where each
>>> > participating component may possibly be from different vendors.
>>> > A non-goal is to enumerate all tiling formats supported by all
>> vendors.
>>> > Some tiling formats used internally by vendors are inappropriate for
>>> > sharing; no modifiers should be assigned to such tiling formats.
>>
>>> Where it gets tricky is how to select that subset?  Our tiling modes
>>> are defined more by the asic specific constraints than the tiling mode
>>> itself.  At a high level we have basically 3 tiling modes (out of 16
>>> possible) that would be the minimum we'd want to expose for gfx6-8.
>>> gfx9 uses a completely new scheme.
>>> 1. Linear (per asic stride requirements, not usable by many hw blocks)
>>> 2. 1D Thin (5 layouts, displayable, depth, thin, rotated, thick)
>>> 3. 2D Thin (1D tiling constraints, plus pipe config (18 possible),
>>> tile split (7 possible), sample split (4 possible), num banks (4
>>> possible), bank width (4 possible), bank height (4 possible), macro
>>> tile aspect (4 possible) all of which are asic config specific)
>>
>>> I guess we could do something like:
>>> AMD_GFX6_LINEAR_ALIGNED_64B
>>> AMD_GFX6_LINEAR_ALIGNED_256B
>>> AMD_GFX6_LINEAR_ALIGNED_512B
>>> AMD_GFX6_1D_THIN_DISPLAY
>>> AMD_GFX6_1D_THIN_DEPTH
>>> AMD_GFX6_1D_THIN_ROTATED
>>> AMD_GFX6_1D_THIN_THIN
>>> AMD_GFX6_1D_THIN_THICK
>>
>> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>>
>> 

[Mesa-dev] [Bug 105207] The Talos Principle freezes system using radv

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105207

--- Comment #1 from Bas Nieuwenhuizen  ---
Curious, I haven't noticed that and that sounds really similar to my
benchmarking setup.

Not being able to reboot is a generic amdgpu problem after a hang.

If it has been happening for as long as you can remember, how long are we talking about?
What kernels have you used before where you had the issue and what mesa
versions?

What could be useful is starting Talos with

RADV_TRACE_FILE=/some/file/name RADV_DEBUG=allbos,syncshaders

You can do this for example by right clicking on the game name in the list ->
properties -> set launch options ->

RADV_TRACE_FILE=/some/file/name RADV_DEBUG=allbos,syncshaders %command%

Given the nature of the issue, it is possible we crash after the hang while
trying to produce the trace file. In that case a stacktrace would be useful to
narrow down which packet we define illegally (though may need debug symbols to
be useful).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104654] r600/sb: Alien Isolation GPU lock

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104654

--- Comment #7 from russianneuroman...@ya.ru ---
> but apparently they are just compiled but not run, because the scene shows 
> visible artifacts that are not there when the compute shaders are enabled

Does it look like the screenshot in bug 105213, by any chance?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4] anv/blorp: multisample resolve all attachment layers

2018-02-22 Thread Nanley Chery
On Thu, Feb 22, 2018 at 08:23:30AM +0100, Iago Toral wrote:
> On Wed, 2018-02-21 at 09:58 -0800, Nanley Chery wrote:
> > On Wed, Feb 21, 2018 at 09:18:49AM +0100, Iago Toral Quiroga wrote:
> > > We were only resolving the first.
> > > 
> > > v2:
> > >   - Do not require that the number of layers on dst and src are an
> > > exact match, it is okay if the dst has more layers so long as
> > > it has at least the same that we are going to resolve.
> > >   - Do not always resolve array_len layers, we should resolve
> > > only from base_array_layer to array_len.
> > > 
> > > v3:
> > >   - v2 was assuming that array_len represented the total number of
> > > layers in the image, but it represents the number of layers
> > > starting at the base array layer.
> > > 
> > > v4:
> > >  - The number of layers to resolve should be taken from the
> > >framebuffer (Nanley).
> > > 
> > > Fixes new CTS tests for multisampled layered rendering:
> > > dEQP-VK.renderpass.multisample_resolve.layers_*
> > > ---
> > >  src/intel/vulkan/anv_blorp.c | 31 ---
> > >  1 file changed, 20 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/src/intel/vulkan/anv_blorp.c
> > > b/src/intel/vulkan/anv_blorp.c
> > > index bee51e0cdf..efa2ced7f2 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1329,25 +1329,34 @@ anv_cmd_buffer_resolve_subpass(struct
> > > anv_cmd_buffer *cmd_buffer)
> > >VK_IMAGE_ASPECT_COLOR_BIT,
> > >ANV_IMAGE_LAYOUT_EXPLICIT_AU
> > > X,
> > >dst_aux_usage, _surf);
> > > +
> > > + uint32_t base_src_layer = src_iview-
> > > >planes[0].isl.base_array_layer;
> > > + uint32_t base_dst_layer = dst_iview-
> > > >planes[0].isl.base_array_layer;
> > > +
> > 
> > I'm not sure we benefit from storing these fields in new variables,
> > but
> > it's not a big issue. 
> > With or without the new variables, this patch is
> > Reviewed-by: Nanley Chery 
> 
> Thanks Nanley.
> 
> I don't have a strong reason to keep the variables but I prefer to keep
> them for two reasons: without them the call to 
> anv_cmd_buffer_mark_image_written was stretching a bit over 80
> characters long and I also find it easier to read something like
> 'base_dst_layer' than something like 'dst_iview-
> >planes[0].isl.base_array_layer' when browsing the code.
> 

Oh, I wasn't aware of the 80-char limit being passed. Thanks for letting
me know.

-Nanley

> > 
> > > + assert(src_iview->planes[0].isl.array_len >= fb->layers);
> > > + assert(dst_iview->planes[0].isl.array_len >= fb->layers);
> > > +
> > >   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_iview-
> > > >image,
> > > VK_IMAGE_ASPECT_COLOR_B
> > > IT,
> > > dst_surf.aux_usage,
> > > dst_iview-
> > > >planes[0].isl.base_level,
> > > -   dst_iview-
> > > >planes[0].isl.base_array_layer, 1);
> > > +   base_dst_layer, fb-
> > > >layers);
> > >  
> > >   assert(!src_iview->image->format->can_ycbcr);
> > >   assert(!dst_iview->image->format->can_ycbcr);
> > >  
> > > - resolve_surface(,
> > > - _surf,
> > > - src_iview->planes[0].isl.base_level,
> > > - src_iview-
> > > >planes[0].isl.base_array_layer,
> > > - _surf,
> > > - dst_iview->planes[0].isl.base_level,
> > > - dst_iview-
> > > >planes[0].isl.base_array_layer,
> > > - render_area.offset.x,
> > > render_area.offset.y,
> > > - render_area.offset.x,
> > > render_area.offset.y,
> > > - render_area.extent.width,
> > > render_area.extent.height);
> > > + for (uint32_t i = 0; i < fb->layers; i++) {
> > > +resolve_surface(,
> > > +_surf,
> > > +src_iview->planes[0].isl.base_level,
> > > +base_src_layer + i,
> > > +_surf,
> > > +dst_iview->planes[0].isl.base_level,
> > > +base_dst_layer + i,
> > > +render_area.offset.x,
> > > render_area.offset.y,
> > > +render_area.offset.x,
> > > render_area.offset.y,
> > > +render_area.extent.width,
> > > render_area.extent.height);
> > > + }
> > >}
> > >  
> > >blorp_batch_finish();
> > > -- 
> > > 2.14.1
> > > 
> > 
> > 
___
mesa-dev mailing list

[Mesa-dev] [PATCH 18.0 v2] i965, gallium: Disable ARB_get_program_binary for compat profiles

2018-02-22 Thread Jordan Justen
The QT framework has a bug in their shader program cache, which is
built on GL_ARB_get_program_binary.

In an effort to allow them to fix the bug we don't enable more than 1
binary format for compatibility profiles.

This is only being done on the 18.0 release branch.

Ref: https://bugreports.qt.io/browse/QTBUG-66420
Ref: https://bugs.freedesktop.org/show_bug.cgi?id=105065
Cc: "18.0" 
Cc: Mark Janes 
Cc: Kenneth Graunke 
Cc: Scott D Phillips 
Cc: Ilia Mirkin 
Signed-off-by: Jordan Justen 
[imir...@alum.mit.edu: Added st_context.c (gallium) change]
---
 docs/relnotes/17.4.0.html   | 2 +-
 src/mesa/drivers/dri/i965/brw_context.c | 9 -
 src/mesa/state_tracker/st_context.c | 9 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
index 412c0fc455e..fecdfe77969 100644
--- a/docs/relnotes/17.4.0.html
+++ b/docs/relnotes/17.4.0.html
@@ -53,7 +53,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_enhanced_layouts on r600/evergreen+
 GL_ARB_bindless_texture on nvc0/kepler
 OpenGL 4.3 on r600/evergreen with hw fp64 support
-Support 1 binary format for GL_ARB_get_program_binary on i965
+Support 1 binary format for GL_ARB_get_program_binary on i965 (except in 
GL compatibility profiles)
 
 
 Bug fixes
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index e9358b7bc9c..58527d77263 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -704,7 +704,14 @@ brw_initialize_context_constants(struct brw_context *brw)
   ctx->Const.AllowMappedBuffersDuringExecution = true;
 
/* GL_ARB_get_program_binary */
-   ctx->Const.NumProgramBinaryFormats = 1;
+   /* The QT framework has a bug in their shader program cache, which is built
+* on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
+* we don't enable more than 1 binary format for compatibility profiles.
+* This is only being done on the 18.0 release branch.
+*/
+   if (ctx->API != API_OPENGL_COMPAT) {
+  ctx->Const.NumProgramBinaryFormats = 1;
+   }
 }
 
 static void
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index d3e7d3fb7fa..4c14245a38f 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -518,6 +518,15 @@ st_create_context_priv(struct gl_context *ctx, struct 
pipe_context *pipe,
  ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
}
 
+   /* The QT framework has a bug in their shader program cache, which is built
+* on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
+* we don't enable binary formats for compatibility profiles.
+* This is only being done on the 18.0 release branch.
+*/
+   if (ctx->API == API_OPENGL_COMPAT) {
+  ctx->Const.NumProgramBinaryFormats = 0;
+   }
+
/* Set which shader types can be compiled at link time. */
st->shader_has_one_variant[MESA_SHADER_VERTEX] =
  st->has_shareable_shaders &&
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #6992 completed

2018-02-22 Thread AppVeyor


Build mesa 6992 completed



Commit d6b7539206 by Samuel Pitoiset on 2/5/2018 2:51 PM:

ac/nir: remove emission of nir_op_fpow\n\nfpow is now lowered at NIR level.\n\nSigned-off-by: Samuel Pitoiset \nReviewed-by: Bas Nieuwenhuizen 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #6991 failed

2018-02-22 Thread AppVeyor



Build mesa 6991 failed


Commit 7aa008d1d7 by Samuel Pitoiset on 2/2/2018 6:04 PM:

radv: enable lowering of fpow to fexp2 and flog2\n\nThere is no fpow in hardware, so it's always lowered somewhere,\nbut it appears that lowering at NIR level is better. Figured while\ncomparing compute shaders between RadeonSI and RADV.\n\nPolaris10:\nTotals from affected shaders:\nSGPRS: 18936 -> 18904 (-0.17 %)\nVGPRS: 12240 -> 12220 (-0.16 %)\nSpilled SGPRs: 2809 -> 2809 (0.00 %)\nCode Size: 718116 -> 719848 (0.24 %) bytes\nMax Waves: 1409 -> 1410 (0.07 %)\n\nVega10:\nTotals from affected shaders:\nSGPRS: 18392 -> 18392 (0.00 %)\nVGPRS: 12008 -> 11920 (-0.73 %)\nSpilled SGPRs: 3001 -> 2981 (-0.67 %)\nCode Size: 777444 -> 778788 (0.17 %) bytes\nMax Waves: 1503 -> 1504 (0.07 %)\n\nSigned-off-by: Samuel Pitoiset \nReviewed-by: Bas Nieuwenhuizen 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: perf: ensure reading config IDs from sysfs isn't interrupted

2018-02-22 Thread Emil Velikov
Hi Lionel,

On 9 February 2018 at 10:03, Lionel Landwerlin
 wrote:
> Fixes: 458468c136e "i965: Expose OA counters via INTEL_performance_query"
> Signed-off-by: Lionel Landwerlin 
> Cc: "18.0" 
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
Just a friendly reminder that the patch hasn't landed in master yet.
AFAICT it was reviewed by Eric Engestrom.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] intel/compiler: Memory fence commit must always be enabled for gen10+

2018-02-22 Thread Emil Velikov
Hi Anuj,

On 7 February 2018 at 01:09, Anuj Phogat  wrote:
> Commit bit in the message descriptor (Bit 13) must be always set
> to true in CNL+ for memory fence messages. It also fixes a piglit
> GPU hang on cnl+ in simulation environment.
> Piglit test: arb_shader_image_load_store-shader-mem-barrier
> See HSD ES # 1404612949
>
> Signed-off-by: Anuj Phogat 
> Cc: mesa-sta...@lists.freedesktop.org

While patch 2/2 from the series was dropped/superseded, this one
doesn't seem to have landed in master.
Has it been superseded as well, or has it simply fallen through the cracks?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] anv: enable VK_EXT_shader_stencil_export

2018-02-22 Thread Gustavo Lima Chaves
v2:
An attempt to support SpvExecutionModeStencilRefReplacingEXT's behavior
also follows, with the interpretation of said mode being that we prevent
writes to the built-in FragStencilRefEXT variable when the execution
mode isn't set.

v3:
A more cautious reading of 1db44252d01bf7539452ccc2b5210c74b8dcd573 led
me to a missing change that would stop (what I later discovered were)
GPU hangs on the CTS test written to exercise this.
---
 src/compiler/shader_info.h | 2 ++
 src/compiler/spirv/spirv_to_nir.c  | 4 
 src/compiler/spirv/vtn_variables.c | 4 
 src/intel/vulkan/anv_extensions.py | 2 ++
 src/intel/vulkan/anv_pipeline.c| 1 +
 src/intel/vulkan/genX_pipeline.c   | 1 +
 6 files changed, 14 insertions(+)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 6de707f672..f99cbc27a7 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -162,6 +162,8 @@ typedef struct shader_info {
 
  bool pixel_center_integer;
 
+ bool outputs_stencil;
+
  /** gl_FragDepth layout for ARB_conservative_depth. */
  enum gl_frag_depth_layout depth_layout;
   } fs;
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index e00dcafa12..dcb8b31967 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3395,6 +3395,10 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct 
vtn_value *entry_point,
case SpvExecutionModeContractionOff:
   break; /* OpenCL */
 
+   case SpvExecutionModeStencilRefReplacingEXT:
+  b->shader->info.fs.outputs_stencil = true;
+  break;
+
default:
   vtn_fail("Unhandled execution mode");
}
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 36976798e9..42f915d434 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1373,6 +1373,10 @@ apply_var_decoration(struct vtn_builder *b, nir_variable 
*nir_var,
   case SpvBuiltInFragCoord:
  nir_var->data.pixel_center_integer = b->pixel_center_integer;
  break;
+  case SpvBuiltInFragStencilRefEXT:
+ if (!b->shader->info.fs.outputs_stencil)
+ nir_var->data.read_only = true;
+ break;
   default:
  break;
   }
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 581921e62a..cd90c6ae52 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -86,6 +86,8 @@ EXTENSIONS = [
 Extension('VK_KHX_multiview', 1, True),
 Extension('VK_EXT_debug_report',  8, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
+Extension('VK_EXT_shader_stencil_export', 1,
+  'device->info.gen >= 9'),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e16a7a1994..ed63fa42cd 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -143,6 +143,7 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
  .multiview = true,
  .variable_pointers = true,
  .storage_16bit = device->instance->physicalDevice.info.gen >= 8,
+ .stencil_export = device->instance->physicalDevice.info.gen >= 9,
   },
};
 
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 89cbe293b8..683a4607e6 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1600,6 +1600,7 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
  ps.PixelShaderHasUAV = true;
 
 #if GEN_GEN >= 9
+  ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
   ps.PixelShaderPullsBary= wm_prog_data->pulls_bary;
   ps.InputCoverageMaskState  = wm_prog_data->uses_sample_mask ?
ICMS_INNER_CONSERVATIVE : ICMS_NONE;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2018-02-22 Thread Eric Anholt
Kristian Høgsberg  writes:

> On Wed, Feb 21, 2018 at 4:00 PM Alex Deucher  wrote:
>
>> On Wed, Feb 21, 2018 at 1:14 AM, Chad Versace 
> wrote:
>> > On Thu 21 Dec 2017, Daniel Vetter wrote:
>> >> On Thu, Dec 21, 2017 at 12:22 AM, Kristian Kristensen <
> hoegsb...@google.com> wrote:
>> >>> On Wed, Dec 20, 2017 at 12:41 PM, Miguel Angel Vico <
> mvicom...@nvidia.com> wrote:
>>  On Wed, 20 Dec 2017 11:54:10 -0800 Kristian Høgsberg <
> hoegsb...@gmail.com> wrote:
>> > I'd like to see concrete examples of actual display controllers
>> > supporting more format layouts than what can be specified with a 64
>> > bit modifier.
>> 
>>  The main problem is our tiling and other metadata parameters can't
>>  generally fit in a modifier, so we find passing a blob of metadata a
>>  more suitable mechanism.
>> >>>
>> >>> I understand that you may have n knobs with a total of more than a
> total of
>> >>> 56 bits that configure your tiling/swizzling for color buffers. What
> I don't
>> >>> buy is that you need all those combinations when passing buffers
> around
>> >>> between codecs, cameras and display controllers. Even if you're
> sharing
>> >>> between the same 3D drivers in different processes, I expect just
> locking
>> >>> down, say, 64 different combinations (you can add more over time) and
>> >>> assigning each a modifier would be sufficient. I doubt you'd extract
>> >>> meaningful performance gains from going all the way to a blob.
>> >
>> > I agree with Kristian above. In my opinion, choosing to encode in
>> > modifiers a precise description of every possible tiling/compression
>> > layout is not technically incorrect, but I believe it misses the point.
>> > The intention behind modifiers is not to exhaustively describe all
>> > possibilities.
>> >
>> > I summarized this opinion in VK_EXT_image_drm_format_modifier,
>> > where I wrote an "introduction to modifiers" section. Here's an excerpt:
>> >
>> > One goal of modifiers in the Linux ecosystem is to enumerate for
> each
>> > vendor a reasonably sized set of tiling formats that are
> appropriate for
>> > images shared across processes, APIs, and/or devices, where each
>> > participating component may possibly be from different vendors.
>> > A non-goal is to enumerate all tiling formats supported by all
> vendors.
>> > Some tiling formats used internally by vendors are inappropriate for
>> > sharing; no modifiers should be assigned to such tiling formats.
>
>> Where it gets tricky is how to select that subset?  Our tiling modes
>> are defined more by the asic specific constraints than the tiling mode
>> itself.  At a high level we have basically 3 tiling modes (out of 16
>> possible) that would be the minimum we'd want to expose for gfx6-8.
>> gfx9 uses a completely new scheme.
>> 1. Linear (per asic stride requirements, not usable by many hw blocks)
>> 2. 1D Thin (5 layouts, displayable, depth, thin, rotated, thick)
>> 3. 2D Thin (1D tiling constraints, plus pipe config (18 possible),
>> tile split (7 possible), sample split (4 possible), num banks (4
>> possible), bank width (4 possible), bank height (4 possible), macro
>> tile aspect (4 possible) all of which are asic config specific)
>
>> I guess we could do something like:
>> AMD_GFX6_LINEAR_ALIGNED_64B
>> AMD_GFX6_LINEAR_ALIGNED_256B
>> AMD_GFX6_LINEAR_ALIGNED_512B
>> AMD_GFX6_1D_THIN_DISPLAY
>> AMD_GFX6_1D_THIN_DEPTH
>> AMD_GFX6_1D_THIN_ROTATED
>> AMD_GFX6_1D_THIN_THIN
>> AMD_GFX6_1D_THIN_THICK
>
> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>> etc.
>
>> We only probably need 40 bits 

Re: [Mesa-dev] [PATCH v3 13/21] configure.ac, meson: Check for SPIRV-Tools and llvm-spirv

2018-02-22 Thread Francisco Jerez
Pierre Moreau  writes:

>> do we already have an upstream version of both dependencies we could
>> just use? Or do we still need special branches?
>
> For both we can use the master branch of the upstream version. SPIRV-Tools
> validator is still not perfect, but it does already catch issues in OpenCL
> SPIR-V binaries. And llvm-spirv needs to be “rebased” as some commits went
> missing during the update process, but that is not going to impact the API
> used; we can absolutely wait for that to finish before merging the series.
>

Maybe it would make more sense to make these dependencies optional
(since it may take a while for distros to package them) and expose
cl_khr_il_program conditionally only if they are present?

> Pierre
>
>> On Wed, Feb 21, 2018 at 11:50 PM, Pierre Moreau  
>> wrote:
>> > Signed-off-by: Pierre Moreau 
>> > ---
>> >
>> > Notes:
>> > v3:
>> > * Bump the required version of SPIRV-Tools to the latest release;
>> > * Add a dependency on llvm-spirv.
>> >
>> >  configure.ac | 10 ++
>> >  meson.build  |  4 
>> >  2 files changed, 14 insertions(+)
>> >
>> > diff --git a/configure.ac b/configure.ac
>> > index 8a9172690a..1d393a5234 100644
>> > --- a/configure.ac
>> > +++ b/configure.ac
>> > @@ -2386,6 +2386,16 @@ AM_CONDITIONAL(HAVE_CLOVER_ICD, test 
>> > "x$enable_opencl_icd" = xyes)
>> >  AC_SUBST([OPENCL_LIBNAME])
>> >  AC_SUBST([CLANG_RESOURCE_DIR])
>> >
>> > +AS_IF([test "x$enable_opencl" = xyes], [
>> > +PKG_CHECK_MODULES([SPIRV_TOOLS], [SPIRV-Tools >= 2018.0])])
>> > +AC_SUBST([SPIRV_TOOLS_CFLAGS])
>> > +AC_SUBST([SPIRV_TOOLS_LIBS])
>> > +
>> > +AS_IF([test "x$enable_opencl" = xyes], [
>> > +PKG_CHECK_MODULES([LLVM_SPIRV], [llvm-spirv])])
>> > +AC_SUBST([LLVM_SPIRV_CFLAGS])
>> > +AC_SUBST([LLVM_SPIRV_LIBS])
>> > +
>> >  dnl
>> >  dnl Gallium configuration
>> >  dnl
>> > diff --git a/meson.build b/meson.build
>> > index 8cf67b8171..c67bd32d0f 100644
>> > --- a/meson.build
>> > +++ b/meson.build
>> > @@ -596,10 +596,14 @@ if _opencl != 'disabled'
>> >
>> ># TODO: alitvec?
>> >dep_clc = dependency('libclc')
>> > +  dep_spirv_tools = dependency('SPIRV-Tools', version : '>= 2018.0')
>> > +  dep_llvm_spirv = dependency('llvm-spirv')
>> >with_gallium_opencl = true
>> >with_opencl_icd = _opencl == 'icd'
>> >  else
>> >dep_clc = []
>> > +  dep_spirv_tools = []
>> > +  dep_llvm_spirv = []
>> >with_gallium_opencl = false
>> >with_gallium_icd = false
>> >  endif
>> > --
>> > 2.16.2
>> >


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 14/21] clover/llvm: Allow translating from SPIR-V to LLVM IR

2018-02-22 Thread Francisco Jerez
Pierre Moreau  writes:

> Signed-off-by: Pierre Moreau 
> ---
>  .../state_trackers/clover/llvm/invocation.cpp  | 26 
> ++
>  .../state_trackers/clover/llvm/invocation.hpp  |  4 
>  src/gallium/state_trackers/clover/meson.build  |  2 +-
>  3 files changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index e4ca5fa444..8fcf93eefd 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -24,12 +24,15 @@
>  // OTHER DEALINGS IN THE SOFTWARE.
>  //
>  
> +#include 
> +
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -301,3 +304,26 @@ clover::llvm::link_program(const std::vector 
> ,
>unreachable("Unsupported IR.");
> }
>  }
> +
> +module
> +clover::llvm::compile_from_spirv(const std::vector ,
> + const device ,
> + std::string _log) {
> +   auto ctx = create_context(r_log);
> +
> +   ::llvm::Module *unsafe_mod;
> +   std::string error_msg;
> +   std::stringstream input(std::ios_base::binary);
> +   std::copy(binary.begin(), binary.end(), 
> std::ostream_iterator(input, ""));

Why do you need to std::copy the bytecode into a bidirectional
stringstream?  Can't you use an std::istringstream initialized to the
binary contents instead?

> +   if (!::llvm::ReadSPIRV(*ctx, input, unsafe_mod, error_msg)) {
> +  r_log += "Failed to convert SPIR-V to LLVM IR: " + error_msg + ".\n";
> +  throw error(CL_INVALID_VALUE);
> +   }
> +
> +   std::unique_ptr<::llvm::Module> mod(unsafe_mod);
> +
> +   if (has_flag(debug::llvm))
> +  debug::log(".ll", print_module_bitcode(*mod));
> +
> +   return build_module_library(*mod, module::section::text_intermediate);
> +}
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.hpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.hpp
> index 5b3530c382..17fa416136 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.hpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.hpp
> @@ -41,6 +41,10 @@ namespace clover {
>const std::string ,
>const std::string ,
>std::string _log);
> +
> +  module compile_from_spirv(const std::vector ,
> +const device ,
> +std::string _log);
> }
>  }
>  
> diff --git a/src/gallium/state_trackers/clover/meson.build 
> b/src/gallium/state_trackers/clover/meson.build
> index c52f0faa40..bffd0df11d 100644
> --- a/src/gallium/state_trackers/clover/meson.build
> +++ b/src/gallium/state_trackers/clover/meson.build
> @@ -48,7 +48,7 @@ libclllvm = static_library(
>dep_llvm.get_configtool_variable('version'), 'include',
>  )),
>],
> -  dependencies : [dep_llvm, dep_elf],
> +  dependencies : [dep_llvm, dep_elf, dep_llvm_spirv],
>  )
>  
>  clover_files = files(
> -- 
> 2.16.2


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 06/21] clover/api: Rework the validation of devices for building

2018-02-22 Thread Francisco Jerez
Pierre Moreau  writes:

> Signed-off-by: Pierre Moreau 
> ---
>  src/gallium/state_trackers/clover/api/program.cpp  | 39 
> +-
>  src/gallium/state_trackers/clover/core/program.cpp |  3 +-
>  2 files changed, 25 insertions(+), 17 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
> b/src/gallium/state_trackers/clover/api/program.cpp
> index 9d59668f8f..babe45ccde 100644
> --- a/src/gallium/state_trackers/clover/api/program.cpp
> +++ b/src/gallium/state_trackers/clover/api/program.cpp
> @@ -29,9 +29,10 @@
>  using namespace clover;
>  
>  namespace {
> -   void
> +   ref_vector
> validate_build_common(const program , cl_uint num_devs,
>   const cl_device_id *d_devs,
> + ref_vector _devs,
>   void (*pfn_notify)(cl_program, void *),
>   void *user_data) {
>if (!pfn_notify && user_data)
> @@ -40,10 +41,16 @@ namespace {
>if (prog.kernel_ref_count())
>   throw error(CL_INVALID_OPERATION);
>  
> +  if ((!d_devs && num_devs > 0u) || (d_devs && num_devs == 0u))
> + throw error(CL_INVALID_VALUE);
> +

This check shouldn't be necessary, it was provided by the call to the
objs() CL argument processing helper you removed below.

> +  auto devs = (d_devs ? objs(d_devs, num_devs) : valid_devs);
>if (any_of([&](const device ) {
> -   return !count(dev, prog.context().devices());
> -}, objs(d_devs, num_devs)))
> +   return !count(dev, valid_devs);

This should probably be '!count(dev, prog.devices())'.

> +}, devs))
>   throw error(CL_INVALID_DEVICE);
> +
> +  return devs;

The benefit from calculating the device list in validate_build_common()
seems a bit dubious to me, but if you want to share the one ternary
operator I'd split the current validate_build_common() into two
functions: 'void validate_build_common(prog, pfn_notify, user_data)'
that only validates the program object and pfn_notify closure and
'ref_vector validate_build_devices(prog, num_devs, d_devs)' that
does the device checks and returns the correct device set (Note that
there is no need for the caller to provide the set of valid devices as
argument as you're doing here, it should always be equal to
prog.devices()).  Then I'd replace the all_devs argument of
validate_link_devices() with a num_devs/d_devs pair and call
validate_build_devices() from there.

> }
>  }
>  
> @@ -176,13 +183,12 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
> void (*pfn_notify)(cl_program, void *),
> void *user_data) try {
> auto  = obj(d_prog);
> -   auto devs = (d_devs ? objs(d_devs, num_devs) :
> -ref_vector(prog.context().devices()));
> +   auto valid_devs = ref_vector(prog.devices());
> +   auto devs = validate_build_common(prog, num_devs, d_devs, valid_devs,
> + pfn_notify, user_data);
> const auto opts = std::string(p_opts ? p_opts : "") + " " +
>   debug_get_option("CLOVER_EXTRA_BUILD_OPTIONS", "");
>  
> -   validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data);
> -
> if (prog.has_source) {
>prog.compile(devs, opts);
>prog.link(devs, opts, { prog });
> @@ -202,14 +208,13 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs,
>   void (*pfn_notify)(cl_program, void *),
>   void *user_data) try {
> auto  = obj(d_prog);
> -   auto devs = (d_devs ? objs(d_devs, num_devs) :
> -ref_vector(prog.context().devices()));
> +   auto valid_devs = ref_vector(prog.devices());
> +   auto devs = validate_build_common(prog, num_devs, d_devs, valid_devs,
> + pfn_notify, user_data);
> const auto opts = std::string(p_opts ? p_opts : "") + " " +
>   debug_get_option("CLOVER_EXTRA_COMPILE_OPTIONS", "");
> header_map headers;
>  
> -   validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data);
> -
> if (bool(num_headers) != bool(header_names))
>throw error(CL_INVALID_VALUE);
>  
> @@ -275,16 +280,18 @@ clLinkProgram(cl_context d_ctx, cl_uint num_devs, const 
> cl_device_id *d_devs,
>const char *p_opts, cl_uint num_progs, const cl_program 
> *d_progs,
>void (*pfn_notify) (cl_program, void *), void *user_data,
>cl_int *r_errcode) try {
> +   if (num_progs == 0u || (num_progs != 0u && !d_progs))
> +  throw error(CL_INVALID_VALUE);
> +

This check is already taken care of by the common CL argument
validation, please drop it.

> auto  = obj(d_ctx);
> const auto opts = std::string(p_opts ? p_opts : "") + " " +
>   debug_get_option("CLOVER_EXTRA_LINK_OPTIONS", "");
> auto progs = objs(d_progs, num_progs);
> auto prog = 

Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2018-02-22 Thread Bas Nieuwenhuizen
On Thu, Feb 22, 2018 at 7:04 PM, Kristian Høgsberg  wrote:
> On Wed, Feb 21, 2018 at 4:00 PM Alex Deucher  wrote:
>
>> On Wed, Feb 21, 2018 at 1:14 AM, Chad Versace 
> wrote:
>> > On Thu 21 Dec 2017, Daniel Vetter wrote:
>> >> On Thu, Dec 21, 2017 at 12:22 AM, Kristian Kristensen <
> hoegsb...@google.com> wrote:
>> >>> On Wed, Dec 20, 2017 at 12:41 PM, Miguel Angel Vico <
> mvicom...@nvidia.com> wrote:
>>  On Wed, 20 Dec 2017 11:54:10 -0800 Kristian Høgsberg <
> hoegsb...@gmail.com> wrote:
>> > I'd like to see concrete examples of actual display controllers
>> > supporting more format layouts than what can be specified with a 64
>> > bit modifier.
>> 
>>  The main problem is our tiling and other metadata parameters can't
>>  generally fit in a modifier, so we find passing a blob of metadata a
>>  more suitable mechanism.
>> >>>
>> >>> I understand that you may have n knobs with a total of more than a
> total of
>> >>> 56 bits that configure your tiling/swizzling for color buffers. What
> I don't
>> >>> buy is that you need all those combinations when passing buffers
> around
>> >>> between codecs, cameras and display controllers. Even if you're
> sharing
>> >>> between the same 3D drivers in different processes, I expect just
> locking
>> >>> down, say, 64 different combinations (you can add more over time) and
>> >>> assigning each a modifier would be sufficient. I doubt you'd extract
>> >>> meaningful performance gains from going all the way to a blob.
>> >
>> > I agree with Kristian above. In my opinion, choosing to encode in
>> > modifiers a precise description of every possible tiling/compression
>> > layout is not technically incorrect, but I believe it misses the point.
>> > The intention behind modifiers is not to exhaustively describe all
>> > possibilities.
>> >
>> > I summarized this opinion in VK_EXT_image_drm_format_modifier,
>> > where I wrote an "introduction to modifiers" section. Here's an excerpt:
>> >
>> > One goal of modifiers in the Linux ecosystem is to enumerate for
> each
>> > vendor a reasonably sized set of tiling formats that are
> appropriate for
>> > images shared across processes, APIs, and/or devices, where each
>> > participating component may possibly be from different vendors.
>> > A non-goal is to enumerate all tiling formats supported by all
> vendors.
>> > Some tiling formats used internally by vendors are inappropriate for
>> > sharing; no modifiers should be assigned to such tiling formats.
>
>> Where it gets tricky is how to select that subset?  Our tiling modes
>> are defined more by the asic specific constraints than the tiling mode
>> itself.  At a high level we have basically 3 tiling modes (out of 16
>> possible) that would be the minimum we'd want to expose for gfx6-8.
>> gfx9 uses a completely new scheme.
>> 1. Linear (per asic stride requirements, not usable by many hw blocks)
>> 2. 1D Thin (5 layouts, displayable, depth, thin, rotated, thick)
>> 3. 2D Thin (1D tiling constraints, plus pipe config (18 possible),
>> tile split (7 possible), sample split (4 possible), num banks (4
>> possible), bank width (4 possible), bank height (4 possible), macro
>> tile aspect (4 possible) all of which are asic config specific)
>
>> I guess we could do something like:
>> AMD_GFX6_LINEAR_ALIGNED_64B
>> AMD_GFX6_LINEAR_ALIGNED_256B
>> AMD_GFX6_LINEAR_ALIGNED_512B
>> AMD_GFX6_1D_THIN_DISPLAY
>> AMD_GFX6_1D_THIN_DEPTH
>> AMD_GFX6_1D_THIN_ROTATED
>> AMD_GFX6_1D_THIN_THIN
>> AMD_GFX6_1D_THIN_THICK
>
> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>
> AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
>> etc.
>

[Mesa-dev] [PATCH v2 5/6] tegra: Initial support

2018-02-22 Thread Thierry Reding
Tegra K1 and later use a GPU that can be driven by the Nouveau driver.
But the GPU is a pure render node and has no display engine, hence the
scanout needs to happen on the Tegra display hardware. The GPU and the
display engine each have a separate DRM device node exposed by the
kernel.

To make the setup appear as a single device, this driver instantiates
a Nouveau screen with each instance of a Tegra screen and forwards GPU
requests to the Nouveau screen. For purposes of scanout it will import
buffers created on the GPU into the display driver. Handles that
userspace requests are those of the display driver so that they can be
used to create framebuffers.

This has been tested with some GBM test programs, as well as kmscube and
weston. All of those run without modifications, but I'm sure there is a
lot that can be improved.

Some fixes contributed by Hector Martin .

Changes in v2:
- duplicate file descriptor in winsys to avoid potential issues
- require nouveau when building the tegra driver
- check for nouveau driver name on render node
- remove unneeded dependency on libdrm_tegra
- remove zombie references to libudev
- add missing headers to C_SOURCES variable
- drop unneeded tegra/ prefix for includes
- open device files with O_CLOEXEC
- update copyrights

Reviewed-by: Emil Velikov 
Acked-by: Emil Velikov 
Tested-by: Andre Heider 
Signed-off-by: Thierry Reding 
---
 configure.ac   |   12 +-
 include/drm-uapi/tegra_drm.h   |  225 
 meson.build|7 +-
 src/gallium/Makefile.am|5 +
 .../auxiliary/pipe-loader/pipe_loader_drm.c|7 +-
 src/gallium/auxiliary/target-helpers/drm_helper.h  |   23 +
 .../auxiliary/target-helpers/drm_helper_public.h   |3 +
 src/gallium/drivers/tegra/Automake.inc |   11 +
 src/gallium/drivers/tegra/Makefile.am  |   11 +
 src/gallium/drivers/tegra/Makefile.sources |6 +
 src/gallium/drivers/tegra/meson.build  |   41 +
 src/gallium/drivers/tegra/tegra_context.c  | 1294 
 src/gallium/drivers/tegra/tegra_context.h  |   81 ++
 src/gallium/drivers/tegra/tegra_resource.h |   76 ++
 src/gallium/drivers/tegra/tegra_screen.c   |  692 +++
 src/gallium/drivers/tegra/tegra_screen.h   |   45 +
 src/gallium/meson.build|6 +
 src/gallium/targets/dri/Makefile.am|2 +
 src/gallium/targets/dri/meson.build|4 +-
 src/gallium/targets/dri/target.c   |4 +
 src/gallium/targets/vdpau/Makefile.am  |2 +
 src/gallium/winsys/tegra/drm/Makefile.am   |   10 +
 src/gallium/winsys/tegra/drm/Makefile.sources  |2 +
 src/gallium/winsys/tegra/drm/meson.build   |   33 +
 src/gallium/winsys/tegra/drm/tegra_drm_public.h|   31 +
 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c|   49 +
 26 files changed, 2678 insertions(+), 4 deletions(-)
 create mode 100644 include/drm-uapi/tegra_drm.h
 create mode 100644 src/gallium/drivers/tegra/Automake.inc
 create mode 100644 src/gallium/drivers/tegra/Makefile.am
 create mode 100644 src/gallium/drivers/tegra/Makefile.sources
 create mode 100644 src/gallium/drivers/tegra/meson.build
 create mode 100644 src/gallium/drivers/tegra/tegra_context.c
 create mode 100644 src/gallium/drivers/tegra/tegra_context.h
 create mode 100644 src/gallium/drivers/tegra/tegra_resource.h
 create mode 100644 src/gallium/drivers/tegra/tegra_screen.c
 create mode 100644 src/gallium/drivers/tegra/tegra_screen.h
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.am
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.sources
 create mode 100644 src/gallium/winsys/tegra/drm/meson.build
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_public.h
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c

diff --git a/configure.ac b/configure.ac
index 8a9172690a87..60a671046016 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1350,7 +1350,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
 [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
 [comma delimited Gallium drivers list, e.g.
-
"i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,vc5,virgl,etnaviv,imx"
+
"i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,tegra,vc4,vc5,virgl,etnaviv,imx"
 @<:@default=r300,r600,svga,swrast@:>@])],
 [with_gallium_drivers="$withval"],
 [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -2595,6 +2595,10 @@ if test -n "$with_gallium_drivers"; then
ximx)
 HAVE_GALLIUM_IMX=yes
 ;;
+xtegra)
+HAVE_GALLIUM_TEGRA=yes
+

[Mesa-dev] [PATCH v2 6/6] autotools: Add tegra to AM_DISTCHECK_CONFIGURE_FLAGS

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

This allows the driver to be built on a make distcheck and makes sure
that it properly builds when a distribution tarball is made.

Suggested-by: Emil Velikov 
Signed-off-by: Thierry Reding 
---
 Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 5c3a6717d34e..de6921bf1fcb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -45,7 +45,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
--enable-libunwind \
--with-platforms=x11,wayland,drm,surfaceless \
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
-   
--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx
 \
+   
--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,tegra,virgl,swr,etnaviv,imx
 \
--with-vulkan-drivers=intel,radeon
 
 ACLOCAL_AMFLAGS = -I m4
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/6] nouveau/nvc0: Extract common tile mode macro

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

Add a new macro that can be used to extract the tiling mode from a
tile_mode value. This will be used to determine the number of GOBs
used in block linear mode.

Acked-by: Emil Velikov 
Tested-by: Andre Heider 
Signed-off-by: Thierry Reding 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.h | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
index 0d5f026d6e1c..c68a50948360 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
@@ -6,14 +6,17 @@
 
 #define NVC0_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
 
+#define NVC0_TILE_MODE_X(m) (((m) >> 0) & 0xf)
+#define NVC0_TILE_MODE_Y(m) (((m) >> 4) & 0xf)
+#define NVC0_TILE_MODE_Z(m) (((m) >> 8) & 0xf)
 
-#define NVC0_TILE_SHIFT_X(m) m) >> 0) & 0xf) + 6)
-#define NVC0_TILE_SHIFT_Y(m) m) >> 4) & 0xf) + 3)
-#define NVC0_TILE_SHIFT_Z(m) m) >> 8) & 0xf) + 0)
+#define NVC0_TILE_SHIFT_X(m) (NVC0_TILE_MODE_X(m) + 6)
+#define NVC0_TILE_SHIFT_Y(m) (NVC0_TILE_MODE_Y(m) + 3)
+#define NVC0_TILE_SHIFT_Z(m) (NVC0_TILE_MODE_Z(m) + 0)
 
-#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf))
-#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf))
-#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
+#define NVC0_TILE_SIZE_X(m) (64 << NVC0_TILE_MODE_X(m))
+#define NVC0_TILE_SIZE_Y(m) ( 8 << NVC0_TILE_MODE_Y(m))
+#define NVC0_TILE_SIZE_Z(m) ( 1 << NVC0_TILE_MODE_Z(m))
 
 /* it's ok to mask only in the end because max value is 3 * 5 */
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/6] nouveau: Add framebuffer modifier support

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

This adds support for framebuffer modifiers to Nouveau. This will be
used by the Tegra driver to share metadata about the format of buffers
(such as the tiling mode or compression).

Changes in v2:
- remove unused parameters to nouveau_buffer_create()
- move format modifier query code to nvc0 backend
- restrict format modifiers to 2D textures
- implement ->query_dmabuf_modifiers()

Acked-by: Emil Velikov 
Tested-by: Andre Heider 
Signed-off-by: Thierry Reding 
---
 src/gallium/drivers/nouveau/Android.mk   |  3 +
 src/gallium/drivers/nouveau/Makefile.am  |  1 +
 src/gallium/drivers/nouveau/nouveau_screen.c |  4 ++
 src/gallium/drivers/nouveau/nv30/nv30_resource.c |  2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c  | 81 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 59 -
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.h |  3 +-
 7 files changed, 149 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/Android.mk 
b/src/gallium/drivers/nouveau/Android.mk
index 2de22e73ec18..a446774a86e8 100644
--- a/src/gallium/drivers/nouveau/Android.mk
+++ b/src/gallium/drivers/nouveau/Android.mk
@@ -36,6 +36,9 @@ LOCAL_SRC_FILES := \
$(NVC0_CODEGEN_SOURCES) \
$(NVC0_C_SOURCES)
 
+LOCAL_C_INCLUDES := \
+   $(MESA_TOP)/include/drm-uapi
+
 LOCAL_SHARED_LIBRARIES := libdrm_nouveau
 LOCAL_MODULE := libmesa_pipe_nouveau
 
diff --git a/src/gallium/drivers/nouveau/Makefile.am 
b/src/gallium/drivers/nouveau/Makefile.am
index 91547178e397..f6126b544811 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -24,6 +24,7 @@ include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
+   -I$(top_srcdir)/include/drm-uapi \
$(GALLIUM_DRIVER_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(NOUVEAU_CFLAGS)
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index c144b39b2dd2..b84ef13ebe7f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -1,3 +1,5 @@
+#include 
+
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
@@ -23,6 +25,8 @@
 #include "nouveau_mm.h"
 #include "nouveau_buffer.h"
 
+#include "nvc0/nvc0_resource.h"
+
 /* XXX this should go away */
 #include "state_tracker/drm_driver.h"
 
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.c 
b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
index ff34f6e5f9fa..386bd3459bd3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include 
+
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 27674f72a7c0..7983c4030876 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -20,8 +20,11 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include 
+
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
+#include "state_tracker/drm_driver.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 
@@ -233,9 +236,79 @@ nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt)
}
 }
 
+static uint64_t nvc0_miptree_get_modifier(struct nv50_miptree *mt)
+{
+   union nouveau_bo_config *config = &mt->base.bo->config;
+   uint64_t modifier;
+
+   if (mt->layout_3d)
+  return DRM_FORMAT_MOD_INVALID;
+
+   switch (config->nvc0.memtype) {
+   case 0x00:
+  modifier = DRM_FORMAT_MOD_LINEAR;
+  break;
+
+   case 0xfe:
+  switch (NVC0_TILE_MODE_Y(config->nvc0.tile_mode)) {
+  case 0:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB;
+ break;
+
+  case 1:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB;
+ break;
+
+  case 2:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB;
+ break;
+
+  case 3:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB;
+ break;
+
+  case 4:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB;
+ break;
+
+  case 5:
+ modifier = DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB;
+ break;
+
+  default:
+ modifier = DRM_FORMAT_MOD_INVALID;
+ break;
+  }
+  break;
+
+   default:
+  modifier = DRM_FORMAT_MOD_INVALID;
+  break;
+   }
+
+   return modifier;
+}
+
+static boolean
+nvc0_miptree_get_handle(struct pipe_screen *pscreen,
+struct pipe_resource *pt,
+struct winsys_handle *whandle)
+{
+   struct nv50_miptree *mt = nv50_miptree(pt);
+   boolean ret;
+
+   ret = nv50_miptree_get_handle(pscreen, pt, 

[Mesa-dev] [PATCH v2 1/6] drm/fourcc: Fix fourcc_mod_code() definition

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

Avoid compiler warnings when the val parameter is an expression.

This is based on commit 5843f4e02fbe86a59981e35adc6cabebee46fdc0 from
Linux v4.16-rc1.

Acked-by: Emil Velikov 
Tested-by: Andre Heider 
Signed-off-by: Thierry Reding 
---
 include/drm-uapi/drm_fourcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index 3ad838d3f93f..a76ed8f9e383 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -188,7 +188,7 @@ extern "C" {
 #define DRM_FORMAT_RESERVED  ((1ULL << 56) - 1)
 
 #define fourcc_mod_code(vendor, val) \
-   ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 
0x00ffffffffffffffULL))
+   ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 
0x00ffffffffffffffULL))
 
 /*
  * Format Modifier tokens:
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/6] drm/tegra: Sanitize format modifiers

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

The existing format modifier definitions were merged prematurely, and
recent work has unveiled that the definitions are suboptimal in several
ways:

  - The format specifiers, except for one, are not Tegra specific, but
the names don't reflect that.
  - The number space is split into two, reserving 32 bits for some
"parameter" which most of the modifiers are not going to have.
  - Symbolic names for the modifiers are not using the standard
DRM_FORMAT_MOD_* prefix, which makes them awkward to use.
  - The vendor prefix NV is somewhat ambiguous.

Fortunately, nobody's started using these modifiers, so we can still fix
the above issues. Do so by using the standard prefix. Also, remove TEGRA
from the name of those modifiers that exist on NVIDIA GPUs as well. In
case of the block linear modifiers, make the "parameter" smaller (4
bits, though only 6 values are valid) and don't let that leak into any
of the other modifiers.

Finally, also use the more canonical NVIDIA instead of the ambiguous NV
prefix.

This is based on commit 268892cb63a822315921a8dab48ac3e4abf7dd03 from
Linux v4.16-rc1.

Acked-by: Emil Velikov 
Tested-by: Andre Heider 
Signed-off-by: Thierry Reding 
---
 include/drm-uapi/drm_fourcc.h | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index a76ed8f9e383..e04613d30a13 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -178,7 +178,7 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_NONE0
 #define DRM_FORMAT_MOD_VENDOR_INTEL   0x01
 #define DRM_FORMAT_MOD_VENDOR_AMD 0x02
-#define DRM_FORMAT_MOD_VENDOR_NV  0x03
+#define DRM_FORMAT_MOD_VENDOR_NVIDIA  0x03
 #define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04
 #define DRM_FORMAT_MOD_VENDOR_QCOM0x05
 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
@@ -338,29 +338,17 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
 
-/* NVIDIA Tegra frame buffer modifiers */
-
-/*
- * Some modifiers take parameters, for example the number of vertical GOBs in
- * a block. Reserve the lower 32 bits for parameters
- */
-#define __fourcc_mod_tegra_mode_shift 32
-#define fourcc_mod_tegra_code(val, params) \
-   fourcc_mod_code(NV, ((((__u64)val) << __fourcc_mod_tegra_mode_shift) | 
params))
-#define fourcc_mod_tegra_mod(m) \
-   (m & ~((1ULL << __fourcc_mod_tegra_mode_shift) - 1))
-#define fourcc_mod_tegra_param(m) \
-   (m & ((1ULL << __fourcc_mod_tegra_mode_shift) - 1))
+/* NVIDIA frame buffer modifiers */
 
 /*
  * Tegra Tiled Layout, used by Tegra 2, 3 and 4.
  *
  * Pixels are arranged in simple tiles of 16 x 16 bytes.
  */
-#define NV_FORMAT_MOD_TEGRA_TILED fourcc_mod_tegra_code(1, 0)
+#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
 
 /*
- * Tegra 16Bx2 Block Linear layout, used by TK1/TX1
+ * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later
  *
  * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
  * vertically by a power of 2 (1 to 32 GOBs) to form a block.
@@ -380,7 +368,21 @@ extern "C" {
  * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
  * in full detail.
  */
-#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
+   fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))
+
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
+   fourcc_mod_code(NVIDIA, 0x10)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
+   fourcc_mod_code(NVIDIA, 0x11)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
+   fourcc_mod_code(NVIDIA, 0x12)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
+   fourcc_mod_code(NVIDIA, 0x13)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
+   fourcc_mod_code(NVIDIA, 0x14)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
+   fourcc_mod_code(NVIDIA, 0x15)
 
 /*
  * Broadcom VC4 "T" format
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/6] NVIDIA Tegra support

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

This series of patches implements initial support for Tegra. The first
two patches import DRM UAPI from v4.16-rc1 that provide framebuffer
modifiers that can be used to specify buffers shared between Nouveau
and the Tegra DRM driver.

Patches 3 and 4 add support for framebuffer modifiers to Nouveau and
patch 5 builds on top of those to provide initial Tegra support in Mesa.
The current patches allow running common use-cases such as Wayland,
kmscube, etc.

Patch 6 adds the Tegra driver to the list of gallium drivers built
during a `make distcheck'.

Some people have been using earlier versions of these patches to run a
completely open-source graphics stack on various Tegra210 devices. I've
Cc'ed some of them so that they can provide feedback.

This series is also available in a git repository here:

https://cgit.freedesktop.org/~tagr/mesa #tegra-v2

though that also contains the Nouveau syncfd patches that are still work
in progress and which require new kernel/userspace ABI. The patches here
work on top of a vanilla recent (v4.16-rc1) Linux kernel.

Thierry

Thierry Reding (6):
  drm/fourcc: Fix fourcc_mod_code() definition
  drm/tegra: Sanitize format modifiers
  nouveau/nvc0: Extract common tile mode macro
  nouveau: Add framebuffer modifier support
  tegra: Initial support
  autotools: Add tegra to AM_DISTCHECK_CONFIGURE_FLAGS

 Makefile.am|2 +-
 configure.ac   |   12 +-
 include/drm-uapi/drm_fourcc.h  |   38 +-
 include/drm-uapi/tegra_drm.h   |  225 
 meson.build|7 +-
 src/gallium/Makefile.am|5 +
 .../auxiliary/pipe-loader/pipe_loader_drm.c|7 +-
 src/gallium/auxiliary/target-helpers/drm_helper.h  |   23 +
 .../auxiliary/target-helpers/drm_helper_public.h   |3 +
 src/gallium/drivers/nouveau/Android.mk |3 +
 src/gallium/drivers/nouveau/Makefile.am|1 +
 src/gallium/drivers/nouveau/nouveau_screen.c   |4 +
 src/gallium/drivers/nouveau/nv30/nv30_resource.c   |2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c|   81 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.c   |   59 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.h   |   18 +-
 src/gallium/drivers/tegra/Automake.inc |   11 +
 src/gallium/drivers/tegra/Makefile.am  |   11 +
 src/gallium/drivers/tegra/Makefile.sources |6 +
 src/gallium/drivers/tegra/meson.build  |   41 +
 src/gallium/drivers/tegra/tegra_context.c  | 1294 
 src/gallium/drivers/tegra/tegra_context.h  |   81 ++
 src/gallium/drivers/tegra/tegra_resource.h |   76 ++
 src/gallium/drivers/tegra/tegra_screen.c   |  692 +++
 src/gallium/drivers/tegra/tegra_screen.h   |   45 +
 src/gallium/meson.build|6 +
 src/gallium/targets/dri/Makefile.am|2 +
 src/gallium/targets/dri/meson.build|4 +-
 src/gallium/targets/dri/target.c   |4 +
 src/gallium/targets/vdpau/Makefile.am  |2 +
 src/gallium/winsys/tegra/drm/Makefile.am   |   10 +
 src/gallium/winsys/tegra/drm/Makefile.sources  |2 +
 src/gallium/winsys/tegra/drm/meson.build   |   33 +
 src/gallium/winsys/tegra/drm/tegra_drm_public.h|   31 +
 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c|   49 +
 35 files changed, 2857 insertions(+), 33 deletions(-)
 create mode 100644 include/drm-uapi/tegra_drm.h
 create mode 100644 src/gallium/drivers/tegra/Automake.inc
 create mode 100644 src/gallium/drivers/tegra/Makefile.am
 create mode 100644 src/gallium/drivers/tegra/Makefile.sources
 create mode 100644 src/gallium/drivers/tegra/meson.build
 create mode 100644 src/gallium/drivers/tegra/tegra_context.c
 create mode 100644 src/gallium/drivers/tegra/tegra_context.h
 create mode 100644 src/gallium/drivers/tegra/tegra_resource.h
 create mode 100644 src/gallium/drivers/tegra/tegra_screen.c
 create mode 100644 src/gallium/drivers/tegra/tegra_screen.h
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.am
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.sources
 create mode 100644 src/gallium/winsys/tegra/drm/meson.build
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_public.h
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105211] build failure after zwp_dmabuf commit if wayland-protocols is not installed

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105211

--- Comment #3 from Emil Velikov  ---
Daniel, let's opt for your earlier suggestion and stop shipping those generated
sources. It's causing more problems than what it's worth.

There was some fun when building/using the correct wayland-scanner in
cross-compilation, but that should be ironed out by now.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105211] build failure after zwp_dmabuf commit if wayland-protocols is not installed

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105211

--- Comment #2 from Alan Swanson  ---
No worries. But just to note that wayland-protocols does have a dependency on
wayland-scanner from the main wayland build - or at least an autotools
configure failure if not present (though scan.sh test script would later skip
if wayland-scanner was not present).

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] fixup! tegra: Initial support

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

---
 configure.ac|  5 +++--
 src/gallium/drivers/tegra/Makefile.am   |  9 +---
 src/gallium/drivers/tegra/Makefile.sources  |  5 -
 src/gallium/drivers/tegra/tegra_context.c   |  8 +++
 src/gallium/drivers/tegra/tegra_context.h   |  2 +-
 src/gallium/drivers/tegra/tegra_resource.h  |  2 +-
 src/gallium/drivers/tegra/tegra_screen.c| 30 -
 src/gallium/drivers/tegra/tegra_screen.h|  2 +-
 src/gallium/winsys/tegra/drm/Makefile.am|  3 +--
 src/gallium/winsys/tegra/drm/tegra_drm_public.h |  2 +-
 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c | 20 +++--
 11 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/configure.ac b/configure.ac
index 7f83fb9ee17e..60a671046016 100644
--- a/configure.ac
+++ b/configure.ac
@@ -80,7 +80,6 @@ LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
 LIBDRM_FREEDRENO_REQUIRED=2.4.89
 LIBDRM_ETNAVIV_REQUIRED=2.4.82
-LIBDRM_TEGRA_REQUIRED=2.4.58
 
 dnl Versions for external dependencies
 DRI2PROTO_REQUIRED=2.8
@@ -2598,7 +2597,6 @@ if test -n "$with_gallium_drivers"; then
 ;;
 xtegra)
 HAVE_GALLIUM_TEGRA=yes
-PKG_CHECK_MODULES([TEGRA], [libdrm_tegra >= 
$LIBDRM_TEGRA_REQUIRED])
 require_libdrm "tegra"
 ;;
 xswrast)
@@ -2725,6 +2723,9 @@ if test "x$HAVE_GALLIUM_VC4" != xyes -a 
"x$HAVE_GALLIUM_PL111" = xyes  ; then
 AC_MSG_ERROR([Building with pl111 requires vc4])
 fi
 
+if test "x$HAVE_GALLIUM_NOUVEAU" != xyes -a "x$HAVE_GALLIUM_TEGRA" = xyes; then
+AC_MSG_ERROR([Building with tegra requires nouveau])
+fi
 
 detect_old_buggy_llvm() {
 dnl llvm-config may not give the right answer when llvm is a built as a
diff --git a/src/gallium/drivers/tegra/Makefile.am 
b/src/gallium/drivers/tegra/Makefile.am
index 1375ee97814f..7e87ea048733 100644
--- a/src/gallium/drivers/tegra/Makefile.am
+++ b/src/gallium/drivers/tegra/Makefile.am
@@ -1,18 +1,11 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CFLAGS = \
-I$(top_srcdir)/include/drm-uapi \
-   $(GALLIUM_DRIVER_CFLAGS) \
-   $(LIBUDEV_CFLAGS) \
-   $(TEGRA_CFLAGS)
+   $(GALLIUM_DRIVER_CFLAGS)
 
 noinst_LTLIBRARIES = libtegra.la
 
 libtegra_la_SOURCES = \
$(C_SOURCES)
-
-libtegra_la_LIBADD = \
-   $(LIBUDEV_LIBS)
diff --git a/src/gallium/drivers/tegra/Makefile.sources 
b/src/gallium/drivers/tegra/Makefile.sources
index 655c60ab6853..af4ff838c7ca 100644
--- a/src/gallium/drivers/tegra/Makefile.sources
+++ b/src/gallium/drivers/tegra/Makefile.sources
@@ -1,3 +1,6 @@
 C_SOURCES := \
tegra_context.c \
-   tegra_screen.c
+   tegra_context.h \
+   tegra_resource.h \
+   tegra_screen.c \
+   tegra_screen.h
diff --git a/src/gallium/drivers/tegra/tegra_context.c 
b/src/gallium/drivers/tegra/tegra_context.c
index feaa5138c95d..38e6e59b31ff 100644
--- a/src/gallium/drivers/tegra/tegra_context.c
+++ b/src/gallium/drivers/tegra/tegra_context.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2014-2016 NVIDIA Corporation
+ * Copyright © 2014-2018 NVIDIA Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -27,9 +27,9 @@
 #include "util/u_debug.h"
 #include "util/u_inlines.h"
 
-#include "tegra/tegra_context.h"
-#include "tegra/tegra_resource.h"
-#include "tegra/tegra_screen.h"
+#include "tegra_context.h"
+#include "tegra_resource.h"
+#include "tegra_screen.h"
 
 static void
 tegra_destroy(struct pipe_context *pcontext)
diff --git a/src/gallium/drivers/tegra/tegra_context.h 
b/src/gallium/drivers/tegra/tegra_context.h
index 669ae1c0c4ab..4869b0913a6f 100644
--- a/src/gallium/drivers/tegra/tegra_context.h
+++ b/src/gallium/drivers/tegra/tegra_context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2014-2016 NVIDIA Corporation
+ * Copyright © 2014-2018 NVIDIA Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/src/gallium/drivers/tegra/tegra_resource.h 
b/src/gallium/drivers/tegra/tegra_resource.h
index 43265211be1e..67507d64590d 100644
--- a/src/gallium/drivers/tegra/tegra_resource.h
+++ b/src/gallium/drivers/tegra/tegra_resource.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2014-2016 NVIDIA Corporation
+ * Copyright © 2014-2018 NVIDIA Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/src/gallium/drivers/tegra/tegra_screen.c 
b/src/gallium/drivers/tegra/tegra_screen.c
index 2bb6014238af..5ff79a5a7d66 100644
--- a/src/gallium/drivers/tegra/tegra_screen.c
+++ 

[Mesa-dev] [PATCH] fixup! nouveau: Add framebuffer modifier support

2018-02-22 Thread Thierry Reding
From: Thierry Reding 

---
 src/gallium/drivers/nouveau/nouveau_buffer.c |  3 +-
 src/gallium/drivers/nouveau/nouveau_buffer.h |  3 +-
 src/gallium/drivers/nouveau/nouveau_screen.c | 10 ---
 src/gallium/drivers/nouveau/nv30/nv30_resource.c |  4 +-
 src/gallium/drivers/nouveau/nv50/nv50_resource.c |  5 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c  | 73 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 85 +---
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.h |  2 -
 8 files changed, 109 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 73afff961115..2c604419ce05 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -636,8 +636,7 @@ const struct u_resource_vtbl nouveau_buffer_vtbl =
 
 struct pipe_resource *
 nouveau_buffer_create(struct pipe_screen *pscreen,
-  const struct pipe_resource *templ,
-  const uint64_t *modifiers, unsigned int count)
+  const struct pipe_resource *templ)
 {
struct nouveau_screen *screen = nouveau_screen(pscreen);
struct nv04_resource *buffer;
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h 
b/src/gallium/drivers/nouveau/nouveau_buffer.h
index 466f8cc2b466..3a33fae9ce2f 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -89,8 +89,7 @@ nouveau_resource_mapped_by_gpu(struct pipe_resource *resource)
 
 struct pipe_resource *
 nouveau_buffer_create(struct pipe_screen *pscreen,
-  const struct pipe_resource *templ,
-  const uint64_t *modifiers, unsigned int count);
+  const struct pipe_resource *templ);
 
 struct pipe_resource *
 nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr,
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index d651cc7f4b8c..b84ef13ebe7f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -128,15 +128,6 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
return bo;
 }
 
-static uint64_t nouveau_bo_get_modifier(struct nouveau_bo *bo)
-{
-   struct nouveau_device *dev = bo->device;
-
-   if (dev->chipset >= 0xc0)
-  return nvc0_bo_get_modifier(bo);
-
-   return DRM_FORMAT_MOD_INVALID;
-}
 
 bool
 nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
@@ -144,7 +135,6 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
  unsigned stride,
  struct winsys_handle *whandle)
 {
-   whandle->modifier = nouveau_bo_get_modifier(bo);
whandle->stride = stride;
 
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.c 
b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
index 38d2b2e41c30..386bd3459bd3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
@@ -53,11 +53,9 @@ static struct pipe_resource *
 nv30_resource_create(struct pipe_screen *pscreen,
  const struct pipe_resource *tmpl)
 {
-   const uint64_t modifier = DRM_FORMAT_MOD_INVALID;
-
switch (tmpl->target) {
case PIPE_BUFFER:
-  return nouveau_buffer_create(pscreen, tmpl, &modifier, 1);
+  return nouveau_buffer_create(pscreen, tmpl);
default:
   return nv30_miptree_create(pscreen, tmpl);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c 
b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
index 37592ad66349..aed8c6241d4b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -1,4 +1,3 @@
-#include <drm_fourcc.h>
 
 #include "pipe/p_context.h"
 #include "util/u_inlines.h"
@@ -12,11 +11,9 @@ static struct pipe_resource *
 nv50_resource_create(struct pipe_screen *screen,
  const struct pipe_resource *templ)
 {
-   const uint64_t modifier = DRM_FORMAT_MOD_INVALID;
-
switch (templ->target) {
case PIPE_BUFFER:
-  return nouveau_buffer_create(screen, templ, &modifier, 1);
+  return nouveau_buffer_create(screen, templ);
default:
   return nv50_miptree_create(screen, templ);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 627d6b7346c3..7983c4030876 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -24,6 +24,7 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
+#include "state_tracker/drm_driver.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 
@@ -235,9 +236,79 @@ nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt)
}
 }
 
+static uint64_t 

Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2018-02-22 Thread Kristian Høgsberg
On Wed, Feb 21, 2018 at 4:00 PM Alex Deucher  wrote:

> On Wed, Feb 21, 2018 at 1:14 AM, Chad Versace 
wrote:
> > On Thu 21 Dec 2017, Daniel Vetter wrote:
> >> On Thu, Dec 21, 2017 at 12:22 AM, Kristian Kristensen <
hoegsb...@google.com> wrote:
> >>> On Wed, Dec 20, 2017 at 12:41 PM, Miguel Angel Vico <
mvicom...@nvidia.com> wrote:
>  On Wed, 20 Dec 2017 11:54:10 -0800 Kristian Høgsberg <
hoegsb...@gmail.com> wrote:
> > I'd like to see concrete examples of actual display controllers
> > supporting more format layouts than what can be specified with a 64
> > bit modifier.
> 
>  The main problem is our tiling and other metadata parameters can't
>  generally fit in a modifier, so we find passing a blob of metadata a
>  more suitable mechanism.
> >>>
> >>> I understand that you may have n knobs with a total of more than
> >>> 56 bits that configure your tiling/swizzling for color buffers. What
I don't
> >>> buy is that you need all those combinations when passing buffers
around
> >>> between codecs, cameras and display controllers. Even if you're
sharing
> >>> between the same 3D drivers in different processes, I expect just
locking
> >>> down, say, 64 different combinations (you can add more over time) and
> >>> assigning each a modifier would be sufficient. I doubt you'd extract
> >>> meaningful performance gains from going all the way to a blob.
> >
> > I agree with Kristian above. In my opinion, choosing to encode in
> > modifiers a precise description of every possible tiling/compression
> > layout is not technically incorrect, but I believe it misses the point.
> > The intention behind modifiers is not to exhaustively describe all
> > possibilities.
> >
> > I summarized this opinion in VK_EXT_image_drm_format_modifier,
> > where I wrote an "introduction to modifiers" section. Here's an excerpt:
> >
> > One goal of modifiers in the Linux ecosystem is to enumerate for
each
> > vendor a reasonably sized set of tiling formats that are
appropriate for
> > images shared across processes, APIs, and/or devices, where each
> > participating component may possibly be from different vendors.
> > A non-goal is to enumerate all tiling formats supported by all
vendors.
> > Some tiling formats used internally by vendors are inappropriate for
> > sharing; no modifiers should be assigned to such tiling formats.

> Where it gets tricky is how to select that subset?  Our tiling modes
> are defined more by the asic specific constraints than the tiling mode
> itself.  At a high level we have basically 3 tiling modes (out of 16
> possible) that would be the minimum we'd want to expose for gfx6-8.
> gfx9 uses a completely new scheme.
> 1. Linear (per asic stride requirements, not usable by many hw blocks)
> 2. 1D Thin (5 layouts, displayable, depth, thin, rotated, thick)
> 3. 2D Thin (1D tiling constraints, plus pipe config (18 possible),
> tile split (7 possible), sample split (4 possible), num banks (4
> possible), bank width (4 possible), bank height (4 possible), macro
> tile aspect (4 possible) all of which are asic config specific)

> I guess we could do something like:
> AMD_GFX6_LINEAR_ALIGNED_64B
> AMD_GFX6_LINEAR_ALIGNED_256B
> AMD_GFX6_LINEAR_ALIGNED_512B
> AMD_GFX6_1D_THIN_DISPLAY
> AMD_GFX6_1D_THIN_DEPTH
> AMD_GFX6_1D_THIN_ROTATED
> AMD_GFX6_1D_THIN_THIN
> AMD_GFX6_1D_THIN_THICK

AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P2_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_DISPLAY_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_DEPTH_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_ROTATED_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_THIN_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1

AMD_GFX6_2D_1D_THIN_THICK_PIPE_CONFIG_P4_8x16_TILE_SPLIT_64B_SAMPLE_SPLIT_1_NUM_BANKS_2_BANK_WIDTH_1_BANK_HEIGHT_1_MACRO_TILE_ASPECT_1
> etc.

> We only probably need 40 bits to encode all of the tiling parameters
> so we could do family, plus tiling encoding that still seems unwieldy
> to deal with from an application perspective.  All of 

Re: [Mesa-dev] [PATCH] appveyor: Build with MSVC 2015.

2018-02-22 Thread Roland Scheidegger
Looks great.
Reviewed-by: Roland Scheidegger 

Am 22.02.2018 um 17:29 schrieb Jose Fonseca:
> The MSVC version we (at VMware) primarily care about from now on is
> 2015.
> 
> We can drop support for building with 2013 in a future commit.  I'm not
> aware of significant changes in C99/C11 support from MSVC 2013 to 2015,
> but there's no point in continuing supporting old MSVC versions when
> nobody cares.
> 
> Testing done: https://ci.appveyor.com/project/jrfonseca/mesa/build/46
> ---
>  appveyor.yml | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/appveyor.yml b/appveyor.yml
> index 96eb1a67b3..bd33e2e955 100644
> --- a/appveyor.yml
> +++ b/appveyor.yml
> @@ -35,13 +35,13 @@ clone_depth: 100
>  
>  cache:
>  - win_flex_bison-2.5.9.zip
> -- llvm-3.3.1-msvc2013-mtd.7z
> +- llvm-3.3.1-msvc2015-mtd.7z
>  
> -os: Visual Studio 2013
> +os: Visual Studio 2015
>  
>  environment:
>WINFLEXBISON_ARCHIVE: win_flex_bison-2.5.9.zip
> -  LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
> +  LLVM_ARCHIVE: llvm-3.3.1-msvc2015-mtd.7z
>  
>  install:
>  # Check pip
> @@ -69,10 +69,10 @@ install:
>  - set LLVM=%CD%\llvm
>  
>  build_script:
> -- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
> +- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1
>  
>  after_build:
> -- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
> +- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1 check
>  
>  
>  # It's possible to setup notification here, as described in
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2] glx: Properly handle cases where screen creation fails

2018-02-22 Thread Emil Velikov
On 22 February 2018 at 17:36, Chuck Atkins  wrote:
>> > If there's a better way forward for having a minimal-dependency
>> > software-only implementation, I'd certainly be willing to try it.  At
>> > the
>> > moment though, gallium-xlib-glx is our path for that.
>> >
>> I was merely mentioning that the xlib-glx are in worse shape than the dri
>> one.
>>
>> That aside:
>> IIRC previously you mentioned that libGL/OSMesa must be static
>> libraries. If that is still the case, then DRI based GLX won't cut it.
>> Alternatively if you can point out any specifics that would be
>>
>> amazing.
>
>
> libGL is fine shared since we typically only use it on the client side with
> a single process.  We do still need to build both shared and static
> libOSMesa though, typically on Cray systems.  The use case is for running
> ~50k processes across several thousand machines, all loading the application
> from the same shared file system.  Using shared libraries in that situation
> poses a significant scaling problem with all 50k processes trying to load
> hundreds of small files from the same shared filesystem at the same time,
> and can cause application startup times to climb to 20m on a good scenario,
> over an hour at worst.  In this case, static libraries bypass the problem
> entirely since there's only a single fat executable to load instead of
> countless small SOs.  Note that we have the same problem with Python so we
> end up building a frozen python in this case to address it.  This is also
> the motivation for the patch a month ago that lets libswrARCH.so bypass
> dlopen and just be builtin when only using a single architecture.
>
Thanks for the explanation Chuck.

Since libGL can be shared, may I suggest giving the DRI based GLX a
try and reporting any oddities.
On the OSMesa front, well, let's leave it as-is for now.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105211] build failure after zwp_dmabuf commit if wayland-protocols is not installed

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105211

--- Comment #1 from Daniel Stone  ---
Yes, sorry about this, the autotools build is quite funny with
wayland-protocols.

At the moment, you can solve this by simply removing the linux-dmabuf lines
from src/vulkan/Makefile.sources, or you can install wayland-protocols. The
protocols package is purely a bunch of XML files, with no dependencies. You
don't need to actually enable Wayland support to bypass this build failure,
just to have wayland-protocols present somewhere in the pkg-config search path.

Hopefully we can find a real fix quite quickly.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] i965: add support for performance queries on CNL

2018-02-22 Thread Emil Velikov
Hi Lionel,

On 22 February 2018 at 17:24, Lionel Landwerlin
 wrote:

> One of the patches is quite big, so it is unlikely to make it to the ml. You
> can look at this series on my github :
>

>   i965: add performance query support on CNL
>
Indeed patch 4/4 seems stuck. Commit will work fine for meson,
autotools and android.
With the change below patch 4/4 is
Reviewed-by: Emil Velikov 

-Emil

diff --git a/src/mesa/drivers/dri/i965/Makefile.am
b/src/mesa/drivers/dri/i965/Makef
ile.am
index da56f67fa5f..276a24258ad 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -121,6 +121,7 @@ EXTRA_DIST = \
   brw_oa_glk.xml \
   brw_oa_cflgt2.xml \
   brw_oa_cflgt3.xml \
+   brw_oa_cnl.xml \
   brw_oa.py \
   meson.build
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/dri2: fix segfault when display initialisation fails

2018-02-22 Thread Emil Velikov
On 22 February 2018 at 17:25, Eric Engestrom  wrote:
> On Thursday, 2018-02-22 13:37:54 +, Frank Binns wrote:
>> dri2_display_destroy() is called when platform specific display
>> initialisation fails. However, this would typically lead to a
>> segfault due to the dri2_egl_display vtbl not having been set up.
>>
>> Fixes: 2db95482964 ("loader_dri3/glx/egl: Optionally use a blit
>> context for blitting operations")
>> Signed-off-by: Frank Binns 
>
> Error paths are basically never tested...
> You're entirely right, thanks for catching this!
>
> Reviewed-by: Eric Engestrom 
>
Yay error paths.

Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105211] build failure after zwp_dmabuf commit if wayland-protocols is not installed

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105211

Bug ID: 105211
   Summary: build failure after zwp_dmabuf commit if
wayland-protocols is not installed
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/Common
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: rei...@improbability.net
CC: airl...@freedesktop.org, chadvers...@chromium.org,
dan...@fooishbar.org, ja...@jlekstrand.net

Commit bfa22266cd4d268e52d2535b41ef972421e66f71 "vulkan/wsi/wayland: Add
support for zwp_dmabuf" causes a build failure if wayland protocols are not
installed. 
Similar to bug 101933 where if WL_DMABUF_XML=/unstable/... it should not be
used. Reverting resolves.

make[3]: Entering directory '/builddir/src/vulkan'
/bin/mkdir -p util
/bin/mkdir -p util
/bin/mkdir -p wsi
/usr/bin/python2.7  /builddir/src/vulkan/util/gen_enum_to_str.py \
--xml /builddir/src/vulkan/registry/vk.xml \
--xml /builddir/src/vulkan/registry/vk_android_native_buffer.xml \
--outdir ../../src/vulkan/util
/usr/bin/python2.7  /builddir/src/vulkan/util/gen_enum_to_str.py \
--xml /builddir/src/vulkan/registry/vk.xml \
--xml /builddir/src/vulkan/registry/vk_android_native_buffer.xml \
--outdir ../../src/vulkan/util
: code < /builddir/src/egl/wayland/wayland-drm/wayland-drm.xml >
wsi/wayland-drm-protocol.c
/bin/mkdir -p wsi
make[3]: *** No rule to make target
'/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml', needed by
'wsi/linux-dmabuf-unstable-v1-protocol.c'.  Stop.
make[3]: *** Waiting for unfinished jobs

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2] glx: Properly handle cases where screen creation fails

2018-02-22 Thread Chuck Atkins
>
> > If there's a better way forward for having a minimal-dependency
> > software-only implementation, I'd certainly be willing to try it.  At the
> > moment though, gallium-xlib-glx is our path for that.
> >
> I was merely mentioning that the xlib-glx are in worse shape than the dri
> one.
>
> That aside:
> IIRC previously you mentioned that libGL/OSMesa must be static
> libraries. If that is still the case, then DRI based GLX won't cut it.
> Alternatively if you can point out any specifics that would be

amazing.


libGL is fine shared since we typically only use it on the client side with
a single process.  We do still need to build both shared and static
libOSMesa though, typically on Cray systems.  The use case is for running
~50k processes across several thousand machines, all loading the
application from the same shared file system.  Using shared libraries in
that situation poses a significant scaling problem with all 50k processes
trying to load hundreds of small files from the same shared filesystem at
the same time, and can cause application startup times to climb to 20m on a
good scenario, over an hour at worst.  In this case, static libraries
bypass the problem entirely since there's only a single fat executable to
load instead of countless small SOs.  Note that we have the same problem
with Python so we end up building a frozen python in this case to address
it.  This is also the motivation for the patch a month ago that lets
libswrARCH.so bypass dlopen and just be builtin when only using a single
architecture.

- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/dri2: fix segfault when display initialisation fails

2018-02-22 Thread Eric Engestrom
On Thursday, 2018-02-22 13:37:54 +, Frank Binns wrote:
> dri2_display_destroy() is called when platform specific display
> initialisation fails. However, this would typically lead to a
> segfault due to the dri2_egl_display vtbl not having been set up.
> 
> Fixes: 2db95482964 ("loader_dri3/glx/egl: Optionally use a blit
> context for blitting operations")
> Signed-off-by: Frank Binns 

Error paths are basically never tested...
You're entirely right, thanks for catching this!

Reviewed-by: Eric Engestrom 

> ---
>  src/egl/drivers/dri2/egl_dri2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
> index 17b646e..c06a0ca 100644
> --- a/src/egl/drivers/dri2/egl_dri2.c
> +++ b/src/egl/drivers/dri2/egl_dri2.c
> @@ -973,7 +973,7 @@ dri2_display_destroy(_EGLDisplay *disp)
> struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
>  
> if (dri2_dpy->own_dri_screen) {
> -  if (dri2_dpy->vtbl->close_screen_notify)
> +  if (dri2_dpy->vtbl && dri2_dpy->vtbl->close_screen_notify)
>   dri2_dpy->vtbl->close_screen_notify(disp);
>dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
> }
> -- 
> 2.7.4
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] i965: add support for performance queries on CNL

2018-02-22 Thread Lionel Landwerlin
Hi all,

This series adds support for perf queries on CNL. It depends on a new
kernel uAPI so I don't expect this to land just yet. The uAPI has
received enough review that this can be looked at from the Mesa side.

It's a bit verbose unfortunately because we have to support multiple
ways of getting somewhat similar information across kernel
versions :(

One of the patches is quite big so, unlikely to make it to the ml. You
can look at this series on my github :

   https://github.com/djdeath/mesa/tree/wip/djdeath/query-topology

Thanks,

Lionel Landwerlin (4):
  drm-uapi: bump headers
  i965: perf: query topology
  i965: perf: add support for new equation operators
  i965: add performance query support on CNL

 include/drm-uapi/i915_drm.h   |   146 +-
 src/mesa/drivers/dri/i965/Makefile.sources| 4 +-
 src/mesa/drivers/dri/i965/brw_context.h   |14 +
 src/mesa/drivers/dri/i965/brw_oa.py   |15 +
 src/mesa/drivers/dri/i965/brw_oa_cnl.xml  | 10410 
 src/mesa/drivers/dri/i965/brw_performance_query.c |   270 +-
 src/mesa/drivers/dri/i965/meson.build | 2 +-
 7 files changed, 10785 insertions(+), 76 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_cnl.xml

--
2.16.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965: perf: add support for new equation operators

2018-02-22 Thread Lionel Landwerlin
Some equations of the CNL metrics started to use operators we haven't
defined yet, just add those.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 576ea6687fc..628fd76b450 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -125,6 +125,18 @@ def emit_umin(tmp_id, args):
 c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
 return tmp_id + 1
 
+def emit_lshft(tmp_id, args):
+c("uint64_t tmp{0} = {1} << {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
+def emit_rshft(tmp_id, args):
+c("uint64_t tmp{0} = {1} >> {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
+def emit_and(tmp_id, args):
+c("uint64_t tmp{0} = {1} & {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
 ops = {}
 # (n operands, emitter)
 ops["FADD"] = (2, emit_fadd)
@@ -138,6 +150,9 @@ ops["UDIV"] = (2, emit_udiv)
 ops["UMUL"] = (2, emit_umul)
 ops["USUB"] = (2, emit_usub)
 ops["UMIN"] = (2, emit_umin)
+ops["<<"]   = (2, emit_lshft)
+ops[">>"]   = (2, emit_rshft)
+ops["AND"]  = (2, emit_and)
 
 def brkt(subexp):
 if " " in subexp:
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] drm-uapi: bump headers

2018-02-22 Thread Lionel Landwerlin
This commit is meant to be replaced with a proper bump from drm-next.
---
 include/drm-uapi/i915_drm.h | 146 +++-
 1 file changed, 145 insertions(+), 1 deletion(-)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 7f28eea4035..9dfebbbe117 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -102,6 +102,46 @@ enum drm_i915_gem_engine_class {
I915_ENGINE_CLASS_INVALID   = -1
 };
 
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+   I915_SAMPLE_BUSY = 0,
+   I915_SAMPLE_WAIT = 1,
+   I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+   (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+   ((class) << I915_PMU_CLASS_SHIFT | \
+   (instance) << I915_PMU_SAMPLE_BITS | \
+   (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY  __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255/* table size 2k - maximum due to use
@@ -278,6 +318,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_OPEN 0x36
 #define DRM_I915_PERF_ADD_CONFIG   0x37
 #define DRM_I915_PERF_REMOVE_CONFIG0x38
+#define DRM_I915_QUERY 0x39
 
 #define DRM_IOCTL_I915_INITDRM_IOW( DRM_COMMAND_BASE + 
DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH   DRM_IO ( DRM_COMMAND_BASE + 
DRM_I915_FLUSH)
@@ -335,6 +376,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_OPEN   DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG  DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_QUERY, struct drm_i915_query)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1318,7 +1360,9 @@ struct drm_intel_overlay_attrs {
  * active on a given plane.
  */
 
-#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set
+   * flags==0 to disable 
colorkeying.
+   */
 #define I915_SET_COLORKEY_DESTINATION  (1<<1)
 #define I915_SET_COLORKEY_SOURCE   (1<<2)
 struct drm_intel_sprite_colorkey {
@@ -1573,6 +1617,106 @@ struct drm_i915_perf_oa_config {
__u64 flex_regs_ptr;
 };
 
+struct drm_i915_query_item {
+   __u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO1
+
+   /*
+* When set to zero by userspace, this is filled with the size of the
+* data to be written at the data_ptr pointer. The kernel set this
+* value to a negative value to signal an error on a particular query
+* item.
+*/
+   __s32 length;
+
+   /*
+* Unused for now.
+*/
+   __u32 flags;
+
+   /*
+* Data will be written at the location pointed by data_ptr when the
+* value of length matches the length of the data to be written by the
+* kernel.
+*/
+   __u64 data_ptr;
+};
+
+struct drm_i915_query {
+   __u32 num_items;
+
+   /*
+* Unused for now.
+*/
+   __u32 flags;
+
+   /*
+* This point to an array of num_items drm_i915_query_item structures.
+*/
+   __u64 items_ptr;
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ *
+ * data: contains the 3 pieces of information :
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ *   available. The availability of slice X can be queried with the following
+ *   formula :
+ *
+ *   (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice 

[Mesa-dev] [PATCH 2/4] i965: perf: query topology

2018-02-22 Thread Lionel Landwerlin
With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available.

We introduce a new uAPI in the kernel driver to report exactly which
parts of the GPU are fused and require this to be available on Gen10+.

Prior generations can continue to rely on GETPARAM on older kernels.

This patch is quite a lot of code because we have to support lots of
different kernel versions, ranging from not providing any information
(for Haswell on 4.13 through 4.17), to being able to query through
GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17
for Gen10+.

This change stores topology information in a unified way on
brw_context.topology from the various kernel APIs. And then generates
the appropriate values for the equations from that unified topology.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  14 ++
 src/mesa/drivers/dri/i965/brw_performance_query.c | 267 --
 2 files changed, 208 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 050b656e3da..69bf7530fbc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1160,6 +1160,20 @@ struct brw_context
   bool supported;
} predicate;
 
+   struct {
+  uint8_t slice_mask[4];
+  uint8_t subslice_mask[100];
+  uint8_t eu_mask[100];
+
+  uint16_t max_slices;
+  uint16_t max_subslices;
+  uint16_t max_eus_per_subslice;
+
+  uint16_t subslice_slice_stride;
+  uint16_t eu_slice_stride;
+  uint16_t eu_subslice_stride;
+   } topology;
+
struct {
   /* Variables referenced in the XML meta data for OA performance
* counters, e.g in the normalization equations.
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index c0bb4442bec..10f519a757f 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1888,6 +1888,192 @@ init_oa_configs(struct brw_context *brw, const char 
*sysfs_dev_dir)
}
 }
 
+static bool
+query_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct drm_i915_query_item item = {
+  .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+   };
+   struct drm_i915_query query = {
+  .num_items = 1,
+  .items_ptr = (uintptr_t) ,
+   };
+
+   return false;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, ))
+  return false;
+
+   struct drm_i915_query_topology_info *topo_info =
+  (struct drm_i915_query_topology_info *) calloc(1, item.length);
+   item.data_ptr = (uintptr_t) topo_info;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, ))
+  return false;
+
+   brw->topology.max_slices = topo_info->max_slices;
+   brw->topology.max_subslices = topo_info->max_subslices;
+   brw->topology.max_eus_per_subslice = topo_info->max_eus_per_subslice;
+
+   brw->topology.subslice_slice_stride =
+  DIV_ROUND_UP(brw->topology.max_subslices, 8);
+   brw->topology.eu_subslice_stride =
+  DIV_ROUND_UP(brw->topology.max_eus_per_subslice, 8);
+   brw->topology.eu_slice_stride = brw->topology.max_subslices *
+  brw->topology.eu_subslice_stride;
+
+   assert(DIV_ROUND_UP(topo_info->max_slices, 8) <=
+  sizeof(brw->topology.slice_mask));
+   memcpy(brw->topology.slice_mask, topo_info->data,
+  DIV_ROUND_UP(topo_info->max_slices, 8));
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices, 8) <=
+  sizeof(brw->topology.subslice_mask));
+   memcpy(brw->topology.subslice_mask,
+  _info->data[topo_info->subslice_offset],
+  topo_info->max_slices * topo_info->subslice_stride);
+
+   assert(DIV_ROUND_UP(topo_info->max_slices * topo_info->max_subslices *
+   topo_info->max_eus_per_subslice, 8) <=
+  sizeof(brw->topology.eu_mask));
+   memcpy(brw->topology.eu_mask,
+  _info->data[topo_info->eu_offset],
+  topo_info->max_slices * topo_info->max_subslices * 
topo_info->eu_stride);
+
+   free(topo_info);
+
+   return true;
+}
+
+static bool
+getparam_topology(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   drm_i915_getparam_t gp;
+   int ret;
+
+   /* On CNL+ we need to use the query ioctl(). */
+   assert(devinfo->gen < 10);
+
+   int slice_mask = 0;
+   gp.param = I915_PARAM_SLICE_MASK;
+   gp.value = _mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, );
+   if (ret)
+  return false;
+
+   int subslice_mask = 0;
+   gp.param = I915_PARAM_SUBSLICE_MASK;
+   gp.value = _mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, );
+   if (ret)
+  return false;
+
+   brw->topology.max_slices = 

Re: [Mesa-dev] [PATCH] intel: aubinator_error_decode: fix segfault on missing register

2018-02-22 Thread Kenneth Graunke
On Thursday, February 22, 2018 5:41:10 AM PST Lionel Landwerlin wrote:
> Some register might be missing in our genxmls. Don't try to decode
> them.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/aubinator_error_decode.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/tools/aubinator_error_decode.c 
> b/src/intel/tools/aubinator_error_decode.c
> index 2331114b446..08ece771fde 100644
> --- a/src/intel/tools/aubinator_error_decode.c
> +++ b/src/intel/tools/aubinator_error_decode.c
> @@ -65,9 +65,9 @@ print_head(unsigned int reg)
>  static void
>  print_register(struct gen_spec *spec, const char *name, uint32_t reg)
>  {
> -   struct gen_group *reg_spec = gen_spec_find_register_by_name(spec, name);
> +   struct gen_group *reg_spec;
>  
> -   if (reg_spec) {
> +   if (name && (reg_spec = gen_spec_find_register_by_name(spec, name))) {
>gen_print_group(stdout, reg_spec, 0, , 0,
>option_color == COLOR_ALWAYS);
> }
> 

Personally I would write this as:

   struct gen_group *reg_spec = !name ? NULL :
  gen_spec_find_register_by_name(spec, name);

or

   struct gen_group *reg_spec =
  name ? gen_spec_find_register_by_name(spec, name) : NULL;

because assignments in expressions, giving them side-effects, can be a
bit surprising, while ternaries are common and straightforward.

Either way,
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104553] mat4: m[i][j] incorrect result with row_major UBO

2018-02-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104553

Alejandro Piñeiro (freenode IRC: apinheiro)  changed:

   What|Removed |Added

 CC||apinhe...@igalia.com

--- Comment #9 from Alejandro Piñeiro (freenode IRC: apinheiro) 
 ---
Found this bug when I was about to report the same problem. Some comments
below:

(In reply to florian.will from comment #0)
> Created attachment 136630 [details]
> Failing piglit test (when test_variant = 2) for this bug.
> 
> I hit another bug while trying to get Banshee 3D
>  to work correctly on Mesa
> radeonsi (HD 7870) / amdgpu kernel module. I use git commit 2719467eb6 right
> now, which is a few days old.
> 
> Accessing a "mat4 m" component, e.g. m[1][2], returns unexpected results if
> m is declared inside a UBO block and uses row_major matrix format. col_major
> works as expected. m[1].z also works. Some experiments indicate that for
> m[i][j], the UBO buffer is accessed at offset (i+j)*4 instead of (i+j*4)*4.
> 
> The invalid offsets for loading floats from the UBO are visible in the Mesa
> IR when linker.cpp is done (probably introduced by lower_ubo_reference()).

As you imply below, this problem also affects ssbo. So perhaps it would be a
good idea to update the bug description?

> While exploring this issue, I prepared a piglit test case that tests for
> this. It fails on my setup (when test_variant = 2, the other tests succeed).
> I will attach it to this bug report and send it to the piglit list for
> review.
> 
> 
> 
> Possible Fix:
> 
> I doubt this is enough/correct, because I haven't fully grasped the IR
> processing in the glsl compiler and which fields/data types can/can't be
> row_major and the implications, but this simple change in
> lower_buffer_access.cpp 

FWIW, while skimming the code, I also got to lower_buffer_access and
lower_ubo_reference (this one touches both ssbo and ubo), so I agree that the
issue is likely in that code. Not sure if your fix is correct or on the right
path, sorry.

(In reply to florian.will from comment #7)
> Created attachment 136878 [details] [review]
> Changes to piglit UBO test generator
> 
> I have now extended the random UBO piglit test generator python script (in a
> hackish way) to generate SSBO tests as well, and added std430 packing rules
> to generate std430 SSBO tests. My changes are in the attached patch file,
> but I'd say it's not suitable for piglit git (too ugly).
> 
> It was helpful to validate the mesa patch I've attached to this bug report
> earlier. Using mesa git master, 391 out of the 540 generated UBO tests
> fail. After applying my patch, only a few tests (3-7) fail. The failing
> tests are always very huge test files (some have more than 10k lines and
> sometimes up to 5MB shader_test files). Apparently they hit something like
> an internal size limit for vertex shaders, because the tests pass when
> commenting out one half of the test conditions in the vertex shader, and
> they still pass when commenting out the other half of the vertex shader.

Somewhat off-topic: Timothy mentioned that in the past it was not included due
to the large number of tests added. So perhaps a compromise would be to add some
(~10?) barebone tests, to at least cover the most basic cases. Something like
this test I wrote while debugging this:
https://github.com/Igalia/piglit/blob/apinheiro/matrix-row-major-failure/tests/spec/arb_fake/execution/ubo/matrix-column-vs-row.shader_test

or in an ideal world, get the script to be configurable on how many tests to
create, and the default being a reasonable amount of tests (fwiw, 540 generated
tests seems somewhat too much).

> So I'm now fairly confident that my patch improves the SSBO / UBO buffer
> access behaviour when reading from SSBOs and UBOs.
> 
> Is there anything else that should be tested? Or any comments about the
> patch by someone who knows the lower_buffer_access code better than I do?

Unfortunately, although I would be interested in working on this, I don't have
the time right now.


And now totally off-topic, but it is probably worth mentioning here so as not
to forget: VK-GL-CTS doesn't catch this problem either. And they have tons of
row_major tests, for example:
KHR-GL45.shaders.uniform_block.single_basic_type.std140.row_major_mediump_mat4

is passing properly. So either the test is wrong or it is incomplete. I tried to
take a look at the test, but it is somewhat hard to understand.
https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/openglcts/modules/common/glcUniformBlockCase.cpp#L1346

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Really use correct HTILE expanded words.

2018-02-22 Thread James Legg
When transitioning to an htile compressed depth format, set the full
depth range, so later rasterization can pass HiZ. Previously, for depth
only formats, the depth range was set to 0 to 0. This caused unwanted
HiZ rejections with a VK_FORMAT_D16_UNORM depth buffer
(VK_FORMAT_D32_SFLOAT was not affected somehow).

These values are derived from PAL [0], since I can't find the
specification describing the htile values.

Fixes 5158603182fe7435: radv: Use correct HTILE expanded words.

[0] 
https://github.com/GPUOpen-Drivers/pal/blob/5cba4ecbda9452773f59692f5915301e7db4a183/src/core/hw/gfxip/gfx9/gfx9MaskRam.cpp#L1500

CC: Dave Airlie 
CC: Bas Nieuwenhuizen 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8a384b114c..2b41baea3d 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3440,8 +3440,8 @@ void radv_CmdEndRenderPass(
 
 /*
  * For HTILE we have the following interesting clear words:
- *   0x030f: Uncompressed for depth+stencil HTILE.
- *   0x000f: Uncompressed for depth only HTILE.
+ *   0xf30f: Uncompressed, full depth range, for depth+stencil HTILE
+ *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
  *   0xfff0: Clear depth to 1.0
  *   0x: Clear depth to 0.0
  */
@@ -3489,7 +3489,7 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
radv_initialize_htile(cmd_buffer, image, range, 0);
} else if (!radv_layout_is_htile_compressed(image, src_layout, 
src_queue_mask) &&
   radv_layout_is_htile_compressed(image, dst_layout, 
dst_queue_mask)) {
-   uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0x30f : 0xf;
+   uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xf30f : 0xfffc000f;
radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (radv_layout_is_htile_compressed(image, src_layout, 
src_queue_mask) &&
   !radv_layout_is_htile_compressed(image, dst_layout, 
dst_queue_mask)) {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] appveyor: Build with MSVC 2015.

2018-02-22 Thread Brian Paul

On 02/22/2018 09:29 AM, Jose Fonseca wrote:

The MSVC version we (at VMware) primarily care about from now on is
2015.

We can drop support for building with 2013 in a future commit.  I'm not
aware of significant changes in C99/C11 support from MSVC 2013 to 2015,
but there's no point in continuing supporting old MSVC versions when
nobody cares.

Testing done: https://ci.appveyor.com/project/jrfonseca/mesa/build/46
---
  appveyor.yml | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 96eb1a67b3..bd33e2e955 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -35,13 +35,13 @@ clone_depth: 100
  
  cache:

  - win_flex_bison-2.5.9.zip
-- llvm-3.3.1-msvc2013-mtd.7z
+- llvm-3.3.1-msvc2015-mtd.7z
  
-os: Visual Studio 2013

+os: Visual Studio 2015
  
  environment:

WINFLEXBISON_ARCHIVE: win_flex_bison-2.5.9.zip
-  LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
+  LLVM_ARCHIVE: llvm-3.3.1-msvc2015-mtd.7z
  
  install:

  # Check pip
@@ -69,10 +69,10 @@ install:
  - set LLVM=%CD%\llvm
  
  build_script:

-- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1
  
  after_build:

-- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1 check
  
  
  # It's possible to setup notification here, as described in




Looks OK to me.

Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] egl/dri2: fix segfault when display initialisation fails

2018-02-22 Thread Frank Binns
dri2_display_destroy() is called when platform specific display
initialisation fails. However, this would typically lead to a
segfault due to the dri2_egl_display vtbl not having been set up.

Fixes: 2db95482964 ("loader_dri3/glx/egl: Optionally use a blit
context for blitting operations")
Signed-off-by: Frank Binns 
---
 src/egl/drivers/dri2/egl_dri2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 17b646e..c06a0ca 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -973,7 +973,7 @@ dri2_display_destroy(_EGLDisplay *disp)
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
 
if (dri2_dpy->own_dri_screen) {
-  if (dri2_dpy->vtbl->close_screen_notify)
+  if (dri2_dpy->vtbl && dri2_dpy->vtbl->close_screen_notify)
  dri2_dpy->vtbl->close_screen_notify(disp);
   dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] swr/rast: Code generation cleanup

2018-02-22 Thread George Kyriazis
Generate more compact code from gen_llvm.hpp.
---
 .../swr/rasterizer/codegen/templates/gen_llvm.hpp  | 36 +-
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
index d61194d..190e660 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
@@ -1,5 +1,5 @@
 /
-* Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -39,19 +39,19 @@ namespace SwrJit
 %for type in types:
 INLINE static StructType *Gen_${type['name']}(JitManager* pJitMgr)
 {
+%if needs_ctx(type):
 LLVMContext& ctx = pJitMgr->mContext;
-   (void) ctx;
 
+%endif
 StructType* pRetType = 
pJitMgr->mpCurrentModule->getTypeByName("${type['name']}");
 if (pRetType == nullptr)
 {
-std::vector members;
-<%
-(max_type_len, max_name_len) = calc_max_len(type['members'])
-%>
-%for member in type['members']:
-/* ${member['name']} ${pad(len(member['name']), max_name_len)}*/ 
members.push_back(${ member['type'] });
-%endfor
+std::vector members =<% (max_type_len, max_name_len) = 
calc_max_len(type['members']) %>
+{
+%for member in type['members']:
+/* ${member['name']} ${pad(len(member['name']), 
max_name_len)}*/ ${member['type']},
+%endfor
+};
 
 pRetType = StructType::create(members, "${type['name']}", false);
 
@@ -59,13 +59,13 @@ namespace SwrJit
 llvm::DIBuilder builder(*pJitMgr->mpCurrentModule);
 llvm::DIFile* pFile = builder.createFile("${input_file}", 
"${os.path.normpath(input_dir).replace('\\', '/')}");
 
-std::vector> dbgMembers;
-%for member in type['members']:
-dbgMembers.push_back(std::make_pair("${member['name']}", ${ 
member['lineNum'] }));
-%endfor
-
+std::vector> dbgMembers =
+{
+%for member in type['members']:
+std::make_pair("${member['name']}", ${pad(len(member['name']), 
max_name_len)}${member['lineNum']}),
+%endfor
+};
 pJitMgr->CreateDebugStructType(pRetType, "${type['name']}", pFile, 
${type['lineNum']}, dbgMembers);
-
 }
 
 return pRetType;
@@ -80,6 +80,12 @@ namespace SwrJit
 
 <%! # Global function definitions
 import os
+def needs_ctx(struct_type):
+for m in struct_type.get('members', []):
+if '(ctx)' in m.get('type', ''):
+return True
+return False
+
 def calc_max_len(fields):
 max_type_len = 0
 max_name_len = 0
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >