Mesa (master): ilo: add image_get_gen{6,7}_alignment()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: f1946546c7d4ac22799a8b4944d6c36b77e22626
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f1946546c7d4ac22799a8b4944d6c36b77e22626

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:16:11 2015 +0800

ilo: add image_get_gen{6,7}_alignment()

They replace img_init_alignments().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  336 --
 1 file changed, 177 insertions(+), 159 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index c22c119..d28cb07 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -57,6 +57,9 @@ struct ilo_image_layout {
enum gen_surface_tiling tiling;
 
enum ilo_image_aux_type aux;
+
+   int align_i;
+   int align_j;
 };
 
 static enum ilo_image_walk_type
@@ -382,6 +385,154 @@ image_get_gen7_mcs_enable(const struct ilo_dev *dev,
}
 }
 
+static void
+image_get_gen6_alignments(const struct ilo_dev *dev,
+  const struct ilo_image_info *info,
+  int *align_i, int *align_j)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /*
+* From the Sandy Bridge PRM, volume 1 part 1, page 113:
+*
+* surface format           align_i     align_j
+*  YUV 4:2:2 formats        4           *see below
+*  BC1-5                    4           4
+*  FXT1                     8           4
+*  all other formats        4           *see below
+*
+* - align_j = 4 for any depth buffer
+*  - align_j = 2 for separate stencil buffer
+*  - align_j = 4 for any render target surface is multisampled (4x)
+*  - align_j = 4 for any render target surface with Surface Vertical
+*Alignment = VALIGN_4
+*  - align_j = 2 for any render target surface with Surface Vertical
+*Alignment = VALIGN_2
+*  - align_j = 2 for all other render target surface
+*  - align_j = 2 for any sampling engine surface with Surface Vertical
+*Alignment = VALIGN_2
+*  - align_j = 4 for any sampling engine surface with Surface Vertical
+*Alignment = VALIGN_4
+*
+* From the Sandy Bridge PRM, volume 4 part 1, page 86:
+*
+* This field (Surface Vertical Alignment) must be set to VALIGN_2 if
+*  the Surface Format is 96 bits per element (BPE).
+*
+* They can be rephrased as
+*
+*                                  align_i        align_j
+*   compressed formats             block width    block height
+*   GEN6_FORMAT_R8_UINT            4              2
+*   other depth/stencil formats    4              4
+*   4x multisampled                4              4
+*   bpp 96                         4              2
+*   others                         4              2 or 4
+*/
+
+   *align_i = (info->compressed) ? info->block_width : 4;
+   if (info->compressed) {
+      *align_j = info->block_height;
+   } else if (info->bind_zs) {
+      *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
+   } else {
+      *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
+   }
+}
+
+static void
+image_get_gen7_alignments(const struct ilo_dev *dev,
+  const struct ilo_image_info *info,
+  enum gen_surface_tiling tiling,
+  int *align_i, int *align_j)
+{
+   int i, j;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+* From the Ivy Bridge PRM, volume 1 part 1, page 110:
+*
+* surface defined by      surface format     align_i     align_j
+*  3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
+*                          not D16_UNORM      4           4
+*  3DSTATE_STENCIL_BUFFER  N/A                8           8
+*  SURFACE_STATE           BC*, ETC*, EAC*    4           4
+*                          FXT1               8           4
+*                          all others         (set by SURFACE_STATE)
+*
+* From the Ivy Bridge PRM, volume 4 part 1, page 63:
+*
+* - This field (Surface Vertical Aligment) is intended to be set to
+*VALIGN_4 if the surface was rendered as a depth buffer, for a
+*multisampled (4x) render target, or for a multisampled (8x)
+*render target, since these surfaces support only alignment of 4.
+*  - Use of VALIGN_4 for other surfaces is supported, but uses more
+*memory.
+*  - This field must be set to VALIGN_4 for all tiled Y Render Target
+*surfaces.
+*  - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
+*YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
+*  - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
+*must be set to VALIGN_4.
+*  - VALIGN_4 is not 

Mesa (master): ilo: add image_set_gen6_bo_size()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: 3c6af396f9526bdc8351ff61bcc6c42a3892e6b8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c6af396f9526bdc8351ff61bcc6c42a3892e6b8

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:46:34 2015 +0800

ilo: add image_set_gen6_bo_size()

It replaces img_calculate_bo_size().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  191 --
 src/gallium/drivers/ilo/core/ilo_image.h |8 +-
 src/gallium/drivers/ilo/ilo_resource.c   |8 ++
 3 files changed, 89 insertions(+), 118 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index 4b53233..ec5ae04 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -43,7 +43,6 @@ enum {
 struct ilo_image_params {
const struct ilo_dev *dev;
const struct ilo_image_info *info;
-   unsigned valid_tilings;
 
unsigned h0, h1;
unsigned max_x, max_y;
@@ -295,7 +294,8 @@ image_get_gen6_tiling(const struct ilo_dev *dev,
  info-bind_surface_dp_typed))
  return GEN6_TILING_NONE;
 
-      if (estimated_size <= 64)
+      if (estimated_size <= 64 ||
+          estimated_size > info->prefer_linear_threshold)
          return GEN6_TILING_NONE;
 
       if (estimated_size <= 2048)
@@ -987,114 +987,87 @@ img_init_size_and_format(struct ilo_image *img,
img-sample_count = info-sample_count;
 }
 
-/* note that this may force the texture to be linear */
-static void
-img_calculate_bo_size(struct ilo_image *img,
-  const struct ilo_image_params *params)
+static bool
+image_set_gen6_bo_size(struct ilo_image *img,
+   const struct ilo_dev *dev,
+   const struct ilo_image_info *info,
+   const struct ilo_image_layout *layout)
 {
-   assert(params->max_x % img->block_width == 0);
-   assert(params->max_y % img->block_height == 0);
-   assert(img->walk_layer_height % img->block_height == 0);
+   int stride, height;
+   int align_w, align_h;
 
-   img->bo_stride =
-      (params->max_x / img->block_width) * img->block_size;
-   img->bo_height = params->max_y / img->block_height;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   while (true) {
-      unsigned w = img->bo_stride, h = img->bo_height;
-      unsigned align_w, align_h;
+   stride = (layout->monolithic_width / info->block_width) * info->block_size;
+   height = layout->monolithic_height / info->block_height;
 
-  /*
-   * From the Haswell PRM, volume 5, page 163:
-   *
-   * For linear surfaces, additional padding of 64 bytes is required
-   *  at the bottom of the surface. This is in addition to the padding
-   *  required above.
-   */
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
-          params->info->bind_surface_sampler &&
-          img->tiling == GEN6_TILING_NONE)
-         h += (64 + img->bo_stride - 1) / img->bo_stride;
+   /*
+* From the Haswell PRM, volume 5, page 163:
+*
+* For linear surfaces, additional padding of 64 bytes is required
+*  at the bottom of the surface. This is in addition to the padding
+*  required above.
+*/
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
+       layout->tiling == GEN6_TILING_NONE)
+      height += (64 + stride - 1) / stride;
 
+   /*
+* From the Sandy Bridge PRM, volume 4 part 1, page 81:
+*
+* - For linear render target surfaces, the pitch must be a multiple
+*of the element size for non-YUV surface formats.  Pitch must be a
+*multiple of 2 * element size for YUV surface formats.
+*
+*  - For other linear surfaces, the pitch can be any multiple of
+*bytes.
+*  - For tiled surfaces, the pitch must be a multiple of the tile
+*width.
+*
+* Different requirements may exist when the image is used in different
+* places, but our alignments here should be good enough that we do not
+* need to check info->bind_x.
+*/
+   switch (layout->tiling) {
+   case GEN6_TILING_X:
+  align_w = 512;
+  align_h = 8;
+  break;
+   case GEN6_TILING_Y:
+  align_w = 128;
+  align_h = 32;
+  break;
+   case GEN8_TILING_W:
   /*
-   * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+   * From the Sandy Bridge PRM, volume 1 part 2, page 22:
*
-   * - For linear render target surfaces, the pitch must be a
-   *multiple of the element size for non-YUV surface formats.
-   *Pitch must be a multiple of 2 * element size for YUV surface
-   *formats.
-   *  - For other linear surfaces, the pitch can be any multiple of
-   *bytes.
-   *  - For tiled surfaces, the pitch must be a multiple of the tile
-   *width.
-   *
-   * Different requirements may exist when the bo is used in different
-   * places, 

Mesa (master): ilo: add image_get_gen6_{hiz,mcs}_enable()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: c88e6cdfbfd7a7727dbae6b47a803b18aca5d9f4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c88e6cdfbfd7a7727dbae6b47a803b18aca5d9f4

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:14:36 2015 +0800

ilo: add image_get_gen6_{hiz,mcs}_enable()

They replace img_init_aux().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  198 +++---
 1 file changed, 97 insertions(+), 101 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index 84001d8..c22c119 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -55,6 +55,8 @@ struct ilo_image_layout {
 
uint8_t valid_tilings;
enum gen_surface_tiling tiling;
+
+   enum ilo_image_aux_type aux;
 };
 
 static enum ilo_image_walk_type
@@ -296,6 +298,91 @@ image_get_gen6_tiling(const struct ilo_dev *dev,
 }
 
 static bool
+image_get_gen6_hiz_enable(const struct ilo_dev *dev,
+  const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* depth buffer? */
+   if (!info->bind_zs ||
+       info->format == GEN6_FORMAT_R8_UINT ||
+       info->interleaved_stencil)
+      return false;
+
+   /* we want to be able to force 8x4 alignments */
+   if (info->type == GEN6_SURFTYPE_1D)
+      return false;
+
+   if (info->aux_disable)
+      return false;
+
+   if (ilo_debug & ILO_DEBUG_NOHIZ)
+      return false;
+
+   return true;
+}
+
+static bool
+image_get_gen7_mcs_enable(const struct ilo_dev *dev,
+  const struct ilo_image_info *info,
+  enum gen_surface_tiling tiling)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
+      return false;
+
+   /*
+* From the Ivy Bridge PRM, volume 4 part 1, page 77:
+*
+* For Render Target and Sampling Engine Surfaces:If the surface is
+*  multisampled (Number of Multisamples any value other than
+*  MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled.
+*
+* This field must be set to 0 for all SINT MSRTs when all RT channels
+*  are not written
+*/
+   if (info->sample_count > 1) {
+      if (ilo_dev_gen(dev) < ILO_GEN(8))
+         assert(!info->is_integer);
+      return true;
+   }
+
+   if (info->aux_disable)
+      return false;
+
+   /*
+* From the Ivy Bridge PRM, volume 2 part 1, page 326:
+*
+* When MCS is buffer is used for color clear of non-multisampler
+*  render target, the following restrictions apply.
+*  - Support is limited to tiled render targets.
+*  - Support is for non-mip-mapped and non-array surface types only.
+*  - Clear is supported only on the full RT; i.e., no partial clear or
+*overlapping clears.
+*  - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
+*64bpp and 128bpp.
+*  ...
+*
+* How about SURFTYPE_3D?
+*/
+   if (!info->bind_surface_dp_render ||
+       tiling == GEN6_TILING_NONE ||
+       info->level_count > 1 ||
+       info->array_size > 1)
+      return false;
+
+   switch (info->block_size) {
+   case 4:
+   case 8:
+   case 16:
+  return true;
+   default:
+  return false;
+   }
+}
+
+static bool
 image_get_gen6_layout(const struct ilo_dev *dev,
   const struct ilo_image_info *info,
   struct ilo_image_layout *layout)
@@ -316,6 +403,14 @@ image_get_gen6_layout(const struct ilo_dev *dev,
 
    layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
 
+   if (image_get_gen6_hiz_enable(dev, info))
+      layout->aux = ILO_IMAGE_AUX_HIZ;
+   else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
+            image_get_gen7_mcs_enable(dev, info, layout->tiling))
+      layout->aux = ILO_IMAGE_AUX_MCS;
+   else
+      layout->aux = ILO_IMAGE_AUX_NONE;
+
return true;
 }
 
@@ -755,106 +850,6 @@ img_init_size_and_format(struct ilo_image *img,
img-sample_count = info-sample_count;
 }
 
-static bool
-img_want_mcs(const struct ilo_image *img,
- const struct ilo_image_params *params)
-{
-   const struct ilo_image_info *info = params->info;
-   bool want_mcs = false;
-
-   /* MCS is for RT on GEN7+ */
-   if (ilo_dev_gen(params->dev) < ILO_GEN(7))
-      return false;
-
-   if (info->type != GEN6_SURFTYPE_2D || !info->bind_surface_dp_render)
-  return false;
-
-   /*
-* From the Ivy Bridge PRM, volume 4 part 1, page 77:
-*
-* For Render Target and Sampling Engine Surfaces:If the surface is
-*  multisampled (Number of Multisamples any value other than
-*  MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled.
-*
-* This field must be set to 0 for all SINT MSRTs when all RT channels
-*  are not written
-*/
-   if (info->sample_count > 1 && !info->is_integer) {
-      want_mcs = true;
-   } else if 

Mesa (master): ilo: add image_set_gen6_{hiz,mcs}

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: 0896d629fded96178daa79c393ba4dae0d56f2ff
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0896d629fded96178daa79c393ba4dae0d56f2ff

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:42:04 2015 +0800

ilo: add image_set_gen6_{hiz,mcs}

They replace img_calculate_{hiz,mcs}_size().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  110 +-
 1 file changed, 61 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index 535407f..4b53233 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -1097,20 +1097,24 @@ img_calculate_bo_size(struct ilo_image *img,
}
 }
 
-static void
-img_calculate_hiz_size(struct ilo_image *img,
-   const struct ilo_image_params *params)
+static bool
+image_set_gen6_hiz(struct ilo_image *img,
+   const struct ilo_dev *dev,
+   const struct ilo_image_info *info,
+   const struct ilo_image_layout *layout)
 {
-   const struct ilo_image_info *info = params->info;
-   const unsigned hz_align_j = 8;
+   const int hz_align_j = 8;
enum ilo_image_walk_type hz_walk;
-   unsigned hz_width, hz_height, lv;
-   unsigned hz_clear_w, hz_clear_h;
+   int hz_width, hz_height;
+   int hz_clear_w, hz_clear_h;
+   uint8_t lv;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
+   assert(layout->aux == ILO_IMAGE_AUX_HIZ);
 
-   assert(img->walk == ILO_IMAGE_WALK_LAYER ||
-          img->walk == ILO_IMAGE_WALK_3D);
+   assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
+          layout->walk == ILO_IMAGE_WALK_3D);
 
/*
 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
@@ -1123,8 +1127,8 @@ img_calculate_hiz_size(struct ilo_image *img,
 *
 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
 */
-   if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
-      hz_walk = img->walk;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      hz_walk = layout->walk;
    else
       hz_walk = ILO_IMAGE_WALK_LOD;
 
@@ -1138,16 +1142,16 @@ img_calculate_hiz_size(struct ilo_image *img,
switch (hz_walk) {
case ILO_IMAGE_WALK_LAYER:
   {
-         const unsigned h0 = align(params->h0, hz_align_j);
-         const unsigned h1 = align(params->h1, hz_align_j);
-         const unsigned htail =
-            ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
-         const unsigned hz_qpitch = h0 + h1 + htail;
+         const int h0 = align(layout->walk_layer_h0, hz_align_j);
+         const int h1 = align(layout->walk_layer_h1, hz_align_j);
+         const int htail =
+            ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
+         const int hz_qpitch = h0 + h1 + htail;
 
-         hz_width = align(img->lods[0].slice_width, 16);
+         hz_width = align(layout->lods[0].slice_width, 16);
 
          hz_height = hz_qpitch * info->array_size / 2;
-         if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
+         if (ilo_dev_gen(dev) >= ILO_GEN(7))
             hz_height = align(hz_height, 8);
 
          img->aux.walk_layer_height = hz_qpitch;
@@ -1155,9 +1159,9 @@ img_calculate_hiz_size(struct ilo_image *img,
   break;
case ILO_IMAGE_WALK_LOD:
   {
- unsigned lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
- unsigned lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
- unsigned cur_tx, cur_ty;
+ int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int cur_tx, cur_ty;
 
  /* figure out the tile offsets of LODs */
  hz_width = 0;
@@ -1165,17 +1169,17 @@ img_calculate_hiz_size(struct ilo_image *img,
  cur_tx = 0;
  cur_ty = 0;
          for (lv = 0; lv < info->level_count; lv++) {
-            unsigned tw, th;
+            int tw, th;
 
             lod_tx[lv] = cur_tx;
             lod_ty[lv] = cur_ty;
 
-            tw = align(img->lods[lv].slice_width, 16);
-            th = align(img->lods[lv].slice_height, hz_align_j) *
+            tw = align(layout->lods[lv].slice_width, 16);
+            th = align(layout->lods[lv].slice_height, hz_align_j) *
                info->array_size / 2;
             /* convert to Y-tiles */
-            tw = align(tw, 128) / 128;
-            th = align(th, 32) / 32;
+            tw = (tw + 127) / 128;
+            th = (th + 31) / 32;
 
             if (hz_width < cur_tx + tw)
                hz_width = cur_tx + tw;
@@ -1193,16 +1197,17 @@ img_calculate_hiz_size(struct ilo_image *img,
 img-aux.walk_lod_offsets[lv] =
(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
  }
+
  hz_width *= 128;
  hz_height *= 32;
   }
   break;
case ILO_IMAGE_WALK_3D:
-      hz_width = align(img->lods[0].slice_width, 16);
+      hz_width = align(layout->lods[0].slice_width, 16);
 
   hz_height = 0;
   for (lv = 0; lv  

Mesa (master): ilo: add image_get_gen6_monolithic_size()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: 0da3b732ad156f63e32e7520bc1af97e1b733be7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0da3b732ad156f63e32e7520bc1af97e1b733be7

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:38:49 2015 +0800

ilo: add image_get_gen6_monolithic_size()

It replaces img_align().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  134 +++---
 1 file changed, 67 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index c2a8487..535407f 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -765,6 +765,72 @@ image_get_gen6_walk_layer_heights(const struct ilo_dev 
*dev,
 }
 
 static void
+image_get_gen6_monolithic_size(const struct ilo_dev *dev,
+   const struct ilo_image_info *info,
+   struct ilo_image_layout *layout,
+   int max_x, int max_y)
+{
+   int align_w = 1, align_h = 1, pad_h = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+* From the Sandy Bridge PRM, volume 1 part 1, page 118:
+*
+* To determine the necessary padding on the bottom and right side of
+*  the surface, refer to the table in Section 7.18.3.4 for the i and j
+*  parameters for the surface format in use. The surface must then be
+*  extended to the next multiple of the alignment unit size in each
+*  dimension, and all texels contained in this extended surface must
+*  have valid GTT entries.
+*
+* For cube surfaces, an additional two rows of padding are required
+*  at the bottom of the surface. This must be ensured regardless of
+*  whether the surface is stored tiled or linear.  This is due to the
+*  potential rotation of cache line orientation from memory to cache.
+*
+* For compressed textures (BC* and FXT1 surface formats), padding at
+*  the bottom of the surface is to an even compressed row, which is
+*  equal to a multiple of 8 uncompressed texel rows. Thus, for padding
+*  purposes, these surfaces behave as if j = 8 only for surface
+*  padding purposes. The value of 4 for j still applies for mip level
+*  alignment and QPitch calculation.
+*/
+   if (info->bind_surface_sampler) {
+      align_w = MAX2(align_w, layout->align_i);
+      align_h = MAX2(align_h, layout->align_j);
+
+      if (info->type == GEN6_SURFTYPE_CUBE)
+         pad_h += 2;
+
+      if (info->compressed)
+         align_h = MAX2(align_h, layout->align_j * 2);
+   }
+
+   /*
+* From the Sandy Bridge PRM, volume 1 part 1, page 118:
+*
+* If the surface contains an odd number of rows of data, a final row
+*  below the surface must be allocated.
+*/
+   if (info->bind_surface_dp_render)
+      align_h = MAX2(align_h, 2);
+
+   /*
+* Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
+* for unaligned non-mipmapped and non-array images.
+*/
+   if (layout->aux == ILO_IMAGE_AUX_HIZ &&
+       info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
+      align_w = MAX2(align_w, 8);
+      align_h = MAX2(align_h, 4);
+   }
+
+   layout->monolithic_width = align(max_x, align_w);
+   layout->monolithic_height = align(max_y + pad_h, align_h);
+}
+
+static void
 image_get_gen6_lods(const struct ilo_dev *dev,
 const struct ilo_image_info *info,
 struct ilo_image_layout *layout)
@@ -852,8 +918,7 @@ image_get_gen6_lods(const struct ilo_dev *dev,
       layout->walk_layer_height = 0;
    }
 
-   layout->monolithic_width = max_x;
-   layout->monolithic_height = max_y;
+   image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
 }
 
 static bool
@@ -922,70 +987,6 @@ img_init_size_and_format(struct ilo_image *img,
img-sample_count = info-sample_count;
 }
 
-static void
-img_align(struct ilo_image *img, struct ilo_image_params *params)
-{
-   const struct ilo_image_info *info = params-info;
-   int align_w = 1, align_h = 1, pad_h = 0;
-
-   /*
-* From the Sandy Bridge PRM, volume 1 part 1, page 118:
-*
-* To determine the necessary padding on the bottom and right side of
-*  the surface, refer to the table in Section 7.18.3.4 for the i and j
-*  parameters for the surface format in use. The surface must then be
-*  extended to the next multiple of the alignment unit size in each
-*  dimension, and all texels contained in this extended surface must
-*  have valid GTT entries.
-*
-* For cube surfaces, an additional two rows of padding are required
-*  at the bottom of the surface. This must be ensured regardless of
-*  whether the surface is stored tiled or linear.  This is due to the
-*  potential rotation of cache line orientation from memory to cache.
-*
-

Mesa (master): ilo: add image_get_gen6_layout()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: 9e13f5c85f23ff67e685b41a4d439fc443de2dd0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9e13f5c85f23ff67e685b41a4d439fc443de2dd0

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:02:52 2015 +0800

ilo: add image_get_gen6_layout()

It replaces only img_init_walk() right now.  It will replace all img_init_*().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  189 +-
 1 file changed, 107 insertions(+), 82 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index 3209674..d4adffc 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -49,6 +49,102 @@ struct ilo_image_params {
unsigned max_x, max_y;
 };
 
+struct ilo_image_layout {
+   enum ilo_image_walk_type walk;
+   bool interleaved_samples;
+};
+
+static enum ilo_image_walk_type
+image_get_gen6_walk(const struct ilo_dev *dev,
+const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* TODO we want LODs to be page-aligned */
+   if (info->type == GEN6_SURFTYPE_3D)
+      return ILO_IMAGE_WALK_3D;
+
+   /*
+* From the Sandy Bridge PRM, volume 1 part 1, page 115:
+*
+* The separate stencil buffer does not support mip mapping, thus the
+*  storage for LODs other than LOD 0 is not needed. The following
+*  QPitch equation applies only to the separate stencil buffer:
+*
+*QPitch = h_0
+*
+* Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
+* when bound.
+*/
+   if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
+      return ILO_IMAGE_WALK_LOD;
+
+   /* compact spacing is not supported otherwise */
+   return ILO_IMAGE_WALK_LAYER;
+}
+
+static enum ilo_image_walk_type
+image_get_gen7_walk(const struct ilo_dev *dev,
+const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (info->type == GEN6_SURFTYPE_3D)
+      return ILO_IMAGE_WALK_3D;
+
+   /*
+* From the Ivy Bridge PRM, volume 1 part 1, page 111:
+*
+* note that the depth buffer and stencil buffer have an implied value
+*  of ARYSPC_FULL
+*
+* From the Ivy Bridge PRM, volume 4 part 1, page 66:
+*
+* If Multisampled Surface Storage Format is MSFMT_MSS and Number of
+*  Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
+*  Spacing) must be set to ARYSPC_LOD0.
+*/
+   if (info->sample_count > 1)
+      assert(info->level_count == 1);
+   return (info->bind_zs || info->level_count > 1) ?
+      ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
+}
+
+static bool
+image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
+   const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+* Gen6 supports only interleaved samples.  It is not explicitly stated,
+* but on Gen7+, render targets are expected to be UMS/CMS (samples
+* non-interleaved) and depth/stencil buffers are expected to be IMS
+* (samples interleaved).
+*
+* See Multisampled Surface Storage Format field of SURFACE_STATE.
+*/
+   return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
+}
+
+static bool
+image_get_gen6_layout(const struct ilo_dev *dev,
+  const struct ilo_image_info *info,
+  struct ilo_image_layout *layout)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      layout->walk = image_get_gen7_walk(dev, info);
+   else
+      layout->walk = image_get_gen6_walk(dev, info);
+
+   layout->interleaved_samples =
+      image_get_gen6_interleaved_samples(dev, info);
+
+   return true;
+}
+
 static void
 img_get_slice_size(const struct ilo_image *img,
const struct ilo_image_params *params,
@@ -492,87 +588,6 @@ img_init_tiling(struct ilo_image *img,
   img-tiling = GEN6_TILING_NONE;
 }
 
-static void
-img_init_walk_gen7(struct ilo_image *img,
-   const struct ilo_image_params *params)
-{
-   const struct ilo_image_info *info = params-info;
-
-   /*
-* It is not explicitly states, but render targets are expected to be
-* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
-* to be IMS (samples interleaved).
-*
-* See Multisampled Surface Storage Format field of SURFACE_STATE.
-*/
-   if (info-bind_zs) {
-  /*
-   * From the Ivy Bridge PRM, volume 1 part 1, page 111:
-   *
-   * note that the depth buffer and stencil buffer have an implied
-   *  value of ARYSPC_FULL
-   */
-  img-walk = (info-type == GEN6_SURFTYPE_3D) ?
- ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
-
-  img-interleaved_samples = true;
-   } else {
-  /*
-   * From the Ivy Bridge PRM, volume 4 part 1, page 66:
-   *
-   * If Multisampled Surface Storage Format is 

Mesa (master): ilo: add image_init_gen6_transfer_layout()

2015-07-01 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: b4c66e4d3eadc04bdffbf4821636299bc49c89a4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b4c66e4d3eadc04bdffbf4821636299bc49c89a4

Author: Chia-I Wu olva...@gmail.com
Date:   Mon Jun 29 16:51:46 2015 +0800

ilo: add image_init_gen6_transfer_layout()

It replaces img_init_for_transfer().

---

 src/gallium/drivers/ilo/core/ilo_image.c |  112 ++
 1 file changed, 37 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_image.c 
b/src/gallium/drivers/ilo/core/ilo_image.c
index ec5ae04..c1edffb 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -580,6 +580,25 @@ image_init_gen6_hardware_layout(const struct ilo_dev *dev,
return true;
 }
 
+static bool
+image_init_gen6_transfer_layout(const struct ilo_dev *dev,
+const struct ilo_image_info *info,
+struct ilo_image_layout *layout)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* we can define our own layout to save space */
+   layout->walk = ILO_IMAGE_WALK_LOD;
+   layout->interleaved_samples = false;
+   layout->valid_tilings = IMAGE_TILING_NONE;
+   layout->tiling = GEN6_TILING_NONE;
+   layout->aux = ILO_IMAGE_AUX_NONE;
+   layout->align_i = info->block_width;
+   layout->align_j = info->block_height;
+
+   return true;
+}
+
 static void
 image_get_gen6_slice_size(const struct ilo_dev *dev,
   const struct ilo_image_info *info,
@@ -922,14 +941,30 @@ image_get_gen6_lods(const struct ilo_dev *dev,
 }
 
 static bool
+image_bind_gpu(const struct ilo_image_info *info)
+{
+   return (info->bind_surface_sampler ||
+           info->bind_surface_dp_render ||
+           info->bind_surface_dp_typed ||
+           info->bind_zs ||
+           info->bind_scanout ||
+           info->bind_cursor);
+}
+
+static bool
 image_get_gen6_layout(const struct ilo_dev *dev,
   const struct ilo_image_info *info,
   struct ilo_image_layout *layout)
 {
ILO_DEV_ASSERT(dev, 6, 8);
 
-   if (!image_init_gen6_hardware_layout(dev, info, layout))
-      return false;
+   if (image_bind_gpu(info) || info->level_count > 1) {
+      if (!image_init_gen6_hardware_layout(dev, info, layout))
+         return false;
+   } else {
+      if (!image_init_gen6_transfer_layout(dev, info, layout))
+         return false;
+   }
 
/*
 * the fact that align i and j are multiples of block width and height
@@ -1422,71 +1457,6 @@ img_init(struct ilo_image *img,
 }
 
 /**
- * The texutre is for transfer only.  We can define our own layout to save
- * space.
- */
-static void
-img_init_for_transfer(struct ilo_image *img,
-  const struct ilo_dev *dev,
-  const struct ilo_image_info *info)
-{
-   const unsigned num_layers = (info->type == GEN6_SURFTYPE_3D) ?
-      info->depth : info->array_size;
-   unsigned layer_width, layer_height;
-
-   assert(info->level_count == 1);
-   assert(info->sample_count == 1);
-
-   img->aux.type = ILO_IMAGE_AUX_NONE;
-
-   img->type = info->type;
-   img->width0 = info->width;
-   img->height0 = info->height;
-   img->depth0 = info->depth;
-   img->array_size = info->array_size;
-   img->level_count = 1;
-   img->sample_count = 1;
-
-   img->format = info->format;
-   img->block_width = info->block_width;
-   img->block_height = info->block_height;
-   img->block_size = info->block_size;
-
-   img->walk = ILO_IMAGE_WALK_LOD;
-
-   img->tiling = GEN6_TILING_NONE;
-
-   img->align_i = img->block_width;
-   img->align_j = img->block_height;
-
-   assert(util_is_power_of_two(img->block_width) &&
-          util_is_power_of_two(img->block_height));
-
-   /* use packed layout */
-   layer_width = align(info->width, img->align_i);
-   layer_height = align(info->height, img->align_j);
-
-   img->lods[0].slice_width = layer_width;
-   img->lods[0].slice_height = layer_height;
-
-   img->bo_stride = (layer_width / img->block_width) * img->block_size;
-   img->bo_stride = align(img->bo_stride, 64);
-
-   img->bo_height = (layer_height / img->block_height) * num_layers;
-}
-
-static bool
-img_is_bind_gpu(const struct ilo_image_info *info)
-{
-   return (info->bind_surface_sampler ||
-           info->bind_surface_dp_render ||
-           info->bind_surface_dp_typed ||
-           info->bind_zs ||
-           info->bind_scanout ||
-           info->bind_cursor);
-}
-
-/**
  * Initialize the image.  Callers should zero-initialize \p img first.
  */
 bool
@@ -1498,14 +1468,6 @@ ilo_image_init(struct ilo_image *img,
 
assert(ilo_is_zeroed(img, sizeof(*img)));
 
-   /* use transfer layout when the texture is never bound to GPU */
-   if (!img_is_bind_gpu(info) &&
-       info->level_count == 1 &&
-       info->sample_count == 1) {
-      img_init_for_transfer(img, dev, info);
-      return true;
-   }
-
    memset(&params, 0, sizeof(params));
params.dev = dev;
params.info = info;

___

Mesa (master): i965/fs: Relax fs_builder channel group assertion when force_writemask_all is on.

2015-07-01 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: dabec9c293ee29335f5a6d5d1d3c2b7a715605c1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dabec9c293ee29335f5a6d5d1d3c2b7a715605c1

Author: Francisco Jerez curroje...@riseup.net
Date:   Tue Jun 30 15:15:44 2015 +0300

i965/fs: Relax fs_builder channel group assertion when force_writemask_all is 
on.

This assertion was meant to catch code inadvertently escaping the
control flow jail determined by the group of channel enable signals
selected by some caller, however it seems useful to be able to
increase the default execution size as long as force_writemask_all is
enabled, because force_writemask_all is an explicit indication that
there is no longer a one-to-one correspondence between channels and
SIMD components so the restriction doesn't apply.

In addition reorder the calls to fs_builder::group and ::exec_all in a
couple of places to make sure that we don't temporarily break this
invariant in the future for instructions with exec_size higher than
the dispatch width.

Reviewed-by: Jason Ekstrand jason.ekstr...@intel.com

---

 src/mesa/drivers/dri/i965/brw_fs.cpp   |6 +++---
 src/mesa/drivers/dri/i965/brw_fs_builder.h |4 ++--
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a19ea66..189da1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2915,7 +2915,7 @@ fs_visitor::lower_load_payload()
   if (dst.file == MRF)
  dst.reg = dst.reg  ~BRW_MRF_COMPR4;
 
-  const fs_builder hbld = bld.group(8, 0).exec_all().at(block, inst);
+  const fs_builder hbld = bld.exec_all().group(8, 0).at(block, inst);
 
   for (uint8_t i = 0; i  inst-header_size; i++) {
  if (inst-src[i].file != BAD_FILE) {
@@ -2926,8 +2926,8 @@ fs_visitor::lower_load_payload()
  dst = offset(dst, hbld, 1);
   }
 
-  const fs_builder ibld = bld.group(inst-exec_size, inst-force_sechalf)
- .exec_all(inst-force_writemask_all)
+  const fs_builder ibld = bld.exec_all(inst-force_writemask_all)
+ .group(inst-exec_size, inst-force_sechalf)
  .at(block, inst);
 
   if (inst-dst.file == MRF  (inst-dst.reg  BRW_MRF_COMPR4) 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h 
b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 2c36e07..34646d7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -99,8 +99,8 @@ namespace brw {
   fs_builder
   group(unsigned n, unsigned i) const
   {
- assert(n = dispatch_width() 
-i  dispatch_width() / n);
+ assert(force_writemask_all ||
+(n = dispatch_width()  i  dispatch_width() / n));
  fs_builder bld = *this;
  bld._dispatch_width = n;
  bld._group += i * n;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 291feb3..e33fe6a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -180,8 +180,8 @@ create_copy_instr(const fs_builder bld, fs_inst *inst, 
fs_reg src, bool negate)
 {
int written = inst-regs_written;
int dst_width = inst-exec_size / 8;
-   const fs_builder ubld = bld.group(inst-exec_size, inst-force_sechalf)
-  .exec_all(inst-force_writemask_all);
+   const fs_builder ubld = bld.exec_all(inst-force_writemask_all)
+  .group(inst-exec_size, inst-force_sechalf);
fs_inst *copy;
 
if (written  dst_width) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/fs: Use the builder directly for the gen6 interpolation add(32)

2015-07-01 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 80fc9c01dfe4cbbcf1c6b101fcdfdecbda63131e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=80fc9c01dfe4cbbcf1c6b101fcdfdecbda63131e

Author: Jason Ekstrand jason.ekstr...@intel.com
Date:   Tue Jun 30 17:04:52 2015 -0700

i965/fs: Use the builder directly for the gen6 interpolation add(32)

Now that we can create builders with a bigger width than their parent as
long as it's exec_all, we don't need to create the instruction manually.

Reviewed-by: Francisco Jerez curroje...@riseup.net

---

 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 79ebb2d..94d6a58 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1358,12 +1358,11 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
   fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
   BRW_REGISTER_TYPE_UW);
-  fs_inst *add =
- new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dispatch_width * 2,
-   int_pixel_xy,
-   fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
-   fs_reg(brw_imm_v(0x11001010)));
-  abld.exec_all().emit(add);
+
+  const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
+  dbld.ADD(int_pixel_xy,
+   fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
+   fs_reg(brw_imm_v(0x11001010)));
 
   this-pixel_x = vgrf(glsl_type::float_type);
   this-pixel_y = vgrf(glsl_type::float_type);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: expose build_program_resource_list function

2015-07-01 Thread Tapani Pälli
Module: Mesa
Branch: master
Commit: 73afa31f07fe4af605088f6590edc4227652c482
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=73afa31f07fe4af605088f6590edc4227652c482

Author: Tapani Pälli tapani.pa...@intel.com
Date:   Mon Jun 29 14:39:05 2015 +0300

glsl: expose build_program_resource_list function

This is required so that we can move resource list creation
to happen later.

Signed-off-by: Tapani Pälli tapani.pa...@intel.com
Reviewed-by: Martin Peres martin.pe...@linux.intel.com

---

 src/glsl/linker.cpp |2 +-
 src/glsl/program.h  |4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index d9527d4..e0ce00c 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2689,7 +2689,7 @@ add_interface_variables(struct gl_shader_program *shProg,
  * Builds up a list of program resources that point to existing
  * resource data.
  */
-static void
+void
 build_program_resource_list(struct gl_context *ctx,
 struct gl_shader_program *shProg)
 {
diff --git a/src/glsl/program.h b/src/glsl/program.h
index f15113a..c06541a 100644
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -40,6 +40,10 @@ extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
 
 extern void
+build_program_resource_list(struct gl_context *ctx,
+struct gl_shader_program *shProg);
+
+extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
PRINTFLIKE(2, 3);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: create program resource list after LinkShader

2015-07-01 Thread Tapani Pälli
Module: Mesa
Branch: master
Commit: f045b8b2ff5ac75da3e092f482fd1717571d8462
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f045b8b2ff5ac75da3e092f482fd1717571d8462

Author: Tapani Pälli tapani.pa...@intel.com
Date:   Mon Jun 29 15:23:45 2015 +0300

glsl: create program resource list after LinkShader

Resource list can be created properly only after LinkShader hook
has been called to make sure all dead variables have been removed.

Signed-off-by: Tapani Pälli tapani.pa...@intel.com
Reviewed-by: Martin Peres martin.pe...@linux.intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90925

---

 src/glsl/linker.cpp |4 
 src/mesa/program/ir_to_mesa.cpp |2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index e0ce00c..71a45e8 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3242,10 +3242,6 @@ link_shaders(struct gl_context *ctx, struct 
gl_shader_program *prog)
   }
}
 
-   build_program_resource_list(ctx, prog);
-   if (!prog-LinkStatus)
-  goto done;
-
/* FINISHME: Assign fragment shader output locations. */
 
 done:
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 18e3bc5..0b2eb12 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2975,6 +2975,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
if (prog-LinkStatus) {
   if (!ctx-Driver.LinkShader(ctx, prog)) {
 prog-LinkStatus = GL_FALSE;
+  } else {
+ build_program_resource_list(ctx, prog);
   }
}
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: build stageref mask using IR, not symbol table

2015-07-01 Thread Tapani Pälli
Module: Mesa
Branch: master
Commit: ccaf37f4496eb836866c9daacf21f1f5ac8c6d66
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ccaf37f4496eb836866c9daacf21f1f5ac8c6d66

Author: Tapani Pälli tapani.pa...@intel.com
Date:   Mon Jun 29 14:19:00 2015 +0300

glsl: build stageref mask using IR, not symbol table

Instead of using symbol table, build mask by inspecting IR. This
change is required by further patches to move resource list creation
to happen later when symbol table does not exist anymore.

Signed-off-by: Tapani Pälli tapani.pa...@intel.com
Reviewed-by: Martin Peres martin.pe...@linux.intel.com

---

 src/glsl/linker.cpp |   14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 74c2f2d..d9527d4 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2624,9 +2624,17 @@ build_stageref(struct gl_shader_program *shProg, const 
char *name)
   struct gl_shader *sh = shProg-_LinkedShaders[i];
   if (!sh)
  continue;
-  ir_variable *var = sh-symbols-get_variable(name);
-  if (var)
- stages |= (1  i);
+
+  /* Shader symbol table may contain variables that have
+   * been optimized away. Search IR for the variable instead.
+   */
+  foreach_in_list(ir_instruction, node, sh-ir) {
+ ir_variable *var = node-as_variable();
+ if (var  strcmp(var-name, name) == 0) {
+stages |= (1  i);
+break;
+ }
+  }
}
return stages;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): nouveau: rename var name for nouveau_vieux to avoid conflict with nouveau

2015-07-01 Thread Ilia Mirkin
Module: Mesa
Branch: master
Commit: 8276ba260e5500664b8d8748f3224f73ef221887
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8276ba260e5500664b8d8748f3224f73ef221887

Author: Ilia Mirkin imir...@alum.mit.edu
Date:   Wed Jul  1 03:47:41 2015 -0400

nouveau: rename var name for nouveau_vieux to avoid conflict with nouveau

We want to require different versions for nouveau and nouveau_vieux.
autoconf will only check for NOUVEAU once if both drivers are enabled,
meaning both version checks don't get executed. Rename the nouveau_vieux
one to NVVIEUX to avoid the issue.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Tested-by: Alexandre Courbot acour...@nvidia.com
Tested-by: Martin Peres martin.pe...@free.fr
Reviewed-by: Emil Velikov emil.l.veli...@gmail.com

---

 configure.ac |2 +-
 src/mesa/drivers/dri/nouveau/Makefile.am |4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index af61aa2..60d180d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1421,7 +1421,7 @@ if test -n $with_dri_drivers; then
 ;;
 xnouveau)
 HAVE_NOUVEAU_DRI=yes;
-PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau = 
$LIBDRM_NVVIEUX_REQUIRED])
+PKG_CHECK_MODULES([NVVIEUX], [libdrm_nouveau = 
$LIBDRM_NVVIEUX_REQUIRED])
 ;;
 xradeon)
 HAVE_RADEON_DRI=yes;
diff --git a/src/mesa/drivers/dri/nouveau/Makefile.am 
b/src/mesa/drivers/dri/nouveau/Makefile.am
index 61af95a..01e34a8 100644
--- a/src/mesa/drivers/dri/nouveau/Makefile.am
+++ b/src/mesa/drivers/dri/nouveau/Makefile.am
@@ -38,8 +38,8 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
-   $(NOUVEAU_CFLAGS)
+   $(NVVIEUX_CFLAGS)
 
 noinst_LTLIBRARIES = libnouveau_dri.la
 libnouveau_dri_la_SOURCES = $(NOUVEAU_C_FILES)
-libnouveau_dri_la_LIBADD = $(NOUVEAU_LIBS)
+libnouveau_dri_la_LIBADD = $(NVVIEUX_LIBS)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/gen9: use an unreserved surface alignment value

2015-07-01 Thread Nanley Chery
Module: Mesa
Branch: master
Commit: 2c8f251369072ce382f651ba73ca280517d26e7f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2c8f251369072ce382f651ba73ca280517d26e7f

Author: Nanley Chery nanley.g.ch...@intel.com
Date:   Wed Jun 24 10:59:13 2015 -0700

i965/gen9: use an unreserved surface alignment value

Although the horizontal and vertical alignment fields are ignored here,
0 is a reserved value for them and may cause undefined behavior. Change
the default value to an arbitrary valid one.

v2: add comment about chosen value (Topi).

Reviewed-by: Anuj Phogat anuj.pho...@gmail.com
Signed-off-by: Nanley Chery nanley.g.ch...@intel.com

---

 src/mesa/drivers/dri/i965/gen8_surface_state.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index b2d1a57..bd3eb00 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -88,12 +88,12 @@ vertical_alignment(const struct brw_context *brw,
uint32_t surf_type)
 {
/* On Gen9+ vertical alignment is ignored for 1D surfaces and when
-* tr_mode is not TRMODE_NONE.
+* tr_mode is not TRMODE_NONE. Set to an arbitrary non-reserved value.
 */
if (brw-gen  8 
(mt-tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
 surf_type == BRW_SURFACE_1D))
-  return 0;
+  return GEN8_SURFACE_VALIGN_4;
 
switch (mt-align_h) {
case 4:
@@ -113,12 +113,12 @@ horizontal_alignment(const struct brw_context *brw,
  uint32_t surf_type)
 {
/* On Gen9+ horizontal alignment is ignored when tr_mode is not
-* TRMODE_NONE.
+* TRMODE_NONE. Set to an arbitrary non-reserved value.
 */
if (brw-gen  8 
(mt-tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
 gen9_use_linear_1d_layout(brw, mt)))
-  return 0;
+  return GEN8_SURFACE_HALIGN_4;
 
switch (mt-align_w) {
case 4:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): nv50/ir: copy joinAt when splitting both before and after

2015-07-01 Thread Ilia Mirkin
Module: Mesa
Branch: master
Commit: 5dcb28c3d26828ed1b0e2bd5a0589c5baab04b85
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5dcb28c3d26828ed1b0e2bd5a0589c5baab04b85

Author: Ilia Mirkin imir...@alum.mit.edu
Date:   Wed Jul  1 02:11:39 2015 -0400

nv50/ir: copy joinAt when splitting both before and after

The current implementation only moves the joinAt when splitting after
the given instruction, not before it. So if you have a BB with

  foo
  instr
  bar
  joinat

and thus with joinAt set, we end up first splitting before instr, at
which point the instr's bb is updated to the new bb. Since that bb
doesn't have a joinAt set (despite containing one), when splitting after
the instr, there is nothing to copy over. Since the joinat will be in
the split bb irrespective of whether we're splitting before or after
the instruction, move it over in either case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91124
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org

---

 src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp|3 +++
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp |1 +
 3 files changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
index 51b9225..fa8ee07 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -332,6 +332,9 @@ BasicBlock::splitBefore(Instruction *insn, bool attach)
BasicBlock *bb = new BasicBlock(func);
assert(!insn || insn-op != OP_PHI);
 
+   bb-joinAt = joinAt;
+   joinAt = NULL;
+
splitCommon(insn, bb, attach);
return bb;
 }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index ecd115f..9839a0e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1687,6 +1687,7 @@ Converter::insertConvergenceOps(BasicBlock *conv, 
BasicBlock *fork)
join-fixed = 1;
conv-insertHead(join);
 
+   assert(!fork-joinAt);
fork-joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
fork-insertBefore(fork-getExit(), fork-joinAt);
 }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 2c7f7e3..bea293b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -871,6 +871,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i)
BasicBlock *joinBB = i-bb-splitAfter(i);
 
bld.setPosition(currBB, true);
+   assert(!currBB-joinAt);
currBB-joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
 
for (int l = 0; l = 3; ++l) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): nv50/ir: don't emit src2 in immediate form

2015-07-01 Thread Ilia Mirkin
Module: Mesa
Branch: master
Commit: c3215ef204c0fdfc44230adbd423720169d44dcb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3215ef204c0fdfc44230adbd423720169d44dcb

Author: Ilia Mirkin imir...@alum.mit.edu
Date:   Thu Jul  2 00:13:36 2015 -0400

nv50/ir: don't emit src2 in immediate form

In the immediate form, src2 == dst, so it does not need to be emitted.
Otherwise it overlaps with the immediate value's low bits.

Fixes: 09ee907266 (nv50/ir: Fold IMM into MAD)
Cc: 10.6 mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu

---

 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 6de8f45..67ea6df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -550,7 +550,7 @@ CodeEmitterNV50::emitForm_MUL(const Instruction *i)
 }
 
 // usual immediate form
-// - 1 to 3 sources where last is immediate (rir, gir)
+// - 1 to 3 sources where second is immediate (rir, gir)
 // - no address or predicate possible
 void
 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
@@ -566,7 +566,7 @@ CodeEmitterNV50::emitForm_IMM(const Instruction *i)
if (Target::operationSrcNr[i-op]  1) {
   setSrc(i, 0, 0);
   setImmediate(i, 1);
-  setSrc(i, 2, 1);
+  // If there is another source, it has to be the same as the dest reg.
} else {
   setImmediate(i, 0);
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): nvc0: tune PREFER_BLIT_BASED_TEXTURE_TRANSFER capability

2015-07-01 Thread Ilia Mirkin
Module: Mesa
Branch: master
Commit: 1087c566e3496d08fe70bc0725073e3022716dc5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1087c566e3496d08fe70bc0725073e3022716dc5

Author: Alexandre Courbot acour...@nvidia.com
Date:   Thu Jul  2 11:36:55 2015 +0900

nvc0: tune PREFER_BLIT_BASED_TEXTURE_TRANSFER capability

Prefer blit-based texture transfers only if the chip has dedicated VRAM
since it would translate to a copy into the same memory on shared-memory
chips.

Signed-off-by: Alexandre Courbot acour...@nvidia.com
Reported-by: Ilia Mirkin imir...@alum.mit.edu
Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

---

 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 95e246b..3f52c85 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -163,7 +163,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -179,6 +178,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
   return (class_3d = NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
   return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+  return nouveau_screen(pscreen)-vram_domain  NOUVEAU_BO_VRAM ? 1 : 0;
 
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): nvc0: create screen fence objects with coherent attribute

2015-07-01 Thread Martin Peres
Module: Mesa
Branch: master
Commit: e212a80db37b0fc9d57beb91dbca1c43ae4476a0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e212a80db37b0fc9d57beb91dbca1c43ae4476a0

Author: Alexandre Courbot acour...@nvidia.com
Date:   Tue Jun 30 22:37:40 2015 +0900

nvc0: create screen fence objects with coherent attribute

This is required on non-coherent architectures to ensure the value of
the fence is correct at all times. Failure to do this results in the
display freezing for a few seconds every now and then on Tegra.

The NOUVEAU_BO_COHERENT flag is a no-op for coherent architectures, so behavior
on x86 should not be affected by this patch.

Also bump the required libdrm version to 2.4.62, which introduced this
flag.

Signed-off-by: Alexandre Courbot acour...@nvidia.com
Reviewed-by: Martin Peres martin.pe...@free.fr

---

 configure.ac   |2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |8 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 60d180d..ea0f069 100644
--- a/configure.ac
+++ b/configure.ac
@@ -70,7 +70,7 @@ LIBDRM_REQUIRED=2.4.38
 LIBDRM_RADEON_REQUIRED=2.4.56
 LIBDRM_INTEL_REQUIRED=2.4.60
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
-LIBDRM_NOUVEAU_REQUIRED=2.4.33 libdrm = 2.4.41
+LIBDRM_NOUVEAU_REQUIRED=2.4.62
 LIBDRM_FREEDRENO_REQUIRED=2.4.57
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 4c53106..95e246b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -614,6 +614,7 @@ nvc0_screen_create(struct nouveau_device *dev)
struct nouveau_pushbuf *push;
uint64_t value;
uint32_t obj_class;
+   uint32_t flags;
int ret;
unsigned i;
 
@@ -669,8 +670,11 @@ nvc0_screen_create(struct nouveau_device *dev)
screen-base.base.get_video_param = nouveau_vp3_screen_get_video_param;
screen-base.base.is_video_format_supported = 
nouveau_vp3_screen_video_supported;
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
-screen-fence.bo);
+   flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
+   if (dev-drm_version = 0x01000202)
+  flags |= NOUVEAU_BO_COHERENT;
+
+   ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, screen-fence.bo);
if (ret)
   goto fail;
nouveau_bo_map(screen-fence.bo, 0, NULL);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: reset the source packing when creating temp transfer image

2015-07-01 Thread Ilia Mirkin
Module: Mesa
Branch: master
Commit: 4f57cdba2767b56eb4752f14ba9853ba6bc06d0e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4f57cdba2767b56eb4752f14ba9853ba6bc06d0e

Author: Ilia Mirkin imir...@alum.mit.edu
Date:   Wed Jul  1 15:18:47 2015 -0400

mesa: reset the source packing when creating temp transfer image

Commit 4b249d2ee (mesa: Handle transferOps in texstore_rgba) introduced
proper transferops handling, but in updating the source to the newly
allocated temporary image neglected to reset the source packing. Set it
to the default which should be appropriate for the floats used.

Fixes: 4b249d2ee (mesa: Handle transferOps in texstore_rgba)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91173
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org
Reviewed-by: Chris Forbes chr...@ijw.co.nz
Reviewed-by: Anuj Phogat anuj.pho...@gmail.com

---

 src/mesa/main/texstore.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 1525205..37c0569 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -787,6 +787,7 @@ texstore_rgba(TEXSTORE_PARAMS)
   srcType = GL_FLOAT;
   srcRowStride = srcWidth * 4 * sizeof(float);
   srcMesaFormat = RGBA32_FLOAT;
+  srcPacking = ctx-DefaultPacking;
}
 
src = (GLubyte *)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit